# pyright: reportPropertyTypeMismatch=false
from __future__ import annotations

import collections
from copy import deepcopy
import datetime as dt
from functools import partial
import gc
from json import loads
import operator
import pickle
import re
import sys
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Literal,
    NoReturn,
    cast,
    final,
    overload,
)
import warnings
import weakref

import numpy as np

from pandas._config import (
    config,
    using_copy_on_write,
    warn_copy_on_write,
)

from pandas._libs import lib
from pandas._libs.lib import is_range_indexer
from pandas._libs.tslibs import (
    Period,
    Tick,
    Timestamp,
    to_offset,
)
from pandas._libs.tslibs.dtypes import freq_to_period_freqstr
from pandas._typing import (
    AlignJoin,
    AnyArrayLike,
    ArrayLike,
    Axes,
    Axis,
    AxisInt,
    CompressionOptions,
    DtypeArg,
    DtypeBackend,
    DtypeObj,
    FilePath,
    FillnaOptions,
    FloatFormatType,
    FormattersType,
    Frequency,
    IgnoreRaise,
    IndexKeyFunc,
    IndexLabel,
    InterpolateOptions,
    IntervalClosedType,
    JSONSerializable,
    Level,
    Manager,
    NaPosition,
    NDFrameT,
    OpenFileErrors,
    RandomState,
    ReindexMethod,
    Renamer,
    Scalar,
    Self,
    SequenceNotStr,
    SortKind,
    StorageOptions,
    Suffixes,
    T,
    TimeAmbiguous,
    TimedeltaConvertibleTypes,
    TimeNonexistent,
    TimestampConvertibleTypes,
    TimeUnit,
    ValueKeyFunc,
    WriteBuffer,
    WriteExcelBuffer,
    npt,
)
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import function as nv
from pandas.errors import (
    AbstractMethodError,
    ChainedAssignmentError,
    InvalidIndexError,
    SettingWithCopyError,
    SettingWithCopyWarning,
    _chained_assignment_method_msg,
    _chained_assignment_warning_method_msg,
    _check_cacher,
)
from pandas.util._decorators import (
    deprecate_nonkeyword_arguments,
    doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
    check_dtype_backend,
    validate_ascending,
    validate_bool_kwarg,
    validate_fillna_kwargs,
    validate_inclusive,
)

from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.common import (
    ensure_object,
    ensure_platform_int,
    ensure_str,
    is_bool,
    is_bool_dtype,
    is_dict_like,
    is_extension_array_dtype,
    is_list_like,
    is_number,
    is_numeric_dtype,
    is_re_compilable,
    is_scalar,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)
from pandas.core.dtypes.inference import (
    is_hashable,
    is_nested_list_like,
)
from pandas.core.dtypes.missing import (
    isna,
    notna,
)

from pandas.core import (
    algorithms as algos,
    arraylike,
    common,
    indexing,
    missing,
    nanops,
    sample,
)
from pandas.core.array_algos.replace import should_use_regex
from pandas.core.arrays import ExtensionArray
from pandas.core.base import PandasObject
from pandas.core.construction import extract_array
from pandas.core.flags import Flags
from pandas.core.indexes.api import (
    DatetimeIndex,
    Index,
    MultiIndex,
    PeriodIndex,
    RangeIndex,
    default_index,
    ensure_index,
)
from pandas.core.internals import (
    ArrayManager,
    BlockManager,
    SingleArrayManager,
)
from pandas.core.internals.construction import (
    mgr_to_mgr,
    ndarray_to_mgr,
)
from pandas.core.methods.describe import describe_ndframe
from pandas.core.missing import (
    clean_fill_method,
    clean_reindex_fill_method,
    find_valid_index,
)
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs
from pandas.core.sorting import get_indexer_indexer
from pandas.core.window import (
    Expanding,
    ExponentialMovingWindow,
    Rolling,
    Window,
)

from pandas.io.formats.format import (
    DataFrameFormatter,
    DataFrameRenderer,
)
from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
    from collections.abc import (
        Hashable,
        Iterator,
        Mapping,
        Sequence,
    )

    from pandas._libs.tslibs import BaseOffset

    from pandas import (
        DataFrame,
        ExcelWriter,
        HDFStore,
        Series,
    )
    from pandas.core.indexers.objects import BaseIndexer
    from pandas.core.resample import Resampler

# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = {**_shared_docs}
_shared_doc_kwargs = {
    "axes": "keywords for axes",
    "klass": "Series/DataFrame",
    "axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame",  # noqa: E501
    "inplace": """
    inplace : bool, default False
        If True, performs operation inplace and returns None.""",
    "optional_by": """
        by : str or list of str
            Name or list of names to sort by""",
}


bool_t = bool  # Need alias because NDFrame has def bool:
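
# A brief illustration (not part of pandas itself) of how the shared-doc
# machinery above is used: the ``@doc`` decorator substitutes entries such as
# ``_shared_doc_kwargs["klass"]`` into a method's docstring template, so that
# ``{klass}`` in the template reads "Series/DataFrame" in the rendered doc.
# Hypothetical sketch:
#
#     @doc(klass=_shared_doc_kwargs["klass"])
#     def some_method(self):
#         """Return {klass} with something applied."""
#
# After decoration, ``some_method.__doc__`` reads
# "Return Series/DataFrame with something applied."
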


class NDFrame(PandasObject, indexing.IndexingMixin):
    """
    N-dimensional analogue of DataFrame. Store multi-dimensional in a
    size-mutable, labeled data structure

    Parameters
    ----------
    data : BlockManager
    axes : list
    copy : bool, default False
    """

    _internal_names: list[str] = [
        "_mgr",
        "_cacher",
        "_item_cache",
        "_cache",
        "_is_copy",
        "_name",
        "_metadata",
        "_flags",
    ]
    _internal_names_set: set[str] = set(_internal_names)
    _accessors: set[str] = set()
    _hidden_attrs: frozenset[str] = frozenset([])
    _metadata: list[str] = []
    _is_copy: weakref.ReferenceType[NDFrame] | str | None = None
    _mgr: Manager
    _attrs: dict[Hashable, Any]
    _typ: str

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(self, data: Manager) -> None:
        object.__setattr__(self, "_is_copy", None)
        object.__setattr__(self, "_mgr", data)
        object.__setattr__(self, "_item_cache", {})
        object.__setattr__(self, "_attrs", {})
        object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))

    @final
    @classmethod
    def _init_mgr(
        cls,
        mgr: Manager,
        axes: dict[Literal["index", "columns"], Axes | None],
        dtype: DtypeObj | None = None,
        copy: bool_t = False,
    ) -> Manager:
        """passed a manager and an axes dict"""
        for a, axe in axes.items():
            if axe is not None:
                axe = ensure_index(axe)
                bm_axis = cls._get_block_manager_axis(a)
                mgr = mgr.reindex_axis(axe, axis=bm_axis)

        # make a copy if explicitly requested
        if copy:
            mgr = mgr.copy()
        if dtype is not None:
            # avoid further copies if we can
            if (
                isinstance(mgr, BlockManager)
                and len(mgr.blocks) == 1
                and mgr.blocks[0].values.dtype == dtype
            ):
                pass
            else:
                mgr = mgr.astype(dtype=dtype)
        return mgr

    @final
    def _as_manager(self, typ: str, copy: bool_t = True) -> Self:
        """
        Private helper function to create a DataFrame with specific manager.

        Parameters
        ----------
        typ : {"block", "array"}
        copy : bool, default True
            Only controls whether the conversion from Block->ArrayManager
            copies the 1D arrays (to ensure proper/contiguous memory layout).

        Returns
        -------
        DataFrame
            New DataFrame using specified manager type. Is not guaranteed
            to be a copy or not.
        """
        new_mgr: Manager
        new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy)
        # fastpath of passing a manager doesn't check the option/manager class
        return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self)

    @final
    @classmethod
    def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self:
        """
        Construct a new object of this type from a Manager object and axes.

        Parameters
        ----------
        mgr : Manager
            Must have the same ndim as cls.
        axes : list[Index]

        Notes
        -----
        The axes must match mgr.axes, but are required for future-proofing
        in the event that axes are refactored out of the Manager objects.
        """
        obj = cls.__new__(cls)
        NDFrame.__init__(obj, mgr)
        return obj

    # ----------------------------------------------------------------------
    # attrs and flags

    @property
    def attrs(self) -> dict[Hashable, Any]:
        """
        Dictionary of global attributes of this dataset.

        .. warning::

            attrs is experimental and may change without warning.

        See Also
        --------
        DataFrame.flags : Global flags applying to this object.

        Notes
        -----
        Many operations that create new datasets will copy ``attrs``. Copies
        are always deep so that changing ``attrs`` will only affect the
        present dataset. ``pandas.concat`` copies ``attrs`` only if all input
        datasets have the same ``attrs``.

        Examples
        --------
        For Series:

        >>> ser = pd.Series([1, 2, 3])
        >>> ser.attrs = {"A": [10, 20, 30]}
        >>> ser.attrs
        {'A': [10, 20, 30]}

        For DataFrame:

        >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        >>> df.attrs = {"A": [10, 20, 30]}
        >>> df.attrs
        {'A': [10, 20, 30]}
        """
        return self._attrs

    @attrs.setter
    def attrs(self, value: Mapping[Hashable, Any]) -> None:
        self._attrs = dict(value)

    @final
    @property
    def flags(self) -> Flags:
        """
        Get the properties associated with this pandas object.

        The available flags are

        * :attr:`Flags.allows_duplicate_labels`

        See Also
        --------
        Flags : Flags that apply to pandas objects.
        DataFrame.attrs : Global metadata applying to this dataset.

        Notes
        -----
        "Flags" differ from "metadata". Flags reflect properties of the
        pandas object (the Series or DataFrame). Metadata refer to properties
        of the dataset, and should be stored in :attr:`DataFrame.attrs`.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]})
        >>> df.flags
        <Flags(allows_duplicate_labels=True)>

        Flags can be get or set using ``.``

        >>> df.flags.allows_duplicate_labels
        True
        >>> df.flags.allows_duplicate_labels = False

        Or by slicing with a key

        >>> df.flags["allows_duplicate_labels"]
        False
        >>> df.flags["allows_duplicate_labels"] = True
        """
        return self._flags

    @final
    def set_flags(
        self,
        *,
        copy: bool_t = False,
        allows_duplicate_labels: bool_t | None = None,
    ) -> Self:
        """
        Return a new object with updated flags.

        Parameters
        ----------
        copy : bool, default False
            Specify if a copy of the object should be made.

            .. note::
                The `copy` keyword will change behavior in pandas 3.0.
                `Copy-on-Write
                <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
                will be enabled by default, which means that all methods with a
                `copy` keyword will use a lazy copy mechanism to defer the copy and
                ignore the `copy` keyword. The `copy` keyword will be removed in a
                future version of pandas.

                You can already get the future behavior and improvements through
                enabling copy on write ``pd.options.mode.copy_on_write = True``
        allows_duplicate_labels : bool, optional
            Whether the returned object allows duplicate labels.

        Returns
        -------
        Series or DataFrame
            The same type as the caller.

        See Also
        --------
        DataFrame.attrs : Global metadata applying to this dataset.
        DataFrame.flags : Global flags applying to this object.

        Notes
        -----
        This method returns a new object that's a view on the same data
        as the input. Mutating the input or the output values will be reflected
        in the other.

        This method is intended to be used in method chains.

        "Flags" differ from "metadata". Flags reflect properties of the
        pandas object (the Series or DataFrame). Metadata refer to properties
        of the dataset, and should be stored in :attr:`DataFrame.attrs`.

        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2]})
        >>> df.flags.allows_duplicate_labels
        True
        >>> df2 = df.set_flags(allows_duplicate_labels=False)
        >>> df2.flags.allows_duplicate_labels
        False
        """
        df = self.copy(deep=copy and not using_copy_on_write())
        if allows_duplicate_labels is not None:
            df.flags["allows_duplicate_labels"] = allows_duplicate_labels
        return df

    @final
    @classmethod
    def _validate_dtype(cls, dtype) -> DtypeObj | None:
        """validate the passed dtype"""
        if dtype is not None:
            dtype = pandas_dtype(dtype)

            # a compound dtype
            if dtype.kind == "V":
                raise NotImplementedError(
                    "compound dtypes are not implemented "
                    f"in the {cls.__name__} constructor"
                )

        return dtype
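
    # Illustration (not part of pandas): NumPy structured dtypes are the
    # "compound" dtypes rejected above -- their ``kind`` is "V" (void).
    # For example, ``np.dtype([("a", "i8"), ("b", "f8")]).kind == "V"``,
    # so passing one to a Series/DataFrame constructor raises
    # NotImplementedError, while a simple ``np.dtype("i8")`` (kind "i")
    # passes through ``pandas_dtype`` unchanged.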

    # ----------------------------------------------------------------------
    # Construction

    @property
    def _constructor(self) -> Callable[..., Self]:
        """
        Used when a manipulation result has the same dimensions as the
        original.
        """
        raise AbstractMethodError(self)

    # ----------------------------------------------------------------------
    # Internals

    @final
    @property
    def _data(self):
        # GH#33054 retained because some downstream packages use this,
        # e.g. fastparquet
        # GH#33333
        warnings.warn(
            f"{type(self).__name__}._data is deprecated and will be removed in "
            "a future version. Use public APIs instead.",
            DeprecationWarning,
            stacklevel=find_stack_level(),
        )
        return self._mgr

    # ----------------------------------------------------------------------
    # Axis
    _AXIS_ORDERS: list[Literal["index", "columns"]]
    _AXIS_TO_AXIS_NUMBER: dict[Axis, AxisInt] = {0: 0, "index": 0, "rows": 0}
    _info_axis_number: int
    _info_axis_name: Literal["index", "columns"]
    _AXIS_LEN: int

    @final
    def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs):
        """Return an axes dictionary for myself."""
        d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
        # error: Argument 1 to "update" of "MutableMapping" has incompatible type
        # "Dict[str, Any]"; expected "SupportsKeysAndGetItem[Union[int, str], Any]"
        d.update(kwargs)  # type: ignore[arg-type]
        return d
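
    # Illustration (not part of pandas): for a DataFrame, the dict built by
    # ``_construct_axes_dict`` maps axis names to the corresponding Index
    # objects, roughly ``{"index": df.index, "columns": df.columns}``, with
    # any extra keyword arguments merged in on top.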

    @final
    @classmethod
    def _get_axis_number(cls, axis: Axis) -> AxisInt:
        try:
            return cls._AXIS_TO_AXIS_NUMBER[axis]
        except KeyError:
            raise ValueError(f"No axis named {axis} for object type {cls.__name__}")

    @final
    @classmethod
    def _get_axis_name(cls, axis: Axis) -> Literal["index", "columns"]:
        axis_number = cls._get_axis_number(axis)
        return cls._AXIS_ORDERS[axis_number]

    @final
    def _get_axis(self, axis: Axis) -> Index:
        axis_number = self._get_axis_number(axis)
        assert axis_number in {0, 1}
        return self.index if axis_number == 0 else self.columns

    @final
    @classmethod
    def _get_block_manager_axis(cls, axis: Axis) -> AxisInt:
        """Map the axis to the block_manager axis."""
        axis = cls._get_axis_number(axis)
        ndim = cls._AXIS_LEN
        if ndim == 2:
            # i.e. DataFrame
            return 1 - axis
        return axis
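
    # Illustration (not part of pandas): a DataFrame's BlockManager stores
    # the data transposed relative to the user-facing axes, so for a 2-D
    # object the axis number is flipped: user axis 0 ("index") maps to
    # manager axis 1 and vice versa. For a 1-D Series (``_AXIS_LEN == 1``)
    # the axis number is returned unchanged.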

    @final
    def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]:
        # index or columns
        axis_index = getattr(self, axis)
        d = {}
        prefix = axis[0]

        for i, name in enumerate(axis_index.names):
            if name is not None:
                key = level = name
            else:
                # prefix with 'i' or 'c' depending on the input axis
                # e.g., you must do ilevel_0 for the 0th level of an unnamed
                # multiindex
                key = f"{prefix}level_{i}"
                level = i

            level_values = axis_index.get_level_values(level)
            s = level_values.to_series()
            s.index = axis_index
            d[key] = s

        # put the index/columns itself in the dict
        if isinstance(axis_index, MultiIndex):
            dindex = axis_index
        else:
            dindex = axis_index.to_series()

        d[axis] = dindex
        return d
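
    # Illustration (documented ``query``/``eval`` behavior, not internals):
    # the resolvers built above are what allow expressions to refer to the
    # index. A named index level can be referenced by its name, while an
    # unnamed level ``i`` gets an ``ilevel_i`` alias, e.g.:
    #
    #     df.query("ilevel_0 > 2")  # level 0 of an unnamed (Multi)Index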

    @final
    def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]:
        from pandas.core.computation.parsing import clean_column_name

        d: dict[str, Series | MultiIndex] = {}
        for axis_name in self._AXIS_ORDERS:
            d.update(self._get_axis_resolvers(axis_name))

        return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}

    @final
    def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
        """
        Return the special character free column resolvers of a dataframe.

        Column names with special characters are 'cleaned up' so that they can
        be referred to by backtick quoting.
        Used in :meth:`DataFrame.eval`.
        """
        from pandas.core.computation.parsing import clean_column_name
        from pandas.core.series import Series

        if isinstance(self, ABCSeries):
            return {clean_column_name(self.name): self}

        return {
            clean_column_name(k): Series(
                v, copy=False, index=self.index, name=k, dtype=self.dtypes[k]
            ).__finalize__(self)
            for k, v in zip(self.columns, self._iter_column_arrays())
            if not isinstance(k, int)
        }
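
    # Illustration (documented ``query``/``eval`` behavior, not internals):
    # the cleaned resolvers above are what make backtick quoting work, so a
    # column whose name is not a valid identifier can still be referenced:
    #
    #     df = pd.DataFrame({"col a": [1, 2, 3]})
    #     df.query("`col a` > 1")  # rows where column "col a" exceeds 1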

    @final
    @property
    def _info_axis(self) -> Index:
        return getattr(self, self._info_axis_name)

    def _is_view_after_cow_rules(self):
        # Only to be used in cases of chained assignment checks, this is a
        # simplified check that assumes that either the whole object is a view
        # or a copy
        if len(self._mgr.blocks) == 0:  # type: ignore[union-attr]
            return False
        return self._mgr.blocks[0].refs.has_reference()  # type: ignore[union-attr]

    @property
    def shape(self) -> tuple[int, ...]:
        """
        Return a tuple of axis dimensions
        """
        return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS)

    @property
    def axes(self) -> list[Index]:
        """
        Return index label(s) of the internal NDFrame
        """
        # we do it this way because if we have reversed axes, then
        # the block manager shows them reversed
        return [self._get_axis(a) for a in self._AXIS_ORDERS]

    @final
    @property
    def ndim(self) -> int:
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return self._mgr.ndim

    @final
    @property
    def size(self) -> int:
        """
        Return an int representing the number of elements in this object.

        Return the number of rows if Series. Otherwise return the number of
        rows times number of columns if DataFrame.

        See Also
        --------
        ndarray.size : Number of elements in the array.

        Examples
        --------
        >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.size
        3

        >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.size
        4
        """

        return int(np.prod(self.shape))

    def set_axis(
        self,
        labels,
        *,
        axis: Axis = 0,
        copy: bool_t | None = None,
    ) -> Self:
        """
        Assign desired index to given axis.

        Indexes for%(extended_summary_sub)s row labels can be changed by assigning
        a list-like or Index.

        Parameters
        ----------
        labels : list-like, Index
            The values for the new index.

        axis : %(axes_single_arg)s, default 0
            The axis to update. The value 0 identifies the rows. For `Series`
            this parameter is unused and defaults to 0.

        copy : bool, default True
            Whether to make a copy of the underlying data.

            .. note::
                The `copy` keyword will change behavior in pandas 3.0.
                `Copy-on-Write
                <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
                will be enabled by default, which means that all methods with a
                `copy` keyword will use a lazy copy mechanism to defer the copy and
                ignore the `copy` keyword. The `copy` keyword will be removed in a
                future version of pandas.

                You can already get the future behavior and improvements through
                enabling copy on write ``pd.options.mode.copy_on_write = True``

        Returns
        -------
        %(klass)s
            An object of type %(klass)s.

        See Also
        --------
        %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
        """
        return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy)

    @final
    def _set_axis_nocheck(
        self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None
    ):
        if inplace:
            setattr(self, self._get_axis_name(axis), labels)
        else:
            # With copy=False, we create a new object but don't copy the
            # underlying data.
            obj = self.copy(deep=copy and not using_copy_on_write())
            setattr(obj, obj._get_axis_name(axis), labels)
            return obj

    @final
    def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
        """
        This is called from the cython code when we set the `index` attribute
        directly, e.g. `series.index = [1, 2, 3]`.
        """
        labels = ensure_index(labels)
        self._mgr.set_axis(axis, labels)
        self._clear_item_cache()

    @final
    def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self:
        """
        Interchange axes and swap values axes appropriately.

        .. deprecated:: 2.1.0
            ``swapaxes`` is deprecated and will be removed.
            Please use ``transpose`` instead.

        Returns
        -------
        same as input

        Examples
        --------
        Please see examples for :meth:`DataFrame.transpose`.
        """
        warnings.warn(
            # GH#51946
            f"'{type(self).__name__}.swapaxes' is deprecated and "
            "will be removed in a future version. "
            f"Please use '{type(self).__name__}.transpose' instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            return self.copy(deep=copy and not using_copy_on_write())

        mapping = {i: j, j: i}

        new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
        new_values = self._values.swapaxes(i, j)  # type: ignore[union-attr]
        if self._mgr.is_single_block and isinstance(self._mgr, BlockManager):
            # This should only get hit in case of having a single block, otherwise a
            # copy is made, we don't have to set up references.
            new_mgr = ndarray_to_mgr(
                new_values,
                new_axes[0],
                new_axes[1],
                dtype=None,
                copy=False,
                typ="block",
            )
            assert isinstance(new_mgr, BlockManager)
            assert isinstance(self._mgr, BlockManager)
            new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
            new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
            if not using_copy_on_write() and copy is not False:
                new_mgr = new_mgr.copy(deep=True)

            out = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes)
            return out.__finalize__(self, method="swapaxes")

        return self._constructor(
            new_values,
            *new_axes,
            # The no-copy case for CoW is handled above
            copy=False,
        ).__finalize__(self, method="swapaxes")

    @final
    @doc(klass=_shared_doc_kwargs["klass"])
    def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self:
        """
        Return {klass} with requested index / column level(s) removed.

        Parameters
        ----------
        level : int, str, or list-like
            If a string is given, must be the name of a level
            If list-like, elements must be names or positional indexes
            of levels.

        axis : {{0 or 'index', 1 or 'columns'}}, default 0
            Axis along which the level(s) is removed:

            * 0 or 'index': remove level(s) in column.
            * 1 or 'columns': remove level(s) in row.

            For `Series` this parameter is unused and defaults to 0.

        Returns
        -------
        {klass}
            {klass} with requested index / column level(s) removed.

        Examples
        --------
        >>> df = pd.DataFrame([
        ...     [1, 2, 3, 4],
        ...     [5, 6, 7, 8],
        ...     [9, 10, 11, 12]
        ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

        >>> df.columns = pd.MultiIndex.from_tuples([
        ...     ('c', 'e'), ('d', 'f')
        ... ], names=['level_1', 'level_2'])

        >>> df
        level_1   c   d
        level_2   e   f
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12

        >>> df.droplevel('a')
        level_1   c   d
        level_2   e   f
        b
        2        3   4
        6        7   8
        10      11  12

        >>> df.droplevel('level_2', axis=1)
        level_1   c   d
        a b
        1 2      3   4
        5 6      7   8
        9 10    11  12
        """
        labels = self._get_axis(axis)
        new_labels = labels.droplevel(level)
        return self.set_axis(new_labels, axis=axis, copy=None)

    def pop(self, item: Hashable) -> Series | Any:
        result = self[item]
        del self[item]

        return result

    @final
    def squeeze(self, axis: Axis | None = None):
        """
        Squeeze 1 dimensional axis objects into scalars.

        Series or DataFrames with a single element are squeezed to a scalar.
        DataFrames with a single column or a single row are squeezed to a
        Series. Otherwise the object is unchanged.

        This method is most useful when you don't know if your
        object is a Series or DataFrame, but you do know it has just a single
        column. In that case you can safely call `squeeze` to ensure you have a
        Series.

        Parameters
        ----------
        axis : {0 or 'index', 1 or 'columns', None}, default None
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed. For `Series` this parameter is unused and defaults to `None`.

        Returns
        -------
        DataFrame, Series, or scalar
            The projection after squeezing `axis` or all the axes.

        See Also
        --------
        Series.iloc : Integer-location based indexing for selecting scalars.
        DataFrame.iloc : Integer-location based indexing for selecting Series.
        Series.to_frame : Inverse of DataFrame.squeeze for a
            single-column DataFrame.

        Examples
        --------
        >>> primes = pd.Series([2, 3, 5, 7])

        Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: int64

        >>> even_primes.squeeze()
        2

        Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: int64

        Squeezing is even more effective when used with DataFrames.

        >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4

        Slicing a single column will produce a DataFrame with the columns
        having only one value:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3

        So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: int64

        Slicing a single row from a single column will produce a single
        scalar DataFrame:

        >>> df_0a = df.loc[df.index < 1, ['a']]
        >>> df_0a
           a
        0  1

        Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: int64

        Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        1
        """
        axes = range(self._AXIS_LEN) if axis is None else (self._get_axis_number(axis),)
        result = self.iloc[
            tuple(
                0 if i in axes and len(a) == 1 else slice(None)
                for i, a in enumerate(self.axes)
            )
        ]
        if isinstance(result, NDFrame):
            result = result.__finalize__(self, method="squeeze")
        return result

    # ----------------------------------------------------------------------
    # Rename

    @final
    def _rename(
        self,
        mapper: Renamer | None = None,
        *,
        index: Renamer | None = None,
        columns: Renamer | None = None,
        axis: Axis | None = None,
        copy: bool_t | None = None,
        inplace: bool_t = False,
        level: Level | None = None,
        errors: str = "ignore",
    ) -> Self | None:
        # called by Series.rename and DataFrame.rename

        if mapper is None and index is None and columns is None:
            raise TypeError("must pass an index to rename")

        if index is not None or columns is not None:
            if axis is not None:
                raise TypeError(
                    "Cannot specify both 'axis' and any of 'index' or 'columns'"
                )
            if mapper is not None:
                raise TypeError(
                    "Cannot specify both 'mapper' and any of 'index' or 'columns'"
                )
        else:
            # use the mapper argument
            if axis and self._get_axis_number(axis) == 1:
                columns = mapper
            else:
                index = mapper

        self._check_inplace_and_allows_duplicate_labels(inplace)
        result = self if inplace else self.copy(deep=copy and not using_copy_on_write())

        for axis_no, replacements in enumerate((index, columns)):
            if replacements is None:
                continue

            ax = self._get_axis(axis_no)
            f = common.get_rename_function(replacements)

            if level is not None:
                level = ax._get_level_number(level)

            # GH 13473
            if not callable(replacements):
                if ax._is_multi and level is not None:
                    indexer = ax.get_level_values(level).get_indexer_for(replacements)
                else:
                    indexer = ax.get_indexer_for(replacements)

                if errors == "raise" and len(indexer[indexer == -1]):
                    missing_labels = [
                        label
                        for index, label in enumerate(replacements)
                        if indexer[index] == -1
                    ]
                    raise KeyError(f"{missing_labels} not found in axis")

            new_index = ax._transform_index(f, level=level)
            result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False)
            result._clear_item_cache()

        if inplace:
            self._update_inplace(result)
            return None
        else:
            return result.__finalize__(self, method="rename")
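
    # Illustration (not part of pandas): with ``errors="raise"``, labels in a
    # dict-like mapper that are missing from the axis raise a KeyError, e.g.
    # ``df.rename(columns={"missing": "x"}, errors="raise")`` fails when the
    # DataFrame has no column "missing"; under the default ``errors="ignore"``
    # such entries are silently skipped.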

    @overload
    def rename_axis(
        self,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        index=...,
        columns=...,
        axis: Axis = ...,
        copy: bool_t | None = ...,
        inplace: Literal[False] = ...,
    ) -> Self:
        ...

    @overload
    def rename_axis(
        self,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        index=...,
        columns=...,
        axis: Axis = ...,
        copy: bool_t | None = ...,
        inplace: Literal[True],
    ) -> None:
        ...

    @overload
    def rename_axis(
        self,
        mapper: IndexLabel | lib.NoDefault = ...,
        *,
        index=...,
        columns=...,
        axis: Axis = ...,
        copy: bool_t | None = ...,
        inplace: bool_t = ...,
    ) -> Self | None:
        ...

    def rename_axis(
        self,
        mapper: IndexLabel | lib.NoDefault = lib.no_default,
        *,
        index=lib.no_default,
        columns=lib.no_default,
        axis: Axis = 0,
        copy: bool_t | None = None,
        inplace: bool_t = False,
    ) -> Self | None:
        """
        Set the name of the axis for the index or columns.

        Parameters
        ----------
        mapper : scalar, list-like, optional
            Value to set the axis name attribute.
        index, columns : scalar, list-like, dict-like or function, optional
            A scalar, list-like, dict-like or functions transformations to
            apply to that axis' values.
            Note that the ``columns`` parameter is not allowed if the
            object is a Series. This parameter only applies to DataFrame
            type objects.

            Use either ``mapper`` and ``axis`` to
            specify the axis to target with ``mapper``, or ``index``
            and/or ``columns``.
        axis : {0 or 'index', 1 or 'columns'}, default 0
            The axis to rename. For `Series` this parameter is unused and defaults to 0.
        copy : bool, default None
            Also copy underlying data.

            .. note::
                The `copy` keyword will change behavior in pandas 3.0.
                `Copy-on-Write
                <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__
                will be enabled by default, which means that all methods with a
                `copy` keyword will use a lazy copy mechanism to defer the copy and
                ignore the `copy` keyword. The `copy` keyword will be removed in a
                future version of pandas.

                You can already get the future behavior and improvements through
                enabling copy on write ``pd.options.mode.copy_on_write = True``
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Series
            or DataFrame.

        Returns
        -------
        Series, DataFrame, or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Series.rename : Alter Series index labels or name.
        DataFrame.rename : Alter DataFrame index labels or name.
        Index.rename : Set new names on index.

        Notes
        -----
        ``DataFrame.rename_axis`` supports two calling conventions

        * ``(index=index_mapper, columns=columns_mapper, ...)``
        * ``(mapper, axis={'index', 'columns'}, ...)``

        The first calling convention will only modify the names of
        the index and/or the names of the Index object that is the columns.
        In this case, the parameter ``copy`` is ignored.

        The second calling convention will modify the names of the
        corresponding index if mapper is a list or a scalar.
        However, if mapper is dict-like or a function, it will use the
        deprecated behavior of modifying the axis *labels*.

        We *highly* recommend using keyword arguments to clarify your
        intent.

        Examples
        --------
        **Series**

        >>> s = pd.Series(["dog", "cat", "monkey"])
        >>> s
        0       dog
        1       cat
        2    monkey
        dtype: object
        >>> s.rename_axis("animal")
        animal
        0       dog
        1       cat
        2    monkey
        dtype: object

        **DataFrame**

        >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
        ...                    "num_arms": [0, 0, 2]},
        ...                   ["dog", "cat", "monkey"])
        >>> df
                num_legs  num_arms
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("animal")
        >>> df
                num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2
        >>> df = df.rename_axis("limbs", axis="columns")
        >>> df
        limbs   num_legs  num_arms
        animal
        dog            4         0
        cat            4         0
        monkey         2         2

        **MultiIndex**

        >>> df.index = pd.MultiIndex.from_product([['mammal'],
        ...                                        ['dog', 'cat', 'monkey']],
        ...                                       names=['type', 'name'])
        >>> df
        limbs          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(index={'type': 'class'})
        limbs          num_legs  num_arms
        class  name
        mammal dog            4         0
               cat            4         0
               monkey         2         2

        >>> df.rename_axis(columns=str.upper)
        LIMBS          num_legs  num_arms
        type   name
        mammal dog            4         0
               cat            4         0
               monkey         2         2
        """
        axes = {"index": index, "columns": columns}

        if axis is not None:
            axis = self._get_axis_number(axis)

        inplace = validate_bool_kwarg(inplace, "inplace")

        if copy and using_copy_on_write():
            copy = False

        if mapper is not lib.no_default:
            # Use v0.23 behavior if a scalar or list
            non_mapper = is_scalar(mapper) or (
                is_list_like(mapper) and not is_dict_like(mapper)
            )
            if non_mapper:
                return self._set_axis_name(
                    mapper, axis=axis, inplace=inplace, copy=copy
                )
            else:
                raise ValueError("Use `.rename` to alter labels with a mapper.")
        else:
            # Use new behavior. Means that index and/or columns
            # is specified
            result = self if inplace else self.copy(deep=copy)

            for axis in range(self._AXIS_LEN):
                v = axes.get(self._get_axis_name(axis))
                if v is lib.no_default:
                    continue
                non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v))
                if non_mapper:
                    newnames = v
                else:
                    f = common.get_rename_function(v)
                    curnames = self._get_axis(axis).names
                    newnames = [f(name) for name in curnames]
                result._set_axis_name(newnames, axis=axis, inplace=True, copy=copy)
            if not inplace:
                return result
            return None

1368 @final 

1369 def _set_axis_name( 

1370 self, name, axis: Axis = 0, inplace: bool_t = False, copy: bool_t | None = True 

1371 ): 

1372 """ 

1373 Set the name(s) of the axis. 

1374 

1375 Parameters 

1376 ---------- 

1377 name : str or list of str 

1378 Name(s) to set. 

1379 axis : {0 or 'index', 1 or 'columns'}, default 0 

1380 The axis to set the label. The value 0 or 'index' specifies index, 

1381 and the value 1 or 'columns' specifies columns. 

1382 inplace : bool, default False 

1383 If `True`, do operation inplace and return None. 

1384 copy: 

1385 Whether to make a copy of the result. 

1386 

1387 Returns 

1388 ------- 

1389 Series, DataFrame, or None 

1390 The same type as the caller or `None` if `inplace` is `True`. 

1391 

1392 See Also 

1393 -------- 

1394 DataFrame.rename : Alter the axis labels of :class:`DataFrame`. 

1395 Series.rename : Alter the index labels or set the index name 

1396 of :class:`Series`. 

1397 Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. 

1398 

1399 Examples 

1400 -------- 

1401 >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, 

1402 ... ["dog", "cat", "monkey"]) 

1403 >>> df 

1404 num_legs 

1405 dog 4 

1406 cat 4 

1407 monkey 2 

1408 >>> df._set_axis_name("animal") 

1409 num_legs 

1410 animal 

1411 dog 4 

1412 cat 4 

1413 monkey 2 

1414 >>> df.index = pd.MultiIndex.from_product( 

1415 ... [["mammal"], ['dog', 'cat', 'monkey']]) 

1416 >>> df._set_axis_name(["type", "name"]) 

1417 num_legs 

1418 type name 

1419 mammal dog 4 

1420 cat 4 

1421 monkey 2 

1422 """ 

1423 axis = self._get_axis_number(axis) 

1424 idx = self._get_axis(axis).set_names(name) 

1425 

1426 inplace = validate_bool_kwarg(inplace, "inplace") 

1427 renamed = self if inplace else self.copy(deep=copy) 

1428 if axis == 0: 

1429 renamed.index = idx 

1430 else: 

1431 renamed.columns = idx 

1432 

1433 if not inplace: 

1434 return renamed 

1435 

1436 # ---------------------------------------------------------------------- 

1437 # Comparison Methods 

1438 

1439 @final 

1440 def _indexed_same(self, other) -> bool_t: 

1441 return all( 

1442 self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS 

1443 ) 

1444 

1445 @final 

1446 def equals(self, other: object) -> bool_t: 

1447 """ 

1448 Test whether two objects contain the same elements. 

1449 

1450 This function allows two Series or DataFrames to be compared against 

1451 each other to see if they have the same shape and elements. NaNs in 

1452 the same location are considered equal. 

1453 

1454 The row/column index do not need to have the same type, as long 

1455 as the values are considered equal. Corresponding columns and 

1456 index must be of the same dtype. 

1457 

1458 Parameters 

1459 ---------- 

1460 other : Series or DataFrame 

1461 The other Series or DataFrame to be compared with the first. 

1462 

1463 Returns 

1464 ------- 

1465 bool 

1466 True if all elements are the same in both objects, False 

1467 otherwise. 

1468 

1469 See Also 

1470 -------- 

1471 Series.eq : Compare two Series objects of the same length 

1472 and return a Series where each element is True if the element 

1473 in each Series is equal, False otherwise. 

1474 DataFrame.eq : Compare two DataFrame objects of the same shape and 

1475 return a DataFrame where each element is True if the respective 

1476 element in each DataFrame is equal, False otherwise. 

1477 testing.assert_series_equal : Raises an AssertionError if left and 

1478 right are not equal. Provides an easy interface to ignore 

1479 inequality in dtypes, indexes and precision among others. 

1480 testing.assert_frame_equal : Like assert_series_equal, but targets 

1481 DataFrames. 

1482 numpy.array_equal : Return True if two arrays have the same shape 

1483 and elements, False otherwise. 

1484 

1485 Examples 

1486 -------- 

1487 >>> df = pd.DataFrame({1: [10], 2: [20]}) 

1488 >>> df 

1489 1 2 

1490 0 10 20 

1491 

1492 DataFrames df and exactly_equal have the same types and values for 

1493 their elements and column labels, which will return True. 

1494 

1495 >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) 

1496 >>> exactly_equal 

1497 1 2 

1498 0 10 20 

1499 >>> df.equals(exactly_equal) 

1500 True 

1501 

1502 DataFrames df and different_column_type have the same element 

1503 types and values, but have different types for the column labels, 

1504 which will still return True. 

1505 

1506 >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) 

1507 >>> different_column_type 

1508 1.0 2.0 

1509 0 10 20 

1510 >>> df.equals(different_column_type) 

1511 True 

1512 

1513 DataFrames df and different_data_type have different types for the 

1514 same values for their elements, and will return False even though 

1515 their column labels are the same values and types. 

1516 

1517 >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) 

1518 >>> different_data_type 

1519 1 2 

1520 0 10.0 20.0 

1521 >>> df.equals(different_data_type) 

1522 False 

1523 """ 

1524 if not (isinstance(other, type(self)) or isinstance(self, type(other))): 

1525 return False 

1526 other = cast(NDFrame, other) 

1527 return self._mgr.equals(other._mgr) 

1528 

1529 # ------------------------------------------------------------------------- 

1530 # Unary Methods 

1531 

1532 @final 

1533 def __neg__(self) -> Self: 

1534 def blk_func(values: ArrayLike): 

1535 if is_bool_dtype(values.dtype): 

1536 # error: Argument 1 to "inv" has incompatible type "Union 

1537 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1538 # "_SupportsInversion[ndarray[Any, dtype[bool_]]]" 

1539 return operator.inv(values) # type: ignore[arg-type] 

1540 else: 

1541 # error: Argument 1 to "neg" has incompatible type "Union 

1542 # [ExtensionArray, ndarray[Any, Any]]"; expected 

1543 # "_SupportsNeg[ndarray[Any, dtype[Any]]]" 

1544 return operator.neg(values) # type: ignore[arg-type] 

1545 

1546 new_data = self._mgr.apply(blk_func) 

1547 res = self._constructor_from_mgr(new_data, axes=new_data.axes) 

1548 return res.__finalize__(self, method="__neg__") 
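
    # Illustration (not part of pandas): the block-wise ``blk_func`` above
    # makes negation dtype-aware: ``-pd.Series([True, False])`` inverts the
    # booleans to ``[False, True]``, while ``-pd.Series([1, -2])`` negates
    # numerically to ``[-1, 2]``.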

    @final
    def __pos__(self) -> Self:
        def blk_func(values: ArrayLike):
            if is_bool_dtype(values.dtype):
                return values.copy()
            else:
                # error: Argument 1 to "pos" has incompatible type "Union
                # [ExtensionArray, ndarray[Any, Any]]"; expected
                # "_SupportsPos[ndarray[Any, dtype[Any]]]"
                return operator.pos(values)  # type: ignore[arg-type]

        new_data = self._mgr.apply(blk_func)
        res = self._constructor_from_mgr(new_data, axes=new_data.axes)
        return res.__finalize__(self, method="__pos__")

    @final
    def __invert__(self) -> Self:
        if not self.size:
            # inv fails with 0 len
            return self.copy(deep=False)

        new_data = self._mgr.apply(operator.invert)
        res = self._constructor_from_mgr(new_data, axes=new_data.axes)
        return res.__finalize__(self, method="__invert__")

    @final
    def __nonzero__(self) -> NoReturn:
        raise ValueError(
            f"The truth value of a {type(self).__name__} is ambiguous. "
            "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
        )

    __bool__ = __nonzero__

    @final
    def bool(self) -> bool_t:
        """
        Return the bool of a single element Series or DataFrame.

        .. deprecated:: 2.1.0

            bool is deprecated and will be removed in a future version of pandas.
            For ``Series`` use ``pandas.Series.item``.

        This must be a boolean scalar value, either True or False. It will raise a
        ValueError if the Series or DataFrame does not have exactly 1 element, or that
        element is not boolean (integer values 0 and 1 will also raise an exception).

        Returns
        -------
        bool
            The value in the Series or DataFrame.

        See Also
        --------
        Series.astype : Change the data type of a Series, including to boolean.
        DataFrame.astype : Change the data type of a DataFrame, including to boolean.
        numpy.bool_ : NumPy boolean data type, used by pandas for boolean values.

        Examples
        --------
        The method will only work for single element objects with a boolean value:

        >>> pd.Series([True]).bool()  # doctest: +SKIP
        True
        >>> pd.Series([False]).bool()  # doctest: +SKIP
        False

        >>> pd.DataFrame({'col': [True]}).bool()  # doctest: +SKIP
        True
        >>> pd.DataFrame({'col': [False]}).bool()  # doctest: +SKIP
        False

        This is an alternative method and will only work
        for single element objects with a boolean value:

        >>> pd.Series([True]).item()  # doctest: +SKIP
        True
        >>> pd.Series([False]).item()  # doctest: +SKIP
        False
        """

        warnings.warn(
            f"{type(self).__name__}.bool is now deprecated and will be removed "
            "in a future version of pandas",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        v = self.squeeze()
        if isinstance(v, (bool, np.bool_)):
            return bool(v)
        elif is_scalar(v):
            raise ValueError(
                "bool cannot act on a non-boolean single element "
                f"{type(self).__name__}"
            )

        self.__nonzero__()
        # for mypy (__nonzero__ raises)
        return True

    @final
    def abs(self) -> Self:
        """
        Return a Series/DataFrame with absolute numeric value of each element.

        This function only applies to elements that are all numeric.

        Returns
        -------
        abs
            Series/DataFrame containing the absolute value of each element.

        See Also
        --------
        numpy.absolute : Calculate the absolute value element-wise.

        Notes
        -----
        For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
        :math:`\\sqrt{ a^2 + b^2 }`.

        Examples
        --------
        Absolute numeric values in a Series.

        >>> s = pd.Series([-1.10, 2, -3.33, 4])
        >>> s.abs()
        0    1.10
        1    2.00
        2    3.33
        3    4.00
        dtype: float64

        Absolute numeric values in a Series with complex numbers.

        >>> s = pd.Series([1.2 + 1j])
        >>> s.abs()
        0    1.56205
        dtype: float64

        Absolute numeric values in a Series with a Timedelta element.

        >>> s = pd.Series([pd.Timedelta('1 days')])
        >>> s.abs()
        0   1 days
        dtype: timedelta64[ns]

        Select rows with data closest to certain value using argsort (from
        `StackOverflow <https://stackoverflow.com/a/17758115>`__).

        >>> df = pd.DataFrame({
        ...     'a': [4, 5, 6, 7],
        ...     'b': [10, 20, 30, 40],
        ...     'c': [100, 50, -30, -50]
        ... })
        >>> df
           a   b    c
        0  4  10  100
        1  5  20   50
        2  6  30  -30
        3  7  40  -50
        >>> df.loc[(df.c - 43).abs().argsort()]
           a   b    c
        1  5  20   50
        0  4  10  100
        2  6  30  -30
        3  7  40  -50
        """
        res_mgr = self._mgr.apply(np.abs)
        return self._constructor_from_mgr(res_mgr, axes=res_mgr.axes).__finalize__(
            self, name="abs"
        )

    @final
    def __abs__(self) -> Self:
        return self.abs()

    @final
    def __round__(self, decimals: int = 0) -> Self:
        return self.round(decimals).__finalize__(self, method="__round__")

    # -------------------------------------------------------------------------
    # Label or Level Combination Helpers
    #
    # A collection of helper methods for DataFrame/Series operations that
    # accept a combination of column/index labels and levels. All such
    # operations should utilize/extend these methods when possible so that we
    # have consistent precedence and validation logic throughout the library.

    @final
    def _is_level_reference(self, key: Level, axis: Axis = 0) -> bool_t:
        """
        Test whether a key is a level reference for a given axis.

        To be considered a level reference, `key` must be a string that:
        - (axis=0): Matches the name of an index level and does NOT match
          a column label.
        - (axis=1): Matches the name of a column level and does NOT match
          an index label.

        Parameters
        ----------
        key : Hashable
            Potential level name for the given axis
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        is_level : bool
        """
        axis_int = self._get_axis_number(axis)

        return (
            key is not None
            and is_hashable(key)
            and key in self.axes[axis_int].names
            and not self._is_label_reference(key, axis=axis_int)
        )

    @final
    def _is_label_reference(self, key: Level, axis: Axis = 0) -> bool_t:
        """
        Test whether a key is a label reference for a given axis.

        To be considered a label reference, `key` must be a string that:
        - (axis=0): Matches a column label
        - (axis=1): Matches an index label

        Parameters
        ----------
        key : Hashable
            Potential label name, i.e. Index entry.
        axis : int, default 0
            Axis perpendicular to the axis that labels are associated with
            (0 means search for column labels, 1 means search for index labels)

        Returns
        -------
        is_label: bool
        """
        axis_int = self._get_axis_number(axis)
        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)

        return (
            key is not None
            and is_hashable(key)
            and any(key in self.axes[ax] for ax in other_axes)
        )

    @final
    def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t:
        """
        Test whether a key is a label or level reference for a given axis.

        To be considered either a label or a level reference, `key` must be a
        string that:
        - (axis=0): Matches a column label or an index level
        - (axis=1): Matches an index label or a column level

        Parameters
        ----------
        key : Hashable
            Potential label or level name
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        bool
        """
        return self._is_level_reference(key, axis=axis) or self._is_label_reference(
            key, axis=axis
        )

    @final
    def _check_label_or_level_ambiguity(self, key: Level, axis: Axis = 0) -> None:
        """
        Check whether `key` is ambiguous.

        By ambiguous, we mean that it matches both a level of the input
        `axis` and a label of the other axis.

        Parameters
        ----------
        key : Hashable
            Label or level name.
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns).

        Raises
        ------
        ValueError: `key` is ambiguous
        """

        axis_int = self._get_axis_number(axis)
        other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)

        if (
            key is not None
            and is_hashable(key)
            and key in self.axes[axis_int].names
            and any(key in self.axes[ax] for ax in other_axes)
        ):
            # Build an informative and grammatical warning
            level_article, level_type = (
                ("an", "index") if axis_int == 0 else ("a", "column")
            )

            label_article, label_type = (
                ("a", "column") if axis_int == 0 else ("an", "index")
            )

            msg = (
                f"'{key}' is both {level_article} {level_type} level and "
                f"{label_article} {label_type} label, which is ambiguous."
            )
            raise ValueError(msg)
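
    # Illustration (not part of pandas): a key is ambiguous when it names
    # both a level of one axis and a label of the other, e.g.:
    #
    #     df = pd.DataFrame({"a": [1, 2]}).rename_axis("a")
    #
    # Here "a" is an index level name *and* a column label, so
    # ``df._check_label_or_level_ambiguity("a")`` raises
    # "'a' is both an index level and a column label, which is ambiguous."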

    @final
    def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike:
        """
        Return a 1-D array of values associated with `key`, a label or level
        from the given `axis`.

        Retrieval logic:
        - (axis=0): Return column values if `key` matches a column label.
          Otherwise return index level values if `key` matches an index
          level.
        - (axis=1): Return row values if `key` matches an index label.
          Otherwise return column level values if 'key' matches a column
          level

        Parameters
        ----------
        key : Hashable
            Label or level name.
        axis : int, default 0
            Axis that levels are associated with (0 for index, 1 for columns)

        Returns
        -------
        np.ndarray or ExtensionArray

        Raises
        ------
        KeyError
            if `key` matches neither a label nor a level
        ValueError
            if `key` matches multiple labels
        """
        axis = self._get_axis_number(axis)
        other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]

        if self._is_label_reference(key, axis=axis):
            self._check_label_or_level_ambiguity(key, axis=axis)
            values = self.xs(key, axis=other_axes[0])._values
        elif self._is_level_reference(key, axis=axis):
            values = self.axes[axis].get_level_values(key)._values
        else:
            raise KeyError(key)

        # Check for duplicates
        if values.ndim > 1:
            if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
                multi_message = (
                    "\n"
                    "For a multi-index, the label must be a "
                    "tuple with elements corresponding to each level."
                )
            else:
                multi_message = ""

            label_axis_name = "column" if axis == 0 else "index"
            raise ValueError(
                f"The {label_axis_name} label '{key}' is not unique.{multi_message}"
            )

        return values

1930 
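# A minimal sketch of the retrieval order above (hypothetical frame):
# on axis=0 a column label wins; otherwise index level values are used.
# >>> df = pd.DataFrame({"b": [10, 20]},
# ...                   index=pd.Index(["x", "y"], name="lvl"))
# >>> df._get_label_or_level_values("b")  # matches the column label
# array([10, 20])
# >>> df._get_label_or_level_values("lvl")  # falls back to the index level
# array(['x', 'y'], dtype=object)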

1931 @final 

1932 def _drop_labels_or_levels(self, keys, axis: AxisInt = 0): 

1933 """ 

1934 Drop labels and/or levels for the given `axis`. 

1935 

1936 For each key in `keys`: 

1937 - (axis=0): If key matches a column label then drop the column. 

1938 Otherwise if key matches an index level then drop the level. 

1939 - (axis=1): If key matches an index label then drop the row. 

1940 Otherwise if key matches a column level then drop the level. 

1941 

1942 Parameters 

1943 ---------- 

1944 keys : str or list of str 

1945 Labels or levels to drop. 

1946 axis : int, default 0 

1947 Axis that levels are associated with (0 for index, 1 for columns). 

1948 

1949 Returns 

1950 ------- 

1951 dropped: DataFrame 

1952 

1953 Raises 

1954 ------ 

1955 ValueError 

1956 if any `keys` match neither a label nor a level 

1957 """ 

1958 axis = self._get_axis_number(axis) 

1959 

1960 # Validate keys 

1961 keys = common.maybe_make_list(keys) 

1962 invalid_keys = [ 

1963 k for k in keys if not self._is_label_or_level_reference(k, axis=axis) 

1964 ] 

1965 

1966 if invalid_keys: 

1967 raise ValueError( 

1968 "The following keys are not valid labels or " 

1969 f"levels for axis {axis}: {invalid_keys}" 

1970 ) 

1971 

1972 # Compute levels and labels to drop 

1973 levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] 

1974 

1975 labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] 

1976 

1977 # Perform copy upfront and then use inplace operations below. 

1978 # This ensures that we always perform exactly one copy. 

1979 # ``copy`` and/or ``inplace`` options could be added in the future. 

1980 dropped = self.copy(deep=False) 

1981 

1982 if axis == 0: 

1983 # Handle dropping index levels 

1984 if levels_to_drop: 

1985 dropped.reset_index(levels_to_drop, drop=True, inplace=True) 

1986 

1987 # Handle dropping columns labels 

1988 if labels_to_drop: 

1989 dropped.drop(labels_to_drop, axis=1, inplace=True) 

1990 else: 

1991 # Handle dropping column levels 

1992 if levels_to_drop: 

1993 if isinstance(dropped.columns, MultiIndex): 

1994 # Drop the specified levels from the MultiIndex 

1995 dropped.columns = dropped.columns.droplevel(levels_to_drop) 

1996 else: 

1997 # Drop the last level of Index by replacing with 

1998 # a RangeIndex 

1999 dropped.columns = RangeIndex(dropped.columns.size) 

2000 

2001 # Handle dropping index labels 

2002 if labels_to_drop: 

2003 dropped.drop(labels_to_drop, axis=0, inplace=True) 

2004 

2005 return dropped 

2006 
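# A minimal sketch of the drop logic above (hypothetical frame): a key
# that matches a column label drops the column, while a key that matches
# an index level drops that level (leaving a default RangeIndex behind).
# >>> df = pd.DataFrame({"b": [10, 20]},
# ...                   index=pd.Index(["x", "y"], name="lvl"))
# >>> df._drop_labels_or_levels("b").columns.tolist()
# []
# >>> df._drop_labels_or_levels("lvl").index.tolist()
# [0, 1]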

2007 # ---------------------------------------------------------------------- 

2008 # Iteration 

2009 

2010 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

2011 # Incompatible types in assignment (expression has type "None", base class 

2012 # "object" defined the type as "Callable[[object], int]") 

2013 __hash__: ClassVar[None] # type: ignore[assignment] 

2014 

2015 def __iter__(self) -> Iterator: 

2016 """ 

2017 Iterate over info axis. 

2018 

2019 Returns 

2020 ------- 

2021 iterator 

2022 Info axis as iterator. 

2023 

2024 Examples 

2025 -------- 

2026 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) 

2027 >>> for x in df: 

2028 ... print(x) 

2029 A 

2030 B 

2031 """ 

2032 return iter(self._info_axis) 

2033 

2034 # can we get a better explanation of this? 

2035 def keys(self) -> Index: 

2036 """ 

2037 Get the 'info axis' (see Indexing for more). 

2038 

2039 This is index for Series, columns for DataFrame. 

2040 

2041 Returns 

2042 ------- 

2043 Index 

2044 Info axis. 

2045 

2046 Examples 

2047 -------- 

2048 >>> d = pd.DataFrame(data={'A': [1, 2, 3], 'B': [0, 4, 8]}, 

2049 ... index=['a', 'b', 'c']) 

2050 >>> d 

2051 A B 

2052 a 1 0 

2053 b 2 4 

2054 c 3 8 

2055 >>> d.keys() 

2056 Index(['A', 'B'], dtype='object') 

2057 """ 

2058 return self._info_axis 

2059 

2060 def items(self): 

2061 """ 

2062 Iterate over (label, values) on the info axis. 

2063 

2064 This is index for Series and columns for DataFrame. 

2065 

2066 Returns 

2067 ------- 

2068 Generator 

2069 """ 

2070 for h in self._info_axis: 

2071 yield h, self[h] 

2072 
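# A minimal sketch of items() above (hypothetical frame): each column
# label is paired with its column Series.
# >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
# >>> [label for label, column in df.items()]
# ['A', 'B']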

2073 def __len__(self) -> int: 

2074 """Returns length of info axis""" 

2075 return len(self._info_axis) 

2076 

2077 @final 

2078 def __contains__(self, key) -> bool_t: 

2079 """True if the key is in the info axis""" 

2080 return key in self._info_axis 

2081 
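# A minimal sketch of info-axis membership above (hypothetical frame):
# for a DataFrame, ``in`` tests column labels, not row labels.
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> "A" in df
# True
# >>> 0 in df
# False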

2082 @property 

2083 def empty(self) -> bool_t: 

2084 """ 

2085 Indicator whether Series/DataFrame is empty. 

2086 

2087 True if Series/DataFrame is entirely empty (no items), meaning any of the 

2088 axes are of length 0. 

2089 

2090 Returns 

2091 ------- 

2092 bool 

2093 If Series/DataFrame is empty, return True, if not return False. 

2094 

2095 See Also 

2096 -------- 

2097 Series.dropna : Return series without null values. 

2098 DataFrame.dropna : Return DataFrame with labels on given axis omitted 

2099 where (all or any) data are missing. 

2100 

2101 Notes 

2102 ----- 

2103 If Series/DataFrame contains only NaNs, it is still not considered empty. See 

2104 the example below. 

2105 

2106 Examples 

2107 -------- 

2108 An example of an actual empty DataFrame. Notice the index is empty: 

2109 

2110 >>> df_empty = pd.DataFrame({'A' : []}) 

2111 >>> df_empty 

2112 Empty DataFrame 

2113 Columns: [A] 

2114 Index: [] 

2115 >>> df_empty.empty 

2116 True 

2117 

2118 If we only have NaNs in our DataFrame, it is not considered empty! We 

2119 will need to drop the NaNs to make the DataFrame empty: 

2120 

2121 >>> df = pd.DataFrame({'A' : [np.nan]}) 

2122 >>> df 

2123 A 

2124 0 NaN 

2125 >>> df.empty 

2126 False 

2127 >>> df.dropna().empty 

2128 True 

2129 

2130 >>> ser_empty = pd.Series({'A' : []}) 

2131 >>> ser_empty 

2132 A [] 

2133 dtype: object 

2134 >>> ser_empty.empty 

2135 False 

2136 >>> ser_empty = pd.Series() 

2137 >>> ser_empty.empty 

2138 True 

2139 """ 

2140 return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) 

2141 

2142 # ---------------------------------------------------------------------- 

2143 # Array Interface 

2144 

2145 # This is also set in IndexOpsMixin 

2146 # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented 

2147 __array_priority__: int = 1000 

2148 

2149 def __array__( 

2150 self, dtype: npt.DTypeLike | None = None, copy: bool_t | None = None 

2151 ) -> np.ndarray: 

2152 values = self._values 

2153 arr = np.asarray(values, dtype=dtype) 

2154 if ( 

2155 astype_is_view(values.dtype, arr.dtype) 

2156 and using_copy_on_write() 

2157 and self._mgr.is_single_block 

2158 ): 

2159 # Check if both conversions can be done without a copy 

2160 if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view( 

2161 values.dtype, arr.dtype 

2162 ): 

2163 arr = arr.view() 

2164 arr.flags.writeable = False 

2165 return arr 

2166 
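# A minimal sketch of the read-only-view logic above, assuming
# Copy-on-Write is enabled: converting a single-block frame to an array
# of the same dtype can return a non-writeable view instead of a copy.
# >>> pd.set_option("mode.copy_on_write", True)
# >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
# >>> np.asarray(df).flags.writeable
# False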

2167 @final 

2168 def __array_ufunc__( 

2169 self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any 

2170 ): 

2171 return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs) 

2172 

2173 # ---------------------------------------------------------------------- 

2174 # Picklability 

2175 

2176 @final 

2177 def __getstate__(self) -> dict[str, Any]: 

2178 meta = {k: getattr(self, k, None) for k in self._metadata} 

2179 return { 

2180 "_mgr": self._mgr, 

2181 "_typ": self._typ, 

2182 "_metadata": self._metadata, 

2183 "attrs": self.attrs, 

2184 "_flags": {k: self.flags[k] for k in self.flags._keys}, 

2185 **meta, 

2186 } 

2187 

2188 @final 

2189 def __setstate__(self, state) -> None: 

2190 if isinstance(state, BlockManager): 

2191 self._mgr = state 

2192 elif isinstance(state, dict): 

2193 if "_data" in state and "_mgr" not in state: 

2194 # compat for older pickles 

2195 state["_mgr"] = state.pop("_data") 

2196 typ = state.get("_typ") 

2197 if typ is not None: 

2198 attrs = state.get("_attrs", {}) 

2199 if attrs is None: # should not happen, but better be on the safe side 

2200 attrs = {} 

2201 object.__setattr__(self, "_attrs", attrs) 

2202 flags = state.get("_flags", {"allows_duplicate_labels": True}) 

2203 object.__setattr__(self, "_flags", Flags(self, **flags)) 

2204 

2205 # set in the order of internal names 

2206 # to avoid definitional recursion 

2207 # e.g. say fill_value needing _mgr to be 

2208 # defined 

2209 meta = set(self._internal_names + self._metadata) 

2210 for k in list(meta): 

2211 if k in state and k != "_flags": 

2212 v = state[k] 

2213 object.__setattr__(self, k, v) 

2214 

2215 for k, v in state.items(): 

2216 if k not in meta: 

2217 object.__setattr__(self, k, v) 

2218 

2219 else: 

2220 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2221 elif len(state) == 2: 

2222 raise NotImplementedError("Pre-0.12 pickles are no longer supported") 

2223 

2224 self._item_cache: dict[Hashable, Series] = {} 

2225 
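# A minimal sketch of the pickling protocol above: __getstate__ captures
# the manager plus ``attrs`` and ``flags``, so a plain pickle round-trip
# restores them ("source" is a hypothetical attrs key).
# >>> import pickle
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> df.attrs["source"] = "sketch"
# >>> pickle.loads(pickle.dumps(df)).attrs
# {'source': 'sketch'}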

2226 # ---------------------------------------------------------------------- 

2227 # Rendering Methods 

2228 

2229 def __repr__(self) -> str: 

2230 # string representation based upon iterating over self 

2231 # (since, by definition, `PandasContainers` are iterable) 

2232 prepr = f"[{','.join(map(pprint_thing, self))}]" 

2233 return f"{type(self).__name__}({prepr})" 

2234 

2235 @final 

2236 def _repr_latex_(self): 

2237 """ 

2238 Returns a LaTeX representation for a particular object. 

2239 Mainly for use with nbconvert (jupyter notebook conversion to pdf). 

2240 """ 

2241 if config.get_option("styler.render.repr") == "latex": 

2242 return self.to_latex() 

2243 else: 

2244 return None 

2245 

2246 @final 

2247 def _repr_data_resource_(self): 

2248 """ 

2249 Not a real Jupyter special repr method, but we use the same 

2250 naming convention. 

2251 """ 

2252 if config.get_option("display.html.table_schema"): 

2253 data = self.head(config.get_option("display.max_rows")) 

2254 

2255 as_json = data.to_json(orient="table") 

2256 as_json = cast(str, as_json) 

2257 return loads(as_json, object_pairs_hook=collections.OrderedDict) 

2258 
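# A minimal sketch of the data-resource repr above: with the
# ``display.html.table_schema`` option enabled, the payload is the parsed
# Table Schema JSON produced by ``to_json(orient="table")``.
# >>> pd.set_option("display.html.table_schema", True)
# >>> payload = pd.DataFrame({"A": [1]})._repr_data_resource_()
# >>> sorted(payload)
# ['data', 'schema']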

2259 # ---------------------------------------------------------------------- 

2260 # I/O Methods 

2261 

2262 @final 

2263 @deprecate_nonkeyword_arguments( 

2264 version="3.0", allowed_args=["self", "excel_writer"], name="to_excel" 

2265 ) 

2266 @doc( 

2267 klass="object", 

2268 storage_options=_shared_docs["storage_options"], 

2269 storage_options_versionadded="1.2.0", 

2270 ) 

2271 def to_excel( 

2272 self, 

2273 excel_writer: FilePath | WriteExcelBuffer | ExcelWriter, 

2274 sheet_name: str = "Sheet1", 

2275 na_rep: str = "", 

2276 float_format: str | None = None, 

2277 columns: Sequence[Hashable] | None = None, 

2278 header: Sequence[Hashable] | bool_t = True, 

2279 index: bool_t = True, 

2280 index_label: IndexLabel | None = None, 

2281 startrow: int = 0, 

2282 startcol: int = 0, 

2283 engine: Literal["openpyxl", "xlsxwriter"] | None = None, 

2284 merge_cells: bool_t = True, 

2285 inf_rep: str = "inf", 

2286 freeze_panes: tuple[int, int] | None = None, 

2287 storage_options: StorageOptions | None = None, 

2288 engine_kwargs: dict[str, Any] | None = None, 

2289 ) -> None: 

2290 """ 

2291 Write {klass} to an Excel sheet. 

2292 

2293 To write a single {klass} to an Excel .xlsx file it is only necessary to 

2294 specify a target file name. To write to multiple sheets it is necessary to 

2295 create an `ExcelWriter` object with a target file name, and specify a sheet 

2296 in the file to write to. 

2297 

2298 Multiple sheets may be written to by specifying unique `sheet_name`. 

2299 With all data written to the file it is necessary to save the changes. 

2300 Note that creating an `ExcelWriter` object with a file name that already 

2301 exists will result in the contents of the existing file being erased. 

2302 

2303 Parameters 

2304 ---------- 

2305 excel_writer : path-like, file-like, or ExcelWriter object 

2306 File path or existing ExcelWriter. 

2307 sheet_name : str, default 'Sheet1' 

2308 Name of sheet which will contain DataFrame. 

2309 na_rep : str, default '' 

2310 Missing data representation. 

2311 float_format : str, optional 

2312 Format string for floating point numbers. For example 

2313 ``float_format="%.2f"`` will format 0.1234 to 0.12. 

2314 columns : sequence or list of str, optional 

2315 Columns to write. 

2316 header : bool or list of str, default True 

2317 Write out the column names. If a list of strings is given, it is 

2318 assumed to be aliases for the column names. 

2319 index : bool, default True 

2320 Write row names (index). 

2321 index_label : str or sequence, optional 

2322 Column label for index column(s) if desired. If not specified, and 

2323 `header` and `index` are True, then the index names are used. A 

2324 sequence should be given if the DataFrame uses MultiIndex. 

2325 startrow : int, default 0 

2326 Upper left cell row to dump data frame. 

2327 startcol : int, default 0 

2328 Upper left cell column to dump data frame. 

2329 engine : str, optional 

2330 Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this 

2331 via the options ``io.excel.xlsx.writer`` or 

2332 ``io.excel.xlsm.writer``. 

2333 

2334 merge_cells : bool, default True 

2335 Write MultiIndex and Hierarchical Rows as merged cells. 

2336 inf_rep : str, default 'inf' 

2337 Representation for infinity (there is no native representation for 

2338 infinity in Excel). 

2339 freeze_panes : tuple of int (length 2), optional 

2340 Specifies the one-based bottommost row and rightmost column that 

2341 is to be frozen. 

2342 {storage_options} 

2343 

2344 .. versionadded:: {storage_options_versionadded} 

2345 engine_kwargs : dict, optional 

2346 Arbitrary keyword arguments passed to excel engine. 

2347 

2348 See Also 

2349 -------- 

2350 to_csv : Write DataFrame to a comma-separated values (csv) file. 

2351 ExcelWriter : Class for writing DataFrame objects into excel sheets. 

2352 read_excel : Read an Excel file into a pandas DataFrame. 

2353 read_csv : Read a comma-separated values (csv) file into DataFrame. 

2354 io.formats.style.Styler.to_excel : Add styles to Excel sheet. 

2355 

2356 Notes 

2357 ----- 

2358 For compatibility with :meth:`~DataFrame.to_csv`, 

2359 to_excel serializes lists and dicts to strings before writing. 

2360 

2361 Once a workbook has been saved it is not possible to write further 

2362 data without rewriting the whole workbook. 

2363 

2364 Examples 

2365 -------- 

2366 

2367 Create, write to and save a workbook: 

2368 

2369 >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], 

2370 ... index=['row 1', 'row 2'], 

2371 ... columns=['col 1', 'col 2']) 

2372 >>> df1.to_excel("output.xlsx") # doctest: +SKIP 

2373 

2374 To specify the sheet name: 

2375 

2376 >>> df1.to_excel("output.xlsx", 

2377 ... sheet_name='Sheet_name_1') # doctest: +SKIP 

2378 

2379 If you wish to write to more than one sheet in the workbook, it is 

2380 necessary to specify an ExcelWriter object: 

2381 

2382 >>> df2 = df1.copy() 

2383 >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP 

2384 ... df1.to_excel(writer, sheet_name='Sheet_name_1') 

2385 ... df2.to_excel(writer, sheet_name='Sheet_name_2') 

2386 

2387 ExcelWriter can also be used to append to an existing Excel file: 

2388 

2389 >>> with pd.ExcelWriter('output.xlsx', 

2390 ... mode='a') as writer: # doctest: +SKIP 

2391 ... df1.to_excel(writer, sheet_name='Sheet_name_3') 

2392 

2393 To set the library that is used to write the Excel file, 

2394 you can pass the `engine` keyword (the default engine is 

2395 automatically chosen depending on the file extension): 

2396 

2397 >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP 

2398 """ 

2399 if engine_kwargs is None: 

2400 engine_kwargs = {} 

2401 

2402 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

2403 

2404 from pandas.io.formats.excel import ExcelFormatter 

2405 

2406 formatter = ExcelFormatter( 

2407 df, 

2408 na_rep=na_rep, 

2409 cols=columns, 

2410 header=header, 

2411 float_format=float_format, 

2412 index=index, 

2413 index_label=index_label, 

2414 merge_cells=merge_cells, 

2415 inf_rep=inf_rep, 

2416 ) 

2417 formatter.write( 

2418 excel_writer, 

2419 sheet_name=sheet_name, 

2420 startrow=startrow, 

2421 startcol=startcol, 

2422 freeze_panes=freeze_panes, 

2423 engine=engine, 

2424 storage_options=storage_options, 

2425 engine_kwargs=engine_kwargs, 

2426 ) 

2427 

2428 @final 

2429 @deprecate_nonkeyword_arguments( 

2430 version="3.0", allowed_args=["self", "path_or_buf"], name="to_json" 

2431 ) 

2432 @doc( 

2433 storage_options=_shared_docs["storage_options"], 

2434 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

2435 ) 

2436 def to_json( 

2437 self, 

2438 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

2439 orient: Literal["split", "records", "index", "table", "columns", "values"] 

2440 | None = None, 

2441 date_format: str | None = None, 

2442 double_precision: int = 10, 

2443 force_ascii: bool_t = True, 

2444 date_unit: TimeUnit = "ms", 

2445 default_handler: Callable[[Any], JSONSerializable] | None = None, 

2446 lines: bool_t = False, 

2447 compression: CompressionOptions = "infer", 

2448 index: bool_t | None = None, 

2449 indent: int | None = None, 

2450 storage_options: StorageOptions | None = None, 

2451 mode: Literal["a", "w"] = "w", 

2452 ) -> str | None: 

2453 """ 

2454 Convert the object to a JSON string. 

2455 

2456 Note that NaN values and None will be converted to null, and datetime 

2457 objects will be converted to UNIX timestamps. 

2458 

2459 Parameters 

2460 ---------- 

2461 path_or_buf : str, path object, file-like object, or None, default None 

2462 String, path object (implementing os.PathLike[str]), or file-like 

2463 object implementing a write() function. If None, the result is 

2464 returned as a string. 

2465 orient : str 

2466 Indication of expected JSON string format. 

2467 

2468 * Series: 

2469 

2470 - default is 'index' 

2471 - allowed values are: {{'split', 'records', 'index', 'table'}}. 

2472 

2473 * DataFrame: 

2474 

2475 - default is 'columns' 

2476 - allowed values are: {{'split', 'records', 'index', 'columns', 

2477 'values', 'table'}}. 

2478 

2479 * The format of the JSON string: 

2480 

2481 - 'split' : dict like {{'index' -> [index], 'columns' -> [columns], 

2482 'data' -> [values]}} 

2483 - 'records' : list like [{{column -> value}}, ... , {{column -> value}}] 

2484 - 'index' : dict like {{index -> {{column -> value}}}} 

2485 - 'columns' : dict like {{column -> {{index -> value}}}} 

2486 - 'values' : just the values array 

2487 - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}} 

2488 

2489 Describing the data, where data component is like ``orient='records'``. 

2490 

2491 date_format : {{None, 'epoch', 'iso'}} 

2492 Type of date conversion. 'epoch' = epoch milliseconds, 

2493 'iso' = ISO8601. The default depends on the `orient`. For 

2494 ``orient='table'``, the default is 'iso'. For all other orients, 

2495 the default is 'epoch'. 

2496 double_precision : int, default 10 

2497 The number of decimal places to use when encoding 

2498 floating point values. The possible maximal value is 15. 

2499 Passing double_precision greater than 15 will raise a ValueError. 

2500 force_ascii : bool, default True 

2501 Force encoded string to be ASCII. 

2502 date_unit : str, default 'ms' (milliseconds) 

2503 The time unit to encode to, governs timestamp and ISO8601 

2504 precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, 

2505 microsecond, and nanosecond respectively. 

2506 default_handler : callable, default None 

2507 Handler to call if object cannot otherwise be converted to a 

2508 suitable format for JSON. Should receive a single argument which is 

2509 the object to convert and return a serialisable object. 

2510 lines : bool, default False 

2511 If 'orient' is 'records', write out line-delimited JSON format. Will 

2512 raise ValueError with any other 'orient', since the others are not 

2513 list-like. 

2514 {compression_options} 

2515 

2516 .. versionchanged:: 1.4.0 Zstandard support. 

2517 

2518 index : bool or None, default None 

2519 The index is only used when 'orient' is 'split', 'index', 'columns', 

2520 or 'table'. Of these, 'index' and 'columns' do not support 

2521 `index=False`. 

2522 

2523 indent : int, optional 

2524 Length of whitespace used to indent each record. 

2525 

2526 {storage_options} 

2527 

2528 mode : str, default 'w' (writing) 

2529 Specify the IO mode for output when supplying a path_or_buf. 

2530 Accepted args are 'w' (writing) and 'a' (append) only. 

2531 mode='a' is only supported when lines is True and orient is 'records'. 

2532 

2533 Returns 

2534 ------- 

2535 None or str 

2536 If path_or_buf is None, returns the resulting json format as a 

2537 string. Otherwise returns None. 

2538 

2539 See Also 

2540 -------- 

2541 read_json : Convert a JSON string to pandas object. 

2542 

2543 Notes 

2544 ----- 

2545 The behavior of ``indent=0`` varies from the stdlib, which does not 

2546 indent the output but does insert newlines. Currently, ``indent=0`` 

2547 and the default ``indent=None`` are equivalent in pandas, though this 

2548 may change in a future release. 

2549 

2550 ``orient='table'`` contains a 'pandas_version' field under 'schema'. 

2551 This stores the version of `pandas` used in the latest revision of the 

2552 schema. 

2553 

2554 Examples 

2555 -------- 

2556 >>> from json import loads, dumps 

2557 >>> df = pd.DataFrame( 

2558 ... [["a", "b"], ["c", "d"]], 

2559 ... index=["row 1", "row 2"], 

2560 ... columns=["col 1", "col 2"], 

2561 ... ) 

2562 

2563 >>> result = df.to_json(orient="split") 

2564 >>> parsed = loads(result) 

2565 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2566 {{ 

2567 "columns": [ 

2568 "col 1", 

2569 "col 2" 

2570 ], 

2571 "index": [ 

2572 "row 1", 

2573 "row 2" 

2574 ], 

2575 "data": [ 

2576 [ 

2577 "a", 

2578 "b" 

2579 ], 

2580 [ 

2581 "c", 

2582 "d" 

2583 ] 

2584 ] 

2585 }} 

2586 

2587 Encoding/decoding a Dataframe using ``'records'`` formatted JSON. 

2588 Note that index labels are not preserved with this encoding. 

2589 

2590 >>> result = df.to_json(orient="records") 

2591 >>> parsed = loads(result) 

2592 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2593 [ 

2594 {{ 

2595 "col 1": "a", 

2596 "col 2": "b" 

2597 }}, 

2598 {{ 

2599 "col 1": "c", 

2600 "col 2": "d" 

2601 }} 

2602 ] 

2603 

2604 Encoding/decoding a Dataframe using ``'index'`` formatted JSON: 

2605 

2606 >>> result = df.to_json(orient="index") 

2607 >>> parsed = loads(result) 

2608 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2609 {{ 

2610 "row 1": {{ 

2611 "col 1": "a", 

2612 "col 2": "b" 

2613 }}, 

2614 "row 2": {{ 

2615 "col 1": "c", 

2616 "col 2": "d" 

2617 }} 

2618 }} 

2619 

2620 Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: 

2621 

2622 >>> result = df.to_json(orient="columns") 

2623 >>> parsed = loads(result) 

2624 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2625 {{ 

2626 "col 1": {{ 

2627 "row 1": "a", 

2628 "row 2": "c" 

2629 }}, 

2630 "col 2": {{ 

2631 "row 1": "b", 

2632 "row 2": "d" 

2633 }} 

2634 }} 

2635 

2636 Encoding/decoding a Dataframe using ``'values'`` formatted JSON: 

2637 

2638 >>> result = df.to_json(orient="values") 

2639 >>> parsed = loads(result) 

2640 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2641 [ 

2642 [ 

2643 "a", 

2644 "b" 

2645 ], 

2646 [ 

2647 "c", 

2648 "d" 

2649 ] 

2650 ] 

2651 

2652 Encoding with Table Schema: 

2653 

2654 >>> result = df.to_json(orient="table") 

2655 >>> parsed = loads(result) 

2656 >>> dumps(parsed, indent=4) # doctest: +SKIP 

2657 {{ 

2658 "schema": {{ 

2659 "fields": [ 

2660 {{ 

2661 "name": "index", 

2662 "type": "string" 

2663 }}, 

2664 {{ 

2665 "name": "col 1", 

2666 "type": "string" 

2667 }}, 

2668 {{ 

2669 "name": "col 2", 

2670 "type": "string" 

2671 }} 

2672 ], 

2673 "primaryKey": [ 

2674 "index" 

2675 ], 

2676 "pandas_version": "1.4.0" 

2677 }}, 

2678 "data": [ 

2679 {{ 

2680 "index": "row 1", 

2681 "col 1": "a", 

2682 "col 2": "b" 

2683 }}, 

2684 {{ 

2685 "index": "row 2", 

2686 "col 1": "c", 

2687 "col 2": "d" 

2688 }} 

2689 ] 

2690 }} 

2691 """ 

2692 from pandas.io import json 

2693 

2694 if date_format is None and orient == "table": 

2695 date_format = "iso" 

2696 elif date_format is None: 

2697 date_format = "epoch" 

2698 

2699 config.is_nonnegative_int(indent) 

2700 indent = indent or 0 

2701 

2702 return json.to_json( 

2703 path_or_buf=path_or_buf, 

2704 obj=self, 

2705 orient=orient, 

2706 date_format=date_format, 

2707 double_precision=double_precision, 

2708 force_ascii=force_ascii, 

2709 date_unit=date_unit, 

2710 default_handler=default_handler, 

2711 lines=lines, 

2712 compression=compression, 

2713 index=index, 

2714 indent=indent, 

2715 storage_options=storage_options, 

2716 mode=mode, 

2717 ) 

2718 
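# A minimal sketch of the append mode documented above (hypothetical
# path): ``mode="a"`` requires ``lines=True`` with ``orient="records"``,
# so line-delimited records accumulate in a single file.
# >>> df = pd.DataFrame({"col 1": ["a"], "col 2": ["b"]})
# >>> df.to_json("records.jsonl", orient="records", lines=True)  # doctest: +SKIP
# >>> df.to_json("records.jsonl", orient="records", lines=True,
# ...            mode="a")  # doctest: +SKIP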

2719 @final 

2720 @deprecate_nonkeyword_arguments( 

2721 version="3.0", allowed_args=["self", "path_or_buf"], name="to_hdf" 

2722 ) 

2723 def to_hdf( 

2724 self, 

2725 path_or_buf: FilePath | HDFStore, 

2726 key: str, 

2727 mode: Literal["a", "w", "r+"] = "a", 

2728 complevel: int | None = None, 

2729 complib: Literal["zlib", "lzo", "bzip2", "blosc"] | None = None, 

2730 append: bool_t = False, 

2731 format: Literal["fixed", "table"] | None = None, 

2732 index: bool_t = True, 

2733 min_itemsize: int | dict[str, int] | None = None, 

2734 nan_rep=None, 

2735 dropna: bool_t | None = None, 

2736 data_columns: Literal[True] | list[str] | None = None, 

2737 errors: OpenFileErrors = "strict", 

2738 encoding: str = "UTF-8", 

2739 ) -> None: 

2740 """ 

2741 Write the contained data to an HDF5 file using HDFStore. 

2742 

2743 Hierarchical Data Format (HDF) is self-describing, allowing an 

2744 application to interpret the structure and contents of a file with 

2745 no outside information. One HDF file can hold a mix of related objects 

2746 which can be accessed as a group or as individual objects. 

2747 

2748 In order to add another DataFrame or Series to an existing HDF file 

2749 please use append mode and a different key. 

2750 

2751 .. warning:: 

2752 

2753 One can store a subclass of ``DataFrame`` or ``Series`` to HDF5, 

2754 but the type of the subclass is lost upon storing. 

2755 

2756 For more information see the :ref:`user guide <io.hdf5>`. 

2757 

2758 Parameters 

2759 ---------- 

2760 path_or_buf : str or pandas.HDFStore 

2761 File path or HDFStore object. 

2762 key : str 

2763 Identifier for the group in the store. 

2764 mode : {'a', 'w', 'r+'}, default 'a' 

2765 Mode to open file: 

2766 

2767 - 'w': write, a new file is created (an existing file with 

2768 the same name would be deleted). 

2769 - 'a': append, an existing file is opened for reading and 

2770 writing, and if the file does not exist it is created. 

2771 - 'r+': similar to 'a', but the file must already exist. 

2772 complevel : {0-9}, default None 

2773 Specifies a compression level for data. 

2774 A value of 0 or None disables compression. 

2775 complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' 

2776 Specifies the compression library to be used. 

2777 These additional compressors for Blosc are supported 

2778 (default if no compressor specified: 'blosc:blosclz'): 

2779 {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 

2780 'blosc:zlib', 'blosc:zstd'}. 

2781 Specifying a compression library which is not available raises 

2782 a ValueError. 

2783 append : bool, default False 

2784 For Table formats, append the input data to the existing table. 

2785 format : {'fixed', 'table', None}, default 'fixed' 

2786 Possible values: 

2787 

2788 - 'fixed': Fixed format. Fast writing/reading. Not-appendable, 

2789 nor searchable. 

2790 - 'table': Table format. Write as a PyTables Table structure 

2791 which may perform worse but allow more flexible operations 

2792 like searching / selecting subsets of the data. 

2793 - If None, pd.get_option('io.hdf.default_format') is checked, 

2794 followed by fallback to "fixed". 

2795 index : bool, default True 

2796 Write DataFrame index as a column. 

2797 min_itemsize : dict or int, optional 

2798 Map column names to minimum string sizes for columns. 

2799 nan_rep : Any, optional 

2800 How to represent null values as str. 

2801 Not allowed with append=True. 

2802 dropna : bool, default False 

2803 Remove missing values. 

2804 data_columns : list of columns or True, optional 

2805 List of columns to create as indexed data columns for on-disk 

2806 queries, or True to use all columns. By default only the axes 

2807 of the object are indexed. See 

2808 :ref:`Query via data columns<io.hdf5-query-data-columns>` for 

2809 more information. 

2810 Applicable only to format='table'. 

2811 errors : str, default 'strict' 

2812 Specifies how encoding and decoding errors are to be handled. 

2813 See the errors argument for :func:`open` for a full list 

2814 of options. 

2815 encoding : str, default "UTF-8" 

2816 Character encoding to use when writing string data. 

2817 See Also 

2818 -------- 

2819 read_hdf : Read from HDF file. 

2820 DataFrame.to_orc : Write a DataFrame to the binary orc format. 

2821 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

2822 DataFrame.to_sql : Write to a SQL table. 

2823 DataFrame.to_feather : Write out feather-format for DataFrames. 

2824 DataFrame.to_csv : Write out to a csv file. 

2825 

2826 Examples 

2827 -------- 

2828 >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, 

2829 ... index=['a', 'b', 'c']) # doctest: +SKIP 

2830 >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP 

2831 

2832 We can add another object to the same file: 

2833 

2834 >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP 

2835 >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP 

2836 

2837 Reading from HDF file: 

2838 

2839 >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP 

2840 A B 

2841 a 1 4 

2842 b 2 5 

2843 c 3 6 

2844 >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP 

2845 0 1 

2846 1 2 

2847 2 3 

2848 3 4 

2849 dtype: int64 

2850 """ 

2851 from pandas.io import pytables 

2852 

2853 # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected 

2854 # "Union[DataFrame, Series]" [arg-type] 

2855 pytables.to_hdf( 

2856 path_or_buf, 

2857 key, 

2858 self, # type: ignore[arg-type] 

2859 mode=mode, 

2860 complevel=complevel, 

2861 complib=complib, 

2862 append=append, 

2863 format=format, 

2864 index=index, 

2865 min_itemsize=min_itemsize, 

2866 nan_rep=nan_rep, 

2867 dropna=dropna, 

2868 data_columns=data_columns, 

2869 errors=errors, 

2870 encoding=encoding, 

2871 ) 

2872 
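# A minimal sketch of the table format documented above (hypothetical
# path): unlike the default fixed format, ``format="table"`` supports
# appending further rows under the same key.
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> df.to_hdf("store.h5", key="df", format="table")  # doctest: +SKIP
# >>> df.to_hdf("store.h5", key="df", format="table",
# ...           append=True)  # doctest: +SKIP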

2873 @final 

2874 @deprecate_nonkeyword_arguments( 

2875 version="3.0", allowed_args=["self", "name", "con"], name="to_sql" 

2876 ) 

2877 def to_sql( 

2878 self, 

2879 name: str, 

2880 con, 

2881 schema: str | None = None, 

2882 if_exists: Literal["fail", "replace", "append"] = "fail", 

2883 index: bool_t = True, 

2884 index_label: IndexLabel | None = None, 

2885 chunksize: int | None = None, 

2886 dtype: DtypeArg | None = None, 

2887 method: Literal["multi"] | Callable | None = None, 

2888 ) -> int | None: 

2889 """ 

2890 Write records stored in a DataFrame to a SQL database. 

2891 

2892 Databases supported by SQLAlchemy [1]_ are supported. Tables can be 

2893 newly created, appended to, or overwritten. 

2894 

2895 Parameters 

2896 ---------- 

2897 name : str 

2898 Name of SQL table. 

2899 con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection 

2900 Using SQLAlchemy makes it possible to use any DB supported by that 

2901 library. Legacy support is provided for sqlite3.Connection objects. The user 

2902 is responsible for engine disposal and connection closure for the SQLAlchemy 

2903 connectable. See `here \ 

2904 <https://docs.sqlalchemy.org/en/20/core/connections.html>`_. 

2905 If passing a sqlalchemy.engine.Connection which is already in a transaction, 

2906 the transaction will not be committed. If passing a sqlite3.Connection, 

2907 it will not be possible to roll back the record insertion. 

2908 

2909 schema : str, optional 

2910 Specify the schema (if database flavor supports this). If None, use 

2911 default schema. 

2912 if_exists : {'fail', 'replace', 'append'}, default 'fail' 

2913 How to behave if the table already exists. 

2914 

2915 * fail: Raise a ValueError. 

2916 * replace: Drop the table before inserting new values. 

2917 * append: Insert new values to the existing table. 

2918 

2919 index : bool, default True 

2920 Write DataFrame index as a column. Uses `index_label` as the column 

2921 name in the table. Creates a table index for this column. 

2922 index_label : str or sequence, default None 

2923 Column label for index column(s). If None is given (default) and 

2924 `index` is True, then the index names are used. 

2925 A sequence should be given if the DataFrame uses MultiIndex. 

2926 chunksize : int, optional 

2927 Specify the number of rows in each batch to be written at a time. 

2928 By default, all rows will be written at once. 

2929 dtype : dict or scalar, optional 

2930 Specifying the datatype for columns. If a dictionary is used, the 

2931 keys should be the column names and the values should be the 

2932 SQLAlchemy types or strings for the sqlite3 legacy mode. If a 

2933 scalar is provided, it will be applied to all columns. 

2934 method : {None, 'multi', callable}, optional 

2935 Controls the SQL insertion clause used: 

2936 

2937 * None : Uses standard SQL ``INSERT`` clause (one per row). 

2938 * 'multi': Pass multiple values in a single ``INSERT`` clause. 

2939 * callable with signature ``(pd_table, conn, keys, data_iter)``. 

2940 

2941 Details and a sample callable implementation can be found in the 

2942 section :ref:`insert method <io.sql.method>`. 

2943 

2944 Returns 

2945 ------- 

2946 None or int 

2947 Number of rows affected by to_sql. None is returned if the callable 

2948 passed into ``method`` does not return an integer number of rows. 

2949 

2950 The number of returned rows affected is the sum of the ``rowcount`` 

2951 attribute of ``sqlite3.Cursor`` or the SQLAlchemy connectable, which may 

2952 not reflect the exact number of written rows as stipulated in the 

2953 `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or 

2954 `SQLAlchemy <https://docs.sqlalchemy.org/en/20/core/connections.html#sqlalchemy.engine.CursorResult.rowcount>`__ documentation. 

2955 

2956 .. versionadded:: 1.4.0 

2957 

2958 Raises 

2959 ------ 

2960 ValueError 

2961 When the table already exists and `if_exists` is 'fail' (the 

2962 default). 

2963 

2964 See Also 

2965 -------- 

2966 read_sql : Read a DataFrame from a table. 

2967 

2968 Notes 

2969 ----- 

2970 Timezone aware datetime columns will be written as 

2971 ``Timestamp with timezone`` type with SQLAlchemy if supported by the 

2972 database. Otherwise, the datetimes will be stored as timezone unaware 

2973 timestamps local to the original timezone. 

2974 

2975 Not all datastores support ``method="multi"``. Oracle, for example, 

2976 does not support multi-value insert. 

2977 

2978 References 

2979 ---------- 

2980 .. [1] https://docs.sqlalchemy.org 

2981 .. [2] https://www.python.org/dev/peps/pep-0249/ 

2982 

2983 Examples 

2984 -------- 

2985 Create an in-memory SQLite database. 

2986 

2987 >>> from sqlalchemy import create_engine 

2988 >>> engine = create_engine('sqlite://', echo=False) 

2989 

2990 Create a table from scratch with 3 rows. 

2991 

2992 >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) 

2993 >>> df 

2994 name 

2995 0 User 1 

2996 1 User 2 

2997 2 User 3 

2998 

2999 >>> df.to_sql(name='users', con=engine) 

3000 3 

3001 >>> from sqlalchemy import text 

3002 >>> with engine.connect() as conn: 

3003 ... conn.execute(text("SELECT * FROM users")).fetchall() 

3004 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] 

3005 

3006 An `sqlalchemy.engine.Connection` can also be passed to `con`: 

3007 

3008 >>> with engine.begin() as connection: 

3009 ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) 

3010 ... df1.to_sql(name='users', con=connection, if_exists='append') 

3011 2 

3012 

3013 This is allowed to support operations that require that the same 

3014 DBAPI connection is used for the entire operation. 

3015 

3016 >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) 

3017 >>> df2.to_sql(name='users', con=engine, if_exists='append') 

3018 2 

3019 >>> with engine.connect() as conn: 

3020 ... conn.execute(text("SELECT * FROM users")).fetchall() 

3021 [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), 

3022 (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), 

3023 (1, 'User 7')] 

3024 

3025 Overwrite the table with just ``df2``. 

3026 

3027 >>> df2.to_sql(name='users', con=engine, if_exists='replace', 

3028 ... index_label='id') 

3029 2 

3030 >>> with engine.connect() as conn: 

3031 ... conn.execute(text("SELECT * FROM users")).fetchall() 

3032 [(0, 'User 6'), (1, 'User 7')] 

3033 

3034 Use ``method`` to define a callable insertion method to do nothing 

3035 if there's a primary key conflict on a table in a PostgreSQL database. 

3036 

3037 >>> from sqlalchemy.dialects.postgresql import insert 

3038 >>> def insert_on_conflict_nothing(table, conn, keys, data_iter): 

3039 ... # "a" is the primary key in "conflict_table" 

3040 ... data = [dict(zip(keys, row)) for row in data_iter] 

3041 ... stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"]) 

3042 ... result = conn.execute(stmt) 

3043 ... return result.rowcount 

3044 >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP 

3045 0 

3046 

3047 For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict 

3048 on a primary key. 

3049 

3050 >>> from sqlalchemy.dialects.mysql import insert 

3051 >>> def insert_on_conflict_update(table, conn, keys, data_iter): 

3052 ... # update columns "b" and "c" on primary key conflict 

3053 ... data = [dict(zip(keys, row)) for row in data_iter] 

3054 ... stmt = ( 

3055 ... insert(table.table) 

3056 ... .values(data) 

3057 ... ) 

3058 ... stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c) 

3059 ... result = conn.execute(stmt) 

3060 ... return result.rowcount 

3061 >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP 

3062 2 

3063 

3064 Specify the dtype (especially useful for integers with missing values). 

3065 Notice that while pandas is forced to store the data as floating point, 

3066 the database supports nullable integers. When fetching the data with 

3067 Python, we get back integer scalars. 

3068 

3069 >>> df = pd.DataFrame({"A": [1, None, 2]}) 

3070 >>> df 

3071 A 

3072 0 1.0 

3073 1 NaN 

3074 2 2.0 

3075 

3076 >>> from sqlalchemy.types import Integer 

3077 >>> df.to_sql(name='integers', con=engine, index=False, 

3078 ... dtype={"A": Integer()}) 

3079 3 

3080 

3081 >>> with engine.connect() as conn: 

3082 ... conn.execute(text("SELECT * FROM integers")).fetchall() 

3083 [(1,), (None,), (2,)] 

3084 """ # noqa: E501 

3085 from pandas.io import sql 

3086 

3087 return sql.to_sql( 

3088 self, 

3089 name, 

3090 con, 

3091 schema=schema, 

3092 if_exists=if_exists, 

3093 index=index, 

3094 index_label=index_label, 

3095 chunksize=chunksize, 

3096 dtype=dtype, 

3097 method=method, 

3098 ) 

3099 

3100 @final 

3101 @deprecate_nonkeyword_arguments( 

3102 version="3.0", allowed_args=["self", "path"], name="to_pickle" 

3103 ) 

3104 @doc( 

3105 storage_options=_shared_docs["storage_options"], 

3106 compression_options=_shared_docs["compression_options"] % "path", 

3107 ) 

3108 def to_pickle( 

3109 self, 

3110 path: FilePath | WriteBuffer[bytes], 

3111 compression: CompressionOptions = "infer", 

3112 protocol: int = pickle.HIGHEST_PROTOCOL, 

3113 storage_options: StorageOptions | None = None, 

3114 ) -> None: 

3115 """ 

3116 Pickle (serialize) object to file. 

3117 

3118 Parameters 

3119 ---------- 

3120 path : str, path object, or file-like object 

3121 String, path object (implementing ``os.PathLike[str]``), or file-like 

3122 object implementing a binary ``write()`` function. File path where 

3123 the pickled object will be stored. 

3124 {compression_options} 

3125 protocol : int 

3126 Int which indicates which protocol should be used by the pickler, 

3127 default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible 

3128 values are 0, 1, 2, 3, 4, 5. A negative value for the protocol 

3129 parameter is equivalent to setting its value to HIGHEST_PROTOCOL. 

3130 

3131 .. [1] https://docs.python.org/3/library/pickle.html. 

3132 

3133 {storage_options} 

3134 

3135 See Also 

3136 -------- 

3137 read_pickle : Load pickled pandas object (or any object) from file. 

3138 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

3139 DataFrame.to_sql : Write DataFrame to a SQL database. 

3140 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

3141 

3142 Examples 

3143 -------- 

3144 >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP 

3145 >>> original_df # doctest: +SKIP 

3146 foo bar 

3147 0 0 5 

3148 1 1 6 

3149 2 2 7 

3150 3 3 8 

3151 4 4 9 

3152 >>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP 

3153 

3154 >>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP 

3155 >>> unpickled_df # doctest: +SKIP 

3156 foo bar 

3157 0 0 5 

3158 1 1 6 

3159 2 2 7 

3160 3 3 8 

3161 4 4 9 

3162 """ # noqa: E501 

3163 from pandas.io.pickle import to_pickle 

3164 

3165 to_pickle( 

3166 self, 

3167 path, 

3168 compression=compression, 

3169 protocol=protocol, 

3170 storage_options=storage_options, 

3171 ) 

3172 
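# A minimal sketch of the compression handling above (hypothetical path):
# with the default ``compression="infer"``, the ".gz" suffix selects
# gzip for both writing and reading back.
# >>> df = pd.DataFrame({"A": [1, 2]})
# >>> df.to_pickle("frame.pkl.gz")  # doctest: +SKIP
# >>> pd.read_pickle("frame.pkl.gz")  # doctest: +SKIP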

3173 @final 

3174 @deprecate_nonkeyword_arguments( 

3175 version="3.0", allowed_args=["self"], name="to_clipboard" 

3176 ) 

3177 def to_clipboard( 

3178 self, excel: bool_t = True, sep: str | None = None, **kwargs 

3179 ) -> None: 

3180 r""" 

3181 Copy object to the system clipboard. 

3182 

3183 Write a text representation of object to the system clipboard. 

3184 This can be pasted into Excel, for example. 

3185 

3186 Parameters 

3187 ---------- 

3188 excel : bool, default True 

3189 Produce output in a csv format for easy pasting into excel. 

3190 

3191 - True, use the provided separator for csv pasting. 

3192 - False, write a string representation of the object to the clipboard. 

3193 

3194 sep : str, default ``'\t'`` 

3195 Field delimiter. 

3196 **kwargs 

3197 These parameters will be passed to DataFrame.to_csv. 

3198 

3199 See Also 

3200 -------- 

3201 DataFrame.to_csv : Write a DataFrame to a comma-separated values 

3202 (csv) file. 

3203 read_clipboard : Read text from clipboard and pass to read_csv. 

3204 

3205 Notes 

3206 ----- 

3207 Requirements for your platform: 

3208 

3209 - Linux : `xclip`, or `xsel` (with `PyQt4` modules) 

3210 - Windows : none 

3211 - macOS : none 

3212 

3213 This method uses the clipboard mechanisms provided by the package 

3214 `pyperclip`. A way to copy any string output format is shown in the examples. 

3215 

3216 Examples 

3217 -------- 

3218 Copy the contents of a DataFrame to the clipboard. 

3219 

3220 >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) 

3221 

3222 >>> df.to_clipboard(sep=',') # doctest: +SKIP 

3223 ... # Wrote the following to the system clipboard: 

3224 ... # ,A,B,C 

3225 ... # 0,1,2,3 

3226 ... # 1,4,5,6 

3227 

3228 We can omit the index by passing the keyword `index` and setting 

3229 it to false. 

3230 

3231 >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP 

3232 ... # Wrote the following to the system clipboard: 

3233 ... # A,B,C 

3234 ... # 1,2,3 

3235 ... # 4,5,6 

3236 

3237 Using the original `pyperclip` package for any string output format. 

3238 

3239 .. code-block:: python 

3240 

3241 import pyperclip 

3242 html = df.style.to_html() 

3243 pyperclip.copy(html) 

3244 """ 

3245 from pandas.io import clipboards 

3246 

3247 clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) 

3248 

3249 @final 

3250 def to_xarray(self): 

3251 """ 

3252 Return an xarray object from the pandas object. 

3253 

3254 Returns 

3255 ------- 

3256 xarray.DataArray or xarray.Dataset 

3257 Data in the pandas structure converted to Dataset if the object is 

3258 a DataFrame, or a DataArray if the object is a Series. 

3259 

3260 See Also 

3261 -------- 

3262 DataFrame.to_hdf : Write DataFrame to an HDF5 file. 

3263 DataFrame.to_parquet : Write a DataFrame to the binary parquet format. 

3264 

3265 Notes 

3266 ----- 

3267 See the `xarray docs <https://xarray.pydata.org/en/stable/>`__ 

3268 

3269 Examples 

3270 -------- 

3271 >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), 

3272 ... ('parrot', 'bird', 24.0, 2), 

3273 ... ('lion', 'mammal', 80.5, 4), 

3274 ... ('monkey', 'mammal', np.nan, 4)], 

3275 ... columns=['name', 'class', 'max_speed', 

3276 ... 'num_legs']) 

3277 >>> df 

3278 name class max_speed num_legs 

3279 0 falcon bird 389.0 2 

3280 1 parrot bird 24.0 2 

3281 2 lion mammal 80.5 4 

3282 3 monkey mammal NaN 4 

3283 

3284 >>> df.to_xarray() # doctest: +SKIP 

3285 <xarray.Dataset> 

3286 Dimensions: (index: 4) 

3287 Coordinates: 

3288 * index (index) int64 32B 0 1 2 3 

3289 Data variables: 

3290 name (index) object 32B 'falcon' 'parrot' 'lion' 'monkey' 

3291 class (index) object 32B 'bird' 'bird' 'mammal' 'mammal' 

3292 max_speed (index) float64 32B 389.0 24.0 80.5 nan 

3293 num_legs (index) int64 32B 2 2 4 4 

3294 

3295 >>> df['max_speed'].to_xarray() # doctest: +SKIP 

3296 <xarray.DataArray 'max_speed' (index: 4)> 

3297 array([389. , 24. , 80.5, nan]) 

3298 Coordinates: 

3299 * index (index) int64 0 1 2 3 

3300 

3301 >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', 

3302 ... '2018-01-02', '2018-01-02']) 

3303 >>> df_multiindex = pd.DataFrame({'date': dates, 

3304 ... 'animal': ['falcon', 'parrot', 

3305 ... 'falcon', 'parrot'], 

3306 ... 'speed': [350, 18, 361, 15]}) 

3307 >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) 

3308 

3309 >>> df_multiindex 

3310 speed 

3311 date animal 

3312 2018-01-01 falcon 350 

3313 parrot 18 

3314 2018-01-02 falcon 361 

3315 parrot 15 

3316 

3317 >>> df_multiindex.to_xarray() # doctest: +SKIP 

3318 <xarray.Dataset> 

3319 Dimensions: (date: 2, animal: 2) 

3320 Coordinates: 

3321 * date (date) datetime64[ns] 2018-01-01 2018-01-02 

3322 * animal (animal) object 'falcon' 'parrot' 

3323 Data variables: 

3324 speed (date, animal) int64 350 18 361 15 

3325 """ 

3326 xarray = import_optional_dependency("xarray") 

3327 

3328 if self.ndim == 1: 

3329 return xarray.DataArray.from_series(self) 

3330 else: 

3331 return xarray.Dataset.from_dataframe(self) 

3332 

3333 @overload 

3334 def to_latex( 

3335 self, 

3336 buf: None = ..., 

3337 columns: Sequence[Hashable] | None = ..., 

3338 header: bool_t | SequenceNotStr[str] = ..., 

3339 index: bool_t = ..., 

3340 na_rep: str = ..., 

3341 formatters: FormattersType | None = ..., 

3342 float_format: FloatFormatType | None = ..., 

3343 sparsify: bool_t | None = ..., 

3344 index_names: bool_t = ..., 

3345 bold_rows: bool_t = ..., 

3346 column_format: str | None = ..., 

3347 longtable: bool_t | None = ..., 

3348 escape: bool_t | None = ..., 

3349 encoding: str | None = ..., 

3350 decimal: str = ..., 

3351 multicolumn: bool_t | None = ..., 

3352 multicolumn_format: str | None = ..., 

3353 multirow: bool_t | None = ..., 

3354 caption: str | tuple[str, str] | None = ..., 

3355 label: str | None = ..., 

3356 position: str | None = ..., 

3357 ) -> str: 

3358 ... 

3359 

3360 @overload 

3361 def to_latex( 

3362 self, 

3363 buf: FilePath | WriteBuffer[str], 

3364 columns: Sequence[Hashable] | None = ..., 

3365 header: bool_t | SequenceNotStr[str] = ..., 

3366 index: bool_t = ..., 

3367 na_rep: str = ..., 

3368 formatters: FormattersType | None = ..., 

3369 float_format: FloatFormatType | None = ..., 

3370 sparsify: bool_t | None = ..., 

3371 index_names: bool_t = ..., 

3372 bold_rows: bool_t = ..., 

3373 column_format: str | None = ..., 

3374 longtable: bool_t | None = ..., 

3375 escape: bool_t | None = ..., 

3376 encoding: str | None = ..., 

3377 decimal: str = ..., 

3378 multicolumn: bool_t | None = ..., 

3379 multicolumn_format: str | None = ..., 

3380 multirow: bool_t | None = ..., 

3381 caption: str | tuple[str, str] | None = ..., 

3382 label: str | None = ..., 

3383 position: str | None = ..., 

3384 ) -> None: 

3385 ... 

3386 

3387 @final 

3388 @deprecate_nonkeyword_arguments( 

3389 version="3.0", allowed_args=["self", "buf"], name="to_latex" 

3390 ) 

3391 def to_latex( 

3392 self, 

3393 buf: FilePath | WriteBuffer[str] | None = None, 

3394 columns: Sequence[Hashable] | None = None, 

3395 header: bool_t | SequenceNotStr[str] = True, 

3396 index: bool_t = True, 

3397 na_rep: str = "NaN", 

3398 formatters: FormattersType | None = None, 

3399 float_format: FloatFormatType | None = None, 

3400 sparsify: bool_t | None = None, 

3401 index_names: bool_t = True, 

3402 bold_rows: bool_t = False, 

3403 column_format: str | None = None, 

3404 longtable: bool_t | None = None, 

3405 escape: bool_t | None = None, 

3406 encoding: str | None = None, 

3407 decimal: str = ".", 

3408 multicolumn: bool_t | None = None, 

3409 multicolumn_format: str | None = None, 

3410 multirow: bool_t | None = None, 

3411 caption: str | tuple[str, str] | None = None, 

3412 label: str | None = None, 

3413 position: str | None = None, 

3414 ) -> str | None: 

3415 r""" 

3416 Render object to a LaTeX tabular, longtable, or nested table. 

3417 

3418 Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted 

3419 into a main LaTeX document or read from an external file 

3420 with ``\input{{table.tex}}``. 

3421 

3422 .. versionchanged:: 2.0.0 

3423 Refactored to use the Styler implementation via jinja2 templating. 

3424 

3425 Parameters 

3426 ---------- 

3427 buf : str, Path or StringIO-like, optional, default None 

3428 Buffer to write to. If None, the output is returned as a string. 

3429 columns : list of label, optional 

3430 The subset of columns to write. Writes all columns by default. 

3431 header : bool or list of str, default True 

3432 Write out the column names. If a list of strings is given, 

3433 it is assumed to be aliases for the column names. 

3434 index : bool, default True 

3435 Write row names (index). 

3436 na_rep : str, default 'NaN' 

3437 Missing data representation. 

3438 formatters : list of functions or dict of {{str: function}}, optional 

3439 Formatter functions to apply to columns' elements by position or 

3440 name. The result of each function must be a unicode string. 

3441 List must be of length equal to the number of columns. 

3442 float_format : one-parameter function or str, optional, default None 

3443 Formatter for floating point numbers. For example 

3444 ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will 

3445 both result in 0.1234 being formatted as 0.12. 

3446 sparsify : bool, optional 

3447 Set to False for a DataFrame with a hierarchical index to print 

3448 every multiindex key at each row. By default, the value will be 

3449 read from the config module. 

3450 index_names : bool, default True 

3451 Prints the names of the indexes. 

3452 bold_rows : bool, default False 

3453 Make the row labels bold in the output. 

3454 column_format : str, optional 

3455 The columns format as specified in `LaTeX table format 

3456 <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 

3457 columns. By default, 'l' will be used for all columns except 

3458 columns of numbers, which default to 'r'. 

3459 longtable : bool, optional 

3460 Use a longtable environment instead of tabular. Requires 

3461 adding a \usepackage{{longtable}} to your LaTeX preamble. 

3462 By default, the value will be read from the pandas config 

3463 module, and set to `True` if the option ``styler.latex.environment`` is 

3464 `"longtable"`. 

3465 

3466 .. versionchanged:: 2.0.0 

3467 The pandas option affecting this argument has changed. 

3468 escape : bool, optional 

3469 By default, the value will be read from the pandas config 

3470 module and set to `True` if the option ``styler.format.escape`` is 

3471 `"latex"`. When set to False prevents from escaping latex special 

3472 characters in column names. 

3473 

3474 .. versionchanged:: 2.0.0 

3475 The pandas option affecting this argument has changed, as has the 

3476 default value to `False`. 

3477 encoding : str, optional 

3478 A string representing the encoding to use in the output file, 

3479 defaults to 'utf-8'. 

3480 decimal : str, default '.' 

3481 Character recognized as decimal separator, e.g. ',' in Europe. 

3482 multicolumn : bool, default True 

3483 Use \multicolumn to enhance MultiIndex columns. 

3484 The default will be read from the config module, and is set 

3485 as the option ``styler.sparse.columns``. 

3486 

3487 .. versionchanged:: 2.0.0 

3488 The pandas option affecting this argument has changed. 

3489 multicolumn_format : str, default 'r' 

3490 The alignment for multicolumns, similar to `column_format` 

3491 The default will be read from the config module, and is set as the option 

3492 ``styler.latex.multicol_align``. 

3493 

3494 .. versionchanged:: 2.0.0 

3495 The pandas option affecting this argument has changed, as has the 

3496 default value to "r". 

3497 multirow : bool, default True 

3498 Use \multirow to enhance MultiIndex rows. Requires adding a 

3499 \usepackage{{multirow}} to your LaTeX preamble. Will print 

3500 centered labels (instead of top-aligned) across the contained 

3501 rows, separating groups via clines. The default will be read 

3502 from the pandas config module, and is set as the option 

3503 ``styler.sparse.index``. 

3504 

3505 .. versionchanged:: 2.0.0 

3506 The pandas option affecting this argument has changed, as has the 

3507 default value to `True`. 

3508 caption : str or tuple, optional 

3509 Tuple (full_caption, short_caption), 

3510 which results in ``\caption[short_caption]{{full_caption}}``; 

3511 if a single string is passed, no short caption will be set. 

3512 label : str, optional 

3513 The LaTeX label to be placed inside ``\label{{}}`` in the output. 

3514 This is used with ``\ref{{}}`` in the main ``.tex`` file. 

3515 

3516 position : str, optional 

3517 The LaTeX positional argument for tables, to be placed after 

3518 ``\begin{{}}`` in the output. 

3519 

3520 Returns 

3521 ------- 

3522 str or None 

3523 If buf is None, returns the result as a string. Otherwise returns None. 

3524 

3525 See Also 

3526 -------- 

3527 io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX 

3528 with conditional formatting. 

3529 DataFrame.to_string : Render a DataFrame to a console-friendly 

3530 tabular output. 

3531 DataFrame.to_html : Render a DataFrame as an HTML table. 

3532 

3533 Notes 

3534 ----- 

3535 As of v2.0.0 this method has changed to use the Styler implementation as 

3536 part of :meth:`.Styler.to_latex` via ``jinja2`` templating. This means 

3537 that ``jinja2`` is a requirement, and needs to be installed, for this method 

3538 to function. It is advised that users switch to using Styler, since that 

3539 implementation is more frequently updated and offers much more 

3540 flexibility in the output. 

3541 

3542 Examples 

3543 -------- 

3544 Convert a general DataFrame to LaTeX with formatting: 

3545 

3546 >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'], 

3547 ... age=[26, 45], 

3548 ... height=[181.23, 177.65])) 

3549 >>> print(df.to_latex(index=False, 

3550 ... formatters={"name": str.upper}, 

3551 ... float_format="{:.1f}".format, 

3552 ... )) # doctest: +SKIP 

3553 \begin{tabular}{lrr} 

3554 \toprule 

3555 name & age & height \\ 

3556 \midrule 

3557 RAPHAEL & 26 & 181.2 \\ 

3558 DONATELLO & 45 & 177.7 \\ 

3559 \bottomrule 

3560 \end{tabular} 

3561 """ 

3562 # Get defaults from the pandas config 

3563 if self.ndim == 1: 

3564 self = self.to_frame() 

3565 if longtable is None: 

3566 longtable = config.get_option("styler.latex.environment") == "longtable" 

3567 if escape is None: 

3568 escape = config.get_option("styler.format.escape") == "latex" 

3569 if multicolumn is None: 

3570 multicolumn = config.get_option("styler.sparse.columns") 

3571 if multicolumn_format is None: 

3572 multicolumn_format = config.get_option("styler.latex.multicol_align") 

3573 if multirow is None: 

3574 multirow = config.get_option("styler.sparse.index") 

3575 

3576 if column_format is not None and not isinstance(column_format, str): 

3577 raise ValueError("`column_format` must be str or unicode") 

3578 length = len(self.columns) if columns is None else len(columns) 

3579 if isinstance(header, (list, tuple)) and len(header) != length: 

3580 raise ValueError(f"Writing {length} cols but got {len(header)} aliases") 

3581 

3582 # Refactor formatters/float_format/decimal/na_rep/escape to Styler structure 

3583 base_format_ = { 

3584 "na_rep": na_rep, 

3585 "escape": "latex" if escape else None, 

3586 "decimal": decimal, 

3587 } 

3588 index_format_: dict[str, Any] = {"axis": 0, **base_format_} 

3589 column_format_: dict[str, Any] = {"axis": 1, **base_format_} 

3590 

3591 if isinstance(float_format, str): 

3592 float_format_: Callable | None = lambda x: float_format % x 

3593 else: 

3594 float_format_ = float_format 

3595 

3596 def _wrap(x, alt_format_): 

3597 if isinstance(x, (float, complex)) and float_format_ is not None: 

3598 return float_format_(x) 

3599 else: 

3600 return alt_format_(x) 

3601 

3602 formatters_: list | tuple | dict | Callable | None = None 

3603 if isinstance(formatters, list): 

3604 formatters_ = { 

3605 c: partial(_wrap, alt_format_=formatters[i]) 

3606 for i, c in enumerate(self.columns) 

3607 } 

3608 elif isinstance(formatters, dict): 

3609 index_formatter = formatters.pop("__index__", None) 

3610 column_formatter = formatters.pop("__columns__", None) 

3611 if index_formatter is not None: 

3612 index_format_.update({"formatter": index_formatter}) 

3613 if column_formatter is not None: 

3614 column_format_.update({"formatter": column_formatter}) 

3615 

3616 formatters_ = formatters 

3617 float_columns = self.select_dtypes(include="float").columns 

3618 for col in float_columns: 

3619 if col not in formatters: 

3620 formatters_.update({col: float_format_}) 

3621 elif formatters is None and float_format is not None: 

3622 formatters_ = partial(_wrap, alt_format_=lambda v: v) 

3623 format_index_ = [index_format_, column_format_] 

3624 

3625 # Deal with hiding indexes and relabelling column names 

3626 hide_: list[dict] = [] 

3627 relabel_index_: list[dict] = [] 

3628 if columns: 

3629 hide_.append( 

3630 { 

3631 "subset": [c for c in self.columns if c not in columns], 

3632 "axis": "columns", 

3633 } 

3634 ) 

3635 if header is False: 

3636 hide_.append({"axis": "columns"}) 

3637 elif isinstance(header, (list, tuple)): 

3638 relabel_index_.append({"labels": header, "axis": "columns"}) 

3639 format_index_ = [index_format_] # column_format is overwritten 

3640 

3641 if index is False: 

3642 hide_.append({"axis": "index"}) 

3643 if index_names is False: 

3644 hide_.append({"names": True, "axis": "index"}) 

3645 

3646 render_kwargs_ = { 

3647 "hrules": True, 

3648 "sparse_index": sparsify, 

3649 "sparse_columns": sparsify, 

3650 "environment": "longtable" if longtable else None, 

3651 "multicol_align": multicolumn_format 

3652 if multicolumn 

3653 else f"naive-{multicolumn_format}", 

3654 "multirow_align": "t" if multirow else "naive", 

3655 "encoding": encoding, 

3656 "caption": caption, 

3657 "label": label, 

3658 "position": position, 

3659 "column_format": column_format, 

3660 "clines": "skip-last;data" 

3661 if (multirow and isinstance(self.index, MultiIndex)) 

3662 else None, 

3663 "bold_rows": bold_rows, 

3664 } 

3665 

3666 return self._to_latex_via_styler( 

3667 buf, 

3668 hide=hide_, 

3669 relabel_index=relabel_index_, 

3670 format={"formatter": formatters_, **base_format_}, 

3671 format_index=format_index_, 

3672 render_kwargs=render_kwargs_, 

3673 ) 

3674 
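# A minimal sketch (not pandas source): the config lookups above mean that
# to_latex's defaults can be steered globally through pandas options. The
# one-column frame here is an assumption for illustration.
import pandas as pd

df = pd.DataFrame({"x": [1.0]})
with pd.option_context("styler.latex.environment", "longtable"):
    out = df.to_latex()  # longtable resolves to True inside this context
assert "longtable" in out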

3675 @final 

3676 def _to_latex_via_styler( 

3677 self, 

3678 buf=None, 

3679 *, 

3680 hide: dict | list[dict] | None = None, 

3681 relabel_index: dict | list[dict] | None = None, 

3682 format: dict | list[dict] | None = None, 

3683 format_index: dict | list[dict] | None = None, 

3684 render_kwargs: dict | None = None, 

3685 ): 

3686 """ 

3687 Render object to a LaTeX tabular, longtable, or nested table. 

3688 

3689 Uses the ``Styler`` implementation with the following, ordered, method chaining: 

3690 

3691 .. code-block:: python 

3692 styler = Styler(DataFrame) 

3693 styler.hide(**hide) 

3694 styler.relabel_index(**relabel_index) 

3695 styler.format(**format) 

3696 styler.format_index(**format_index) 

3697 styler.to_latex(buf=buf, **render_kwargs) 

3698 

3699 Parameters 

3700 ---------- 

3701 buf : str, Path or StringIO-like, optional, default None 

3702 Buffer to write to. If None, the output is returned as a string. 

3703 hide : dict, list of dict 

3704 Keyword args to pass to the method call of ``Styler.hide``. If a list 

3705 is given, the method is called once per entry. 

3706 relabel_index : dict, list of dict 

3707 Keyword args to pass to the method call of ``Styler.relabel_index``. If 

3708 a list is given, the method is called once per entry. 

3709 format : dict, list of dict 

3710 Keyword args to pass to the method call of ``Styler.format``. If a list 

3711 is given, the method is called once per entry. 

3712 format_index : dict, list of dict 

3713 Keyword args to pass to the method call of ``Styler.format_index``. If 

3714 a list is given, the method is called once per entry. 

3715 render_kwargs : dict 

3716 Keyword args to pass to the method call of ``Styler.to_latex``. 

3717 

3718 Returns 

3719 ------- 

3720 str or None 

3721 If buf is None, returns the result as a string. Otherwise returns None. 

3722 """ 

3723 from pandas.io.formats.style import Styler 

3724 

3725 self = cast("DataFrame", self) 

3726 styler = Styler(self, uuid="") 

3727 

3728 for kw_name in ["hide", "relabel_index", "format", "format_index"]: 

3729 kw = vars()[kw_name] 

3730 if isinstance(kw, dict): 

3731 getattr(styler, kw_name)(**kw) 

3732 elif isinstance(kw, list): 

3733 for sub_kw in kw: 

3734 getattr(styler, kw_name)(**sub_kw) 

3735 

3736 # bold_rows is not a direct kwarg of Styler.to_latex 

3737 render_kwargs = {} if render_kwargs is None else render_kwargs 

3738 if render_kwargs.pop("bold_rows"): 

3739 styler.map_index(lambda v: "textbf:--rwrap;") 

3740 

3741 return styler.to_latex(buf=buf, **render_kwargs) 

3742 
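# A minimal sketch (not pandas source) of the ordered Styler call chain that
# _to_latex_via_styler drives; the frame and the formatting choices below are
# assumptions for illustration.
import pandas as pd
from pandas.io.formats.style import Styler

df = pd.DataFrame({"x": [0.1234, 5.6789]})
styler = Styler(df, uuid="")
styler.hide(axis="index")                  # one Styler.hide(**kw) call
styler.format(formatter="{:.2f}".format)   # Styler.format(**kw)
print(styler.to_latex(hrules=True))        # Styler.to_latex(**render_kwargs)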

3743 @overload 

3744 def to_csv( 

3745 self, 

3746 path_or_buf: None = ..., 

3747 sep: str = ..., 

3748 na_rep: str = ..., 

3749 float_format: str | Callable | None = ..., 

3750 columns: Sequence[Hashable] | None = ..., 

3751 header: bool_t | list[str] = ..., 

3752 index: bool_t = ..., 

3753 index_label: IndexLabel | None = ..., 

3754 mode: str = ..., 

3755 encoding: str | None = ..., 

3756 compression: CompressionOptions = ..., 

3757 quoting: int | None = ..., 

3758 quotechar: str = ..., 

3759 lineterminator: str | None = ..., 

3760 chunksize: int | None = ..., 

3761 date_format: str | None = ..., 

3762 doublequote: bool_t = ..., 

3763 escapechar: str | None = ..., 

3764 decimal: str = ..., 

3765 errors: OpenFileErrors = ..., 

3766 storage_options: StorageOptions = ..., 

3767 ) -> str: 

3768 ... 

3769 

3770 @overload 

3771 def to_csv( 

3772 self, 

3773 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], 

3774 sep: str = ..., 

3775 na_rep: str = ..., 

3776 float_format: str | Callable | None = ..., 

3777 columns: Sequence[Hashable] | None = ..., 

3778 header: bool_t | list[str] = ..., 

3779 index: bool_t = ..., 

3780 index_label: IndexLabel | None = ..., 

3781 mode: str = ..., 

3782 encoding: str | None = ..., 

3783 compression: CompressionOptions = ..., 

3784 quoting: int | None = ..., 

3785 quotechar: str = ..., 

3786 lineterminator: str | None = ..., 

3787 chunksize: int | None = ..., 

3788 date_format: str | None = ..., 

3789 doublequote: bool_t = ..., 

3790 escapechar: str | None = ..., 

3791 decimal: str = ..., 

3792 errors: OpenFileErrors = ..., 

3793 storage_options: StorageOptions = ..., 

3794 ) -> None: 

3795 ... 

3796 

3797 @final 

3798 @deprecate_nonkeyword_arguments( 

3799 version="3.0", allowed_args=["self", "path_or_buf"], name="to_csv" 

3800 ) 

3801 @doc( 

3802 storage_options=_shared_docs["storage_options"], 

3803 compression_options=_shared_docs["compression_options"] % "path_or_buf", 

3804 ) 

3805 def to_csv( 

3806 self, 

3807 path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, 

3808 sep: str = ",", 

3809 na_rep: str = "", 

3810 float_format: str | Callable | None = None, 

3811 columns: Sequence[Hashable] | None = None, 

3812 header: bool_t | list[str] = True, 

3813 index: bool_t = True, 

3814 index_label: IndexLabel | None = None, 

3815 mode: str = "w", 

3816 encoding: str | None = None, 

3817 compression: CompressionOptions = "infer", 

3818 quoting: int | None = None, 

3819 quotechar: str = '"', 

3820 lineterminator: str | None = None, 

3821 chunksize: int | None = None, 

3822 date_format: str | None = None, 

3823 doublequote: bool_t = True, 

3824 escapechar: str | None = None, 

3825 decimal: str = ".", 

3826 errors: OpenFileErrors = "strict", 

3827 storage_options: StorageOptions | None = None, 

3828 ) -> str | None: 

3829 r""" 

3830 Write object to a comma-separated values (csv) file. 

3831 

3832 Parameters 

3833 ---------- 

3834 path_or_buf : str, path object, file-like object, or None, default None 

3835 String, path object (implementing os.PathLike[str]), or file-like 

3836 object implementing a write() function. If None, the result is 

3837 returned as a string. If a non-binary file object is passed, it should 

3838 be opened with `newline=''`, disabling universal newlines. If a binary 

3839 file object is passed, `mode` might need to contain a `'b'`. 

3840 sep : str, default ',' 

3841 String of length 1. Field delimiter for the output file. 

3842 na_rep : str, default '' 

3843 Missing data representation. 

3844 float_format : str, Callable, default None 

3845 Format string for floating point numbers. If a Callable is given, it takes 

3846 precedence over other numeric formatting parameters, like decimal. 

3847 columns : sequence, optional 

3848 Columns to write. 

3849 header : bool or list of str, default True 

3850 Write out the column names. If a list of strings is given it is 

3851 assumed to be aliases for the column names. 

3852 index : bool, default True 

3853 Write row names (index). 

3854 index_label : str or sequence, or False, default None 

3855 Column label for index column(s) if desired. If None is given, and 

3856 `header` and `index` are True, then the index names are used. A 

3857 sequence should be given if the object uses MultiIndex. If 

3858 False do not print fields for index names. Use index_label=False 

3859 for easier importing in R. 

3860 mode : {{'w', 'x', 'a'}}, default 'w' 

3861 Forwarded to either `open(mode=)` or `fsspec.open(mode=)` to control 

3862 the file opening. Typical values include: 

3863 

3864 - 'w', truncate the file first. 

3865 - 'x', exclusive creation, failing if the file already exists. 

3866 - 'a', append to the end of file if it exists. 

3867 

3868 encoding : str, optional 

3869 A string representing the encoding to use in the output file, 

3870 defaults to 'utf-8'. `encoding` is not supported if `path_or_buf` 

3871 is a non-binary file object. 

3872 {compression_options} 

3873 

3874 May be a dict with key 'method' as compression mode 

3875 and other entries as additional compression options if 

3876 compression mode is 'zip'. 

3877 

3878 Passing compression options as keys in dict is 

3879 supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'. 

3880 quoting : optional constant from csv module 

3881 Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` 

3882 then floats are converted to strings and thus csv.QUOTE_NONNUMERIC 

3883 will treat them as non-numeric. 

3884 quotechar : str, default '\"' 

3885 String of length 1. Character used to quote fields. 

3886 lineterminator : str, optional 

3887 The newline character or character sequence to use in the output 

3888 file. Defaults to `os.linesep`, which depends on the OS in which 

3889 this method is called (e.g. '\\n' for Linux, '\\r\\n' for Windows). 

3890 

3891 .. versionchanged:: 1.5.0 

3892 

3893 Previously was line_terminator, changed for consistency with 

3894 read_csv and the standard library 'csv' module. 

3895 

3896 chunksize : int or None 

3897 Rows to write at a time. 

3898 date_format : str, default None 

3899 Format string for datetime objects. 

3900 doublequote : bool, default True 

3901 Control quoting of `quotechar` inside a field. 

3902 escapechar : str, default None 

3903 String of length 1. Character used to escape `sep` and `quotechar` 

3904 when appropriate. 

3905 decimal : str, default '.' 

3906 Character recognized as decimal separator. E.g. use ',' for 

3907 European data. 

3908 errors : str, default 'strict' 

3909 Specifies how encoding and decoding errors are to be handled. 

3910 See the errors argument for :func:`open` for a full list 

3911 of options. 

3912 

3913 {storage_options} 

3914 

3915 Returns 

3916 ------- 

3917 None or str 

3918 If path_or_buf is None, returns the resulting csv format as a 

3919 string. Otherwise returns None. 

3920 

3921 See Also 

3922 -------- 

3923 read_csv : Load a CSV file into a DataFrame. 

3924 to_excel : Write DataFrame to an Excel file. 

3925 

3926 Examples 

3927 -------- 

3928 Create 'out.csv' containing 'df' without indices 

3929 

3930 >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], 

3931 ... 'mask': ['red', 'purple'], 

3932 ... 'weapon': ['sai', 'bo staff']}}) 

3933 >>> df.to_csv('out.csv', index=False) # doctest: +SKIP 

3934 

3935 Omitting `path_or_buf` returns the csv output as a string: 

3936 

3937 >>> df.to_csv(index=False) 

3938 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' 

3939 

3940 Create 'out.zip' containing 'out.csv': 

3941 

3942 >>> df.to_csv('out.zip', index=False, compression=dict(method='zip', archive_name='out.csv'))  # doctest: +SKIP 

3943 

3944 To write a csv file to a new folder or nested folder, you will first 

3945 need to create it using either pathlib or os: 

3946 

3947 >>> from pathlib import Path # doctest: +SKIP 

3948 >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP 

3949 >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP 

3950 >>> df.to_csv(filepath) # doctest: +SKIP 

3951 

3952 >>> import os # doctest: +SKIP 

3953 >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP 

3954 >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP 

3955 """ 

3956 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 

3957 

3958 formatter = DataFrameFormatter( 

3959 frame=df, 

3960 header=header, 

3961 index=index, 

3962 na_rep=na_rep, 

3963 float_format=float_format, 

3964 decimal=decimal, 

3965 ) 

3966 

3967 return DataFrameRenderer(formatter).to_csv( 

3968 path_or_buf, 

3969 lineterminator=lineterminator, 

3970 sep=sep, 

3971 encoding=encoding, 

3972 errors=errors, 

3973 compression=compression, 

3974 quoting=quoting, 

3975 columns=columns, 

3976 index_label=index_label, 

3977 mode=mode, 

3978 chunksize=chunksize, 

3979 quotechar=quotechar, 

3980 date_format=date_format, 

3981 doublequote=doublequote, 

3982 escapechar=escapechar, 

3983 storage_options=storage_options, 

3984 ) 

3985 
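# A minimal sketch (not pandas source): float_format accepts a printf-style
# string or a callable; per the docstring, a callable takes precedence over
# other numeric formatting options such as ``decimal``.
import pandas as pd

df = pd.DataFrame({"v": [0.1234, 5.6789]})
print(df.to_csv(index=False, float_format="%.2f"))
print(df.to_csv(index=False, float_format=lambda x: f"{x:.1f}"))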

3986 # ---------------------------------------------------------------------- 

3987 # Lookup Caching 

3988 

3989 def _reset_cacher(self) -> None: 

3990 """ 

3991 Reset the cacher. 

3992 """ 

3993 raise AbstractMethodError(self) 

3994 

3995 def _maybe_update_cacher( 

3996 self, 

3997 clear: bool_t = False, 

3998 verify_is_copy: bool_t = True, 

3999 inplace: bool_t = False, 

4000 ) -> None: 

4001 """ 

4002 See if we need to update our parent cacher; if ``clear``, then also 

4003 clear our item cache. 

4004 

4005 Parameters 

4006 ---------- 

4007 clear : bool, default False 

4008 Clear the item cache. 

4009 verify_is_copy : bool, default True 

4010 Provide is_copy checks. 

4011 """ 

4012 if using_copy_on_write(): 

4013 return 

4014 

4015 if verify_is_copy: 

4016 self._check_setitem_copy(t="referent") 

4017 

4018 if clear: 

4019 self._clear_item_cache() 

4020 

4021 def _clear_item_cache(self) -> None: 

4022 raise AbstractMethodError(self) 

4023 

4024 # ---------------------------------------------------------------------- 

4025 # Indexing Methods 

4026 

4027 @final 

4028 def take(self, indices, axis: Axis = 0, **kwargs) -> Self: 

4029 """ 

4030 Return the elements in the given *positional* indices along an axis. 

4031 

4032 This means that we are not indexing according to actual values in 

4033 the index attribute of the object. We are indexing according to the 

4034 actual position of the element in the object. 

4035 

4036 Parameters 

4037 ---------- 

4038 indices : array-like 

4039 An array of ints indicating which positions to take. 

4040 axis : {0 or 'index', 1 or 'columns', None}, default 0 

4041 The axis on which to select elements. ``0`` means that we are 

4042 selecting rows, ``1`` means that we are selecting columns. 

4043 For `Series` this parameter is unused and defaults to 0. 

4044 **kwargs 

4045 For compatibility with :meth:`numpy.take`. Has no effect on the 

4046 output. 

4047 

4048 Returns 

4049 ------- 

4050 same type as caller 

4051 An array-like containing the elements taken from the object. 

4052 

4053 See Also 

4054 -------- 

4055 DataFrame.loc : Select a subset of a DataFrame by labels. 

4056 DataFrame.iloc : Select a subset of a DataFrame by positions. 

4057 numpy.take : Take elements from an array along an axis. 

4058 

4059 Examples 

4060 -------- 

4061 >>> df = pd.DataFrame([('falcon', 'bird', 389.0), 

4062 ... ('parrot', 'bird', 24.0), 

4063 ... ('lion', 'mammal', 80.5), 

4064 ... ('monkey', 'mammal', np.nan)], 

4065 ... columns=['name', 'class', 'max_speed'], 

4066 ... index=[0, 2, 3, 1]) 

4067 >>> df 

4068 name class max_speed 

4069 0 falcon bird 389.0 

4070 2 parrot bird 24.0 

4071 3 lion mammal 80.5 

4072 1 monkey mammal NaN 

4073 

4074 Take elements at positions 0 and 3 along the axis 0 (default). 

4075 

4076 Note how the actual indices selected (0 and 1) do not correspond to 

4077 our selected indices 0 and 3. That's because we are selecting the 0th 

4078 and 3rd rows, not rows whose indices equal 0 and 3. 

4079 

4080 >>> df.take([0, 3]) 

4081 name class max_speed 

4082 0 falcon bird 389.0 

4083 1 monkey mammal NaN 

4084 

4085 Take elements at indices 1 and 2 along the axis 1 (column selection). 

4086 

4087 >>> df.take([1, 2], axis=1) 

4088 class max_speed 

4089 0 bird 389.0 

4090 2 bird 24.0 

4091 3 mammal 80.5 

4092 1 mammal NaN 

4093 

4094 We may also take elements using negative integers, which count from 

4095 the end of the object, just like with Python lists. 

4096 

4097 >>> df.take([-1, -2]) 

4098 name class max_speed 

4099 1 monkey mammal NaN 

4100 3 lion mammal 80.5 

4101 """ 

4102 

4103 nv.validate_take((), kwargs) 

4104 

4105 if not isinstance(indices, slice): 

4106 indices = np.asarray(indices, dtype=np.intp) 

4107 if ( 

4108 axis == 0 

4109 and indices.ndim == 1 

4110 and using_copy_on_write() 

4111 and is_range_indexer(indices, len(self)) 

4112 ): 

4113 return self.copy(deep=None) 

4114 elif self.ndim == 1: 

4115 raise TypeError( 

4116 f"{type(self).__name__}.take requires a sequence of integers, " 

4117 "not slice." 

4118 ) 

4119 else: 

4120 warnings.warn( 

4121 # GH#51539 

4122 f"Passing a slice to {type(self).__name__}.take is deprecated " 

4123 "and will raise in a future version. Use `obj[slicer]` or pass " 

4124 "a sequence of integers instead.", 

4125 FutureWarning, 

4126 stacklevel=find_stack_level(), 

4127 ) 

4128 # We can get here with a slice via DataFrame.__getitem__ 

4129 indices = np.arange( 

4130 indices.start, indices.stop, indices.step, dtype=np.intp 

4131 ) 

4132 

4133 new_data = self._mgr.take( 

4134 indices, 

4135 axis=self._get_block_manager_axis(axis), 

4136 verify=True, 

4137 ) 

4138 return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( 

4139 self, method="take" 

4140 ) 

4141 
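# A minimal sketch (not pandas source): take is purely positional, so it
# matches iloc with the same integer list regardless of the index labels.
import pandas as pd

s = pd.Series([10, 20, 30], index=[5, 1, 3])
assert s.take([0, 2]).equals(s.iloc[[0, 2]])
assert s.take([-1]).equals(s.iloc[[-1]])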

4142 @final 

4143 def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self: 

4144 """ 

4145 Internal version of the `take` method that sets the `_is_copy` 

4145 attribute to keep track of the parent dataframe (used in indexing 

4147 for the SettingWithCopyWarning). 

4148 

4149 For Series this does the same as the public take (it never sets `_is_copy`). 

4150 

4151 See the docstring of `take` for full explanation of the parameters. 

4152 """ 

4153 result = self.take(indices=indices, axis=axis) 

4154 # Maybe set copy if we didn't actually change the index. 

4155 if self.ndim == 2 and not result._get_axis(axis).equals(self._get_axis(axis)): 

4156 result._set_is_copy(self) 

4157 return result 

4158 

4159 @final 

4160 def xs( 

4161 self, 

4162 key: IndexLabel, 

4163 axis: Axis = 0, 

4164 level: IndexLabel | None = None, 

4165 drop_level: bool_t = True, 

4166 ) -> Self: 

4167 """ 

4168 Return cross-section from the Series/DataFrame. 

4169 

4170 This method takes a `key` argument to select data at a particular 

4171 level of a MultiIndex. 

4172 

4173 Parameters 

4174 ---------- 

4175 key : label or tuple of label 

4176 Label contained in the index, or partially in a MultiIndex. 

4177 axis : {0 or 'index', 1 or 'columns'}, default 0 

4178 Axis to retrieve cross-section on. 

4179 level : object, defaults to first n levels (n=1 or len(key)) 

4180 In case of a key partially contained in a MultiIndex, indicate 

4181 which levels are used. Levels can be referred by label or position. 

4182 drop_level : bool, default True 

4183 If False, returns object with same levels as self. 

4184 

4185 Returns 

4186 ------- 

4187 Series or DataFrame 

4188 Cross-section from the original Series or DataFrame 

4189 corresponding to the selected index levels. 

4190 

4191 See Also 

4192 -------- 

4193 DataFrame.loc : Access a group of rows and columns 

4194 by label(s) or a boolean array. 

4195 DataFrame.iloc : Purely integer-location based indexing 

4196 for selection by position. 

4197 

4198 Notes 

4199 ----- 

4200 `xs` cannot be used to set values. 

4201 

4202 MultiIndex Slicers is a generic way to get/set values on 

4203 any level or levels. 

4204 It is a superset of `xs` functionality, see 

4205 :ref:`MultiIndex Slicers <advanced.mi_slicers>`. 

4206 

4207 Examples 

4208 -------- 

4209 >>> d = {'num_legs': [4, 4, 2, 2], 

4210 ... 'num_wings': [0, 0, 2, 2], 

4211 ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], 

4212 ... 'animal': ['cat', 'dog', 'bat', 'penguin'], 

4213 ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} 

4214 >>> df = pd.DataFrame(data=d) 

4215 >>> df = df.set_index(['class', 'animal', 'locomotion']) 

4216 >>> df 

4217 num_legs num_wings 

4218 class animal locomotion 

4219 mammal cat walks 4 0 

4220 dog walks 4 0 

4221 bat flies 2 2 

4222 bird penguin walks 2 2 

4223 

4224 Get values at specified index 

4225 

4226 >>> df.xs('mammal') 

4227 num_legs num_wings 

4228 animal locomotion 

4229 cat walks 4 0 

4230 dog walks 4 0 

4231 bat flies 2 2 

4232 

4233 Get values at several indexes 

4234 

4235 >>> df.xs(('mammal', 'dog', 'walks')) 

4236 num_legs 4 

4237 num_wings 0 

4238 Name: (mammal, dog, walks), dtype: int64 

4239 

4240 Get values at specified index and level 

4241 

4242 >>> df.xs('cat', level=1) 

4243 num_legs num_wings 

4244 class locomotion 

4245 mammal walks 4 0 

4246 

4247 Get values at several indexes and levels 

4248 

4249 >>> df.xs(('bird', 'walks'), 

4250 ... level=[0, 'locomotion']) 

4251 num_legs num_wings 

4252 animal 

4253 penguin 2 2 

4254 

4255 Get values at specified column and axis 

4256 

4257 >>> df.xs('num_wings', axis=1) 

4258 class animal locomotion 

4259 mammal cat walks 0 

4260 dog walks 0 

4261 bat flies 2 

4262 bird penguin walks 2 

4263 Name: num_wings, dtype: int64 

4264 """ 

4265 axis = self._get_axis_number(axis) 

4266 labels = self._get_axis(axis) 

4267 

4268 if isinstance(key, list): 

4269 raise TypeError("list keys are not supported in xs, pass a tuple instead") 

4270 

4271 if level is not None: 

4272 if not isinstance(labels, MultiIndex): 

4273 raise TypeError("Index must be a MultiIndex") 

4274 loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) 

4275 

4276 # create the tuple of the indexer 

4277 _indexer = [slice(None)] * self.ndim 

4278 _indexer[axis] = loc 

4279 indexer = tuple(_indexer) 

4280 

4281 result = self.iloc[indexer] 

4282 setattr(result, result._get_axis_name(axis), new_ax) 

4283 return result 

4284 

4285 if axis == 1: 

4286 if drop_level: 

4287 return self[key] 

4288 index = self.columns 

4289 else: 

4290 index = self.index 

4291 

4292 if isinstance(index, MultiIndex): 

4293 loc, new_index = index._get_loc_level(key, level=0) 

4294 if not drop_level: 

4295 if lib.is_integer(loc): 

4296 # Slice index must be an integer or None 

4297 new_index = index[loc : loc + 1] 

4298 else: 

4299 new_index = index[loc] 

4300 else: 

4301 loc = index.get_loc(key) 

4302 

4303 if isinstance(loc, np.ndarray): 

4304 if loc.dtype == np.bool_: 

4305 (inds,) = loc.nonzero() 

4306 return self._take_with_is_copy(inds, axis=axis) 

4307 else: 

4308 return self._take_with_is_copy(loc, axis=axis) 

4309 

4310 if not is_scalar(loc): 

4311 new_index = index[loc] 

4312 

4313 if is_scalar(loc) and axis == 0: 

4314 # In this case loc should be an integer 

4315 if self.ndim == 1: 

4316 # if we encounter an array-like and we only have 1 dim 

4317 # that means that there are lists/ndarrays inside the Series! 

4318 # so just return them (GH 6394) 

4319 return self._values[loc] 

4320 

4321 new_mgr = self._mgr.fast_xs(loc) 

4322 

4323 result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) 

4324 result._name = self.index[loc] 

4325 result = result.__finalize__(self) 

4326 elif is_scalar(loc): 

4327 result = self.iloc[:, slice(loc, loc + 1)] 

4328 elif axis == 1: 

4329 result = self.iloc[:, loc] 

4330 else: 

4331 result = self.iloc[loc] 

4332 result.index = new_index 

4333 

4334 # this could be a view 

4335 # but only in a single-dtyped view sliceable case 

4336 result._set_is_copy(self, copy=not result._is_view) 

4337 return result 

4338 
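# A minimal sketch (not pandas source): drop_level=False keeps the selected
# level in the result instead of dropping it. The two-level index here is an
# assumption for illustration.
import pandas as pd

idx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
df = pd.DataFrame({"v": [10, 20, 30]}, index=idx)
print(df.xs("a"))                    # level 0 is dropped
print(df.xs("a", drop_level=False))  # level 0 is retained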

4339 def __getitem__(self, item): 

4340 raise AbstractMethodError(self) 

4341 

4342 @final 

4343 def _getitem_slice(self, key: slice) -> Self: 

4344 """ 

4345 __getitem__ for the case where the key is a slice object. 

4346 """ 

4347 # _convert_slice_indexer to determine if this slice is positional 

4348 # or label based, and if the latter, convert to positional 

4349 slobj = self.index._convert_slice_indexer(key, kind="getitem") 

4350 if isinstance(slobj, np.ndarray): 

4351 # reachable with DatetimeIndex 

4352 indexer = lib.maybe_indices_to_slice( 

4353 slobj.astype(np.intp, copy=False), len(self) 

4354 ) 

4355 if isinstance(indexer, np.ndarray): 

4356 # GH#43223 If we can not convert, use take 

4357 return self.take(indexer, axis=0) 

4358 slobj = indexer 

4359 return self._slice(slobj) 

4360 

4361 def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: 

4362 """ 

4363 Construct a slice of this container. 

4364 

4365 Slicing with this method is *always* positional. 

4366 """ 

4367 assert isinstance(slobj, slice), type(slobj) 

4368 axis = self._get_block_manager_axis(axis) 

4369 new_mgr = self._mgr.get_slice(slobj, axis=axis) 

4370 result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

4371 result = result.__finalize__(self) 

4372 

4373 # this could be a view 

4374 # but only in a single-dtyped view sliceable case 

4375 is_copy = axis != 0 or result._is_view 

4376 result._set_is_copy(self, copy=is_copy) 

4377 return result 

4378 
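# A minimal sketch (not pandas source): _convert_slice_indexer decides
# whether a slice is positional or label-based; label slices include both
# endpoints, while positional slices are end-exclusive.
import pandas as pd

df = pd.DataFrame({"v": [1, 2, 3]}, index=["a", "b", "c"])
print(df["a":"b"])  # label slice: rows 'a' and 'b' (inclusive)
print(df[0:2])      # positional slice: rows 'a' and 'b' (end-exclusive)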

4379 @final 

4380 def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: 

4381 if not copy: 

4382 self._is_copy = None 

4383 else: 

4384 assert ref is not None 

4385 self._is_copy = weakref.ref(ref) 

4386 

4387 def _check_is_chained_assignment_possible(self) -> bool_t: 

4388 """ 

4389 Check if we are a view, have a cacher, and are of mixed type. 

4390 If so, then force a setitem_copy check. 

4391 

4392 Should be called just prior to setting a value. 

4393 

4394 Will return True if we are a cached view of a single dtype, 

4395 meaning that the cacher should be updated following 

4396 setting. 

4397 """ 

4398 if self._is_copy: 

4399 self._check_setitem_copy(t="referent") 

4400 return False 

4401 

4402 @final 

4403 def _check_setitem_copy(self, t: str = "setting", force: bool_t = False): 

4404 """ 

4405 Validate if we are doing a setitem on a chained copy. 

4406 

4407 Parameters 

4408 ---------- 

4409 t : str, the type of setting error 

4410 force : bool, default False 

4411 If True, then force showing an error. 

4412 

4413 

4414 It is technically possible to figure out that we are setting on 

4415 a copy even WITH a multi-dtyped pandas object. In other words, some 

4416 blocks may be views while others are not. Currently _is_view will ALWAYS 

4417 return False for multi-blocks to avoid having to handle this case. 

4418 

4419 df = DataFrame(np.arange(0,9), columns=['count']) 

4420 df['group'] = 'b' 

4421 

4422 # This technically need not raise SettingWithCopy if both are views 

4423 # (which is not generally guaranteed, but is usually True). However, 

4424 # this is in general not good practice and we recommend using .loc. 

4425 df.iloc[0:5]['group'] = 'a' 

4426 

4427 """ 

4428 if using_copy_on_write() or warn_copy_on_write(): 

4429 return 

4430 

4431 # return early if the check is not needed 

4432 if not (force or self._is_copy): 

4433 return 

4434 

4435 value = config.get_option("mode.chained_assignment") 

4436 if value is None: 

4437 return 

4438 

4439 # see if the copy is not actually referenced; if so, then dissolve 

4440 # the copy weakref 

4441 if self._is_copy is not None and not isinstance(self._is_copy, str): 

4442 r = self._is_copy() 

4443 if not gc.get_referents(r) or (r is not None and r.shape == self.shape): 

4444 self._is_copy = None 

4445 return 

4446 

4447 # a custom message 

4448 if isinstance(self._is_copy, str): 

4449 t = self._is_copy 

4450 

4451 elif t == "referent": 

4452 t = ( 

4453 "\n" 

4454 "A value is trying to be set on a copy of a slice from a " 

4455 "DataFrame\n\n" 

4456 "See the caveats in the documentation: " 

4457 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4458 "indexing.html#returning-a-view-versus-a-copy" 

4459 ) 

4460 

4461 else: 

4462 t = ( 

4463 "\n" 

4464 "A value is trying to be set on a copy of a slice from a " 

4465 "DataFrame.\n" 

4466 "Try using .loc[row_indexer,col_indexer] = value " 

4467 "instead\n\nSee the caveats in the documentation: " 

4468 "https://pandas.pydata.org/pandas-docs/stable/user_guide/" 

4469 "indexing.html#returning-a-view-versus-a-copy" 

4470 ) 

4471 

4472 if value == "raise": 

4473 raise SettingWithCopyError(t) 

4474 if value == "warn": 

4475 warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) 

4476 
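# A minimal sketch (not pandas source): the chained assignment that this
# check guards against, next to the recommended single-step .loc form.
import pandas as pd

df = pd.DataFrame({"count": range(9)})
df["group"] = "b"
# df.iloc[0:5]["group"] = "a"         # may raise/warn SettingWithCopy
df.loc[df.index[0:5], "group"] = "a"  # unambiguous and always applied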

4477 @final 

4478 def __delitem__(self, key) -> None: 

4479 """ 

4480 Delete item 

4481 """ 

4482 deleted = False 

4483 

4484 maybe_shortcut = False 

4485 if self.ndim == 2 and isinstance(self.columns, MultiIndex): 

4486 try: 

4487 # By using engine's __contains__ we effectively 

4488 # restrict to same-length tuples 

4489 maybe_shortcut = key not in self.columns._engine 

4490 except TypeError: 

4491 pass 

4492 

4493 if maybe_shortcut: 

4494 # Allow shorthand to delete all columns whose first len(key) 

4495 # elements match key: 

4496 if not isinstance(key, tuple): 

4497 key = (key,) 

4498 for col in self.columns: 

4499 if isinstance(col, tuple) and col[: len(key)] == key: 

4500 del self[col] 

4501 deleted = True 

4502 if not deleted: 

4503 # If the above loop ran and didn't delete anything because 

4504 # there was no match, this call should raise the appropriate 

4505 # exception: 

4506 loc = self.axes[-1].get_loc(key) 

4507 self._mgr = self._mgr.idelete(loc) 

4508 

4509 # delete from the caches 

4510 try: 

4511 del self._item_cache[key] 

4512 except KeyError: 

4513 pass 

4514 
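# A minimal sketch (not pandas source): with MultiIndex columns, the
# shorthand above lets ``del df[key]`` remove every column whose leading
# levels match key. The columns here are assumptions for illustration.
import pandas as pd

cols = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")])
df = pd.DataFrame([[1, 2, 3]], columns=cols)
del df["a"]  # drops ('a', 'x') and ('a', 'y')
print(df.columns.tolist())  # [('b', 'x')]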

4515 # ---------------------------------------------------------------------- 

4516 # Unsorted 

4517 

4518 @final 

4519 def _check_inplace_and_allows_duplicate_labels(self, inplace: bool_t): 

4520 if inplace and not self.flags.allows_duplicate_labels: 

4521 raise ValueError( 

4522 "Cannot specify 'inplace=True' when " 

4523 "'self.flags.allows_duplicate_labels' is False." 

4524 ) 

4525 

4526 @final 

4527 def get(self, key, default=None): 

4528 """ 

4529 Get item from object for given key (ex: DataFrame column). 

4530 

4531 Returns default value if not found. 

4532 

4533 Parameters 

4534 ---------- 

4535 key : object 

4536 

4537 Returns 

4538 ------- 

4539 same type as items contained in object 

4540 

4541 Examples 

4542 -------- 

4543 >>> df = pd.DataFrame( 

4544 ... [ 

4545 ... [24.3, 75.7, "high"], 

4546 ... [31, 87.8, "high"], 

4547 ... [22, 71.6, "medium"], 

4548 ... [35, 95, "medium"], 

4549 ... ], 

4550 ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], 

4551 ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), 

4552 ... ) 

4553 

4554 >>> df 

4555 temp_celsius temp_fahrenheit windspeed 

4556 2014-02-12 24.3 75.7 high 

4557 2014-02-13 31.0 87.8 high 

4558 2014-02-14 22.0 71.6 medium 

4559 2014-02-15 35.0 95.0 medium 

4560 

4561 >>> df.get(["temp_celsius", "windspeed"]) 

4562 temp_celsius windspeed 

4563 2014-02-12 24.3 high 

4564 2014-02-13 31.0 high 

4565 2014-02-14 22.0 medium 

4566 2014-02-15 35.0 medium 

4567 

4568 >>> ser = df['windspeed'] 

4569 >>> ser.get('2014-02-13') 

4570 'high' 

4571 

4572 If the key isn't found, the default value will be used. 

4573 

4574 >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") 

4575 'default_value' 

4576 

4577 >>> ser.get('2014-02-10', '[unknown]') 

4578 '[unknown]' 

4579 """ 

4580 try: 

4581 return self[key] 

4582 except (KeyError, ValueError, IndexError): 

4583 return default 

4584 

4585 @final 

4586 @property 

4587 def _is_view(self) -> bool_t: 

4588 """Return boolean indicating if self is view of another array""" 

4589 return self._mgr.is_view 

4590 

4591 @final 

4592 def reindex_like( 

4593 self, 

4594 other, 

4595 method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, 

4596 copy: bool_t | None = None, 

4597 limit: int | None = None, 

4598 tolerance=None, 

4599 ) -> Self: 

4600 """ 

4601 Return an object with matching indices as other object. 

4602 

4603 Conform the object to the same index on all axes. Optional 

4604 filling logic, placing NaN in locations having no value 

4605 in the previous index. A new object is produced unless the 

4606 new index is equivalent to the current one and copy=False. 

4607 

4608 Parameters 

4609 ---------- 

4610 other : Object of the same data type 

4611 Its row and column indices are used to define the new indices 

4612 of this object. 

4613 method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} 

4614 Method to use for filling holes in reindexed DataFrame. 

4615 Please note: this is only applicable to DataFrames/Series with a 

4616 monotonically increasing/decreasing index. 

4617 

4618 * None (default): don't fill gaps 

4619 * pad / ffill: propagate last valid observation forward to next 

4620 valid 

4621 * backfill / bfill: use next valid observation to fill gap 

4622 * nearest: use nearest valid observations to fill gap. 

4623 

4624 copy : bool, default True 

4625 Return a new object, even if the passed indexes are the same. 

4626 

4627 .. note:: 

4628 The `copy` keyword will change behavior in pandas 3.0. 

4629 `Copy-on-Write 

4630 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

4631 will be enabled by default, which means that all methods with a 

4632 `copy` keyword will use a lazy copy mechanism to defer the copy and 

4633 ignore the `copy` keyword. The `copy` keyword will be removed in a 

4634 future version of pandas. 

4635 

4636 You can already get the future behavior and improvements through 

4637 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

4638 limit : int, default None 

4639 Maximum number of consecutive labels to fill for inexact matches. 

4640 tolerance : optional 

4641 Maximum distance between original and new labels for inexact 

4642 matches. The values of the index at the matching locations must 

4643 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

4644 

4645 Tolerance may be a scalar value, which applies the same tolerance 

4646 to all values, or list-like, which applies variable tolerance per 

4647 element. List-like includes list, tuple, array, Series, and must be 

4648 the same size as the index and its dtype must exactly match the 

4649 index's type. 

4650 

4651 Returns 

4652 ------- 

4653 Series or DataFrame 

4654 Same type as caller, but with changed indices on each axis. 

4655 

4656 See Also 

4657 -------- 

4658 DataFrame.set_index : Set row labels. 

4659 DataFrame.reset_index : Remove row labels or move them to new columns. 

4660 DataFrame.reindex : Change to new indices or expand indices. 

4661 

4662 Notes 

4663 ----- 

4664 Same as calling 

4665 ``.reindex(index=other.index, columns=other.columns,...)``. 

4666 

4667 Examples 

4668 -------- 

4669 >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], 

4670 ... [31, 87.8, 'high'], 

4671 ... [22, 71.6, 'medium'], 

4672 ... [35, 95, 'medium']], 

4673 ... columns=['temp_celsius', 'temp_fahrenheit', 

4674 ... 'windspeed'], 

4675 ... index=pd.date_range(start='2014-02-12', 

4676 ... end='2014-02-15', freq='D')) 

4677 

4678 >>> df1 

4679 temp_celsius temp_fahrenheit windspeed 

4680 2014-02-12 24.3 75.7 high 

4681 2014-02-13 31.0 87.8 high 

4682 2014-02-14 22.0 71.6 medium 

4683 2014-02-15 35.0 95.0 medium 

4684 

4685 >>> df2 = pd.DataFrame([[28, 'low'], 

4686 ... [30, 'low'], 

4687 ... [35.1, 'medium']], 

4688 ... columns=['temp_celsius', 'windspeed'], 

4689 ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', 

4690 ... '2014-02-15'])) 

4691 

4692 >>> df2 

4693 temp_celsius windspeed 

4694 2014-02-12 28.0 low 

4695 2014-02-13 30.0 low 

4696 2014-02-15 35.1 medium 

4697 

4698 >>> df2.reindex_like(df1) 

4699 temp_celsius temp_fahrenheit windspeed 

4700 2014-02-12 28.0 NaN low 

4701 2014-02-13 30.0 NaN low 

4702 2014-02-14 NaN NaN NaN 

4703 2014-02-15 35.1 NaN medium 

4704 """ 

4705 d = other._construct_axes_dict( 

4706 axes=self._AXIS_ORDERS, 

4707 method=method, 

4708 copy=copy, 

4709 limit=limit, 

4710 tolerance=tolerance, 

4711 ) 

4712 

4713 return self.reindex(**d) 

4714 
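# A minimal sketch (not pandas source) of the equivalence stated in Notes:
# reindex_like is a reindex over every axis of ``other``.
import pandas as pd

df1 = pd.DataFrame({"a": [1, 2]}, index=[0, 1])
df2 = pd.DataFrame({"a": [9], "b": [8]}, index=[1])
assert df2.reindex_like(df1).equals(
    df2.reindex(index=df1.index, columns=df1.columns)
)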

4715 @overload 

4716 def drop( 

4717 self, 

4718 labels: IndexLabel = ..., 

4719 *, 

4720 axis: Axis = ..., 

4721 index: IndexLabel = ..., 

4722 columns: IndexLabel = ..., 

4723 level: Level | None = ..., 

4724 inplace: Literal[True], 

4725 errors: IgnoreRaise = ..., 

4726 ) -> None: 

4727 ... 

4728 

4729 @overload 

4730 def drop( 

4731 self, 

4732 labels: IndexLabel = ..., 

4733 *, 

4734 axis: Axis = ..., 

4735 index: IndexLabel = ..., 

4736 columns: IndexLabel = ..., 

4737 level: Level | None = ..., 

4738 inplace: Literal[False] = ..., 

4739 errors: IgnoreRaise = ..., 

4740 ) -> Self: 

4741 ... 

4742 

4743 @overload 

4744 def drop( 

4745 self, 

4746 labels: IndexLabel = ..., 

4747 *, 

4748 axis: Axis = ..., 

4749 index: IndexLabel = ..., 

4750 columns: IndexLabel = ..., 

4751 level: Level | None = ..., 

4752 inplace: bool_t = ..., 

4753 errors: IgnoreRaise = ..., 

4754 ) -> Self | None: 

4755 ... 

4756 

4757 def drop( 

4758 self, 

4759 labels: IndexLabel | None = None, 

4760 *, 

4761 axis: Axis = 0, 

4762 index: IndexLabel | None = None, 

4763 columns: IndexLabel | None = None, 

4764 level: Level | None = None, 

4765 inplace: bool_t = False, 

4766 errors: IgnoreRaise = "raise", 

4767 ) -> Self | None: 

4768 inplace = validate_bool_kwarg(inplace, "inplace") 

4769 

4770 if labels is not None: 

4771 if index is not None or columns is not None: 

4772 raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") 

4773 axis_name = self._get_axis_name(axis) 

4774 axes = {axis_name: labels} 

4775 elif index is not None or columns is not None: 

4776 axes = {"index": index} 

4777 if self.ndim == 2: 

4778 axes["columns"] = columns 

4779 else: 

4780 raise ValueError( 

4781 "Need to specify at least one of 'labels', 'index' or 'columns'" 

4782 ) 

4783 

4784 obj = self 

4785 

4786 for axis, labels in axes.items(): 

4787 if labels is not None: 

4788 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 

4789 

4790 if inplace: 

4791 self._update_inplace(obj) 

4792 return None 

4793 else: 

4794 return obj 

4795 
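# A minimal sketch (not pandas source): ``labels`` + ``axis`` and the
# ``index``/``columns`` keywords are two spellings of the same drop.
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
assert df.drop("a", axis=1).equals(df.drop(columns="a"))
assert df.drop(0, axis=0).equals(df.drop(index=0))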

4796 @final 

4797 def _drop_axis( 

4798 self, 

4799 labels, 

4800 axis, 

4801 level=None, 

4802 errors: IgnoreRaise = "raise", 

4803 only_slice: bool_t = False, 

4804 ) -> Self: 

4805 """ 

4806 Drop labels from specified axis. Used in the ``drop`` method 

4807 internally. 

4808 

4809 Parameters 

4810 ---------- 

4811 labels : single label or list-like 

4812 axis : int or axis name 

4813 level : int or level name, default None 

4814 For MultiIndex 

4815 errors : {'ignore', 'raise'}, default 'raise' 

4816 If 'ignore', suppress error and only drop existing labels. 

4817 only_slice : bool, default False 

4818 Whether indexing along columns should be view-only. 

4819 

4820 """ 

4821 axis_num = self._get_axis_number(axis) 

4822 axis = self._get_axis(axis) 

4823 

4824 if axis.is_unique: 

4825 if level is not None: 

4826 if not isinstance(axis, MultiIndex): 

4827 raise AssertionError("axis must be a MultiIndex") 

4828 new_axis = axis.drop(labels, level=level, errors=errors) 

4829 else: 

4830 new_axis = axis.drop(labels, errors=errors) 

4831 indexer = axis.get_indexer(new_axis) 

4832 

4833 # Case for non-unique axis 

4834 else: 

4835 is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) 

4836 labels = ensure_object(common.index_labels_to_array(labels)) 

4837 if level is not None: 

4838 if not isinstance(axis, MultiIndex): 

4839 raise AssertionError("axis must be a MultiIndex") 

4840 mask = ~axis.get_level_values(level).isin(labels) 

4841 

4842 # GH 18561 MultiIndex.drop should raise if label is absent 

4843 if errors == "raise" and mask.all(): 

4844 raise KeyError(f"{labels} not found in axis") 

4845 elif ( 

4846 isinstance(axis, MultiIndex) 

4847 and labels.dtype == "object" 

4848 and not is_tuple_labels 

4849 ): 

4850 # Set level to zero in case of MultiIndex and label is string, 

4851 # because isin can't handle strings for MultiIndexes GH#36293 

4852 # In case of tuples we get dtype object but have to use isin GH#42771 

4853 mask = ~axis.get_level_values(0).isin(labels) 

4854 else: 

4855 mask = ~axis.isin(labels) 

4856 # Check if label doesn't exist along axis 

4857 labels_missing = (axis.get_indexer_for(labels) == -1).any() 

4858 if errors == "raise" and labels_missing: 

4859 raise KeyError(f"{labels} not found in axis") 

4860 

4861 if isinstance(mask.dtype, ExtensionDtype): 

4862 # GH#45860 

4863 mask = mask.to_numpy(dtype=bool) 

4864 

4865 indexer = mask.nonzero()[0] 

4866 new_axis = axis.take(indexer) 

4867 

4868 bm_axis = self.ndim - axis_num - 1 

4869 new_mgr = self._mgr.reindex_indexer( 

4870 new_axis, 

4871 indexer, 

4872 axis=bm_axis, 

4873 allow_dups=True, 

4874 copy=None, 

4875 only_slice=only_slice, 

4876 ) 

4877 result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

4878 if self.ndim == 1: 

4879 result._name = self.name 

4880 

4881 return result.__finalize__(self) 

4882 
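# A minimal sketch (not pandas source): on a non-unique axis, the mask-based
# path above removes *every* entry whose label matches.
import pandas as pd

s = pd.Series([1, 2, 3], index=["a", "a", "b"])
print(s.drop("a"))  # both 'a' rows are removed, leaving only 'b'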

4883 @final 

4884 def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: 

4885 """ 

4886 Replace self internals with result. 

4887 

4888 Parameters 

4889 ---------- 

4890 result : same type as self 

4891 verify_is_copy : bool, default True 

4892 Provide is_copy checks. 

4893 """ 

4894 # NOTE: This does *not* call __finalize__ and that's an explicit 

4895 # decision that we may revisit in the future. 

4896 self._reset_cache() 

4897 self._clear_item_cache() 

4898 self._mgr = result._mgr 

4899 self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) 

4900 

4901 @final 

4902 def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: 

4903 """ 

4904 Prefix labels with string `prefix`. 

4905 

4906 For Series, the row labels are prefixed. 

4907 For DataFrame, the column labels are prefixed. 

4908 

4909 Parameters 

4910 ---------- 

4911 prefix : str 

4912 The string to add before each label. 

4913 axis : {0 or 'index', 1 or 'columns', None}, default None 

4914 Axis to add prefix on 

4915 

4916 .. versionadded:: 2.0.0 

4917 

4918 Returns 

4919 ------- 

4920 Series or DataFrame 

4921 New Series or DataFrame with updated labels. 

4922 

4923 See Also 

4924 -------- 

4925 Series.add_suffix: Suffix row labels with string `suffix`. 

4926 DataFrame.add_suffix: Suffix column labels with string `suffix`. 

4927 

4928 Examples 

4929 -------- 

4930 >>> s = pd.Series([1, 2, 3, 4]) 

4931 >>> s 

4932 0 1 

4933 1 2 

4934 2 3 

4935 3 4 

4936 dtype: int64 

4937 

4938 >>> s.add_prefix('item_') 

4939 item_0 1 

4940 item_1 2 

4941 item_2 3 

4942 item_3 4 

4943 dtype: int64 

4944 

4945 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

4946 >>> df 

4947 A B 

4948 0 1 3 

4949 1 2 4 

4950 2 3 5 

4951 3 4 6 

4952 

4953 >>> df.add_prefix('col_') 

4954 col_A col_B 

4955 0 1 3 

4956 1 2 4 

4957 2 3 5 

4958 3 4 6 

4959 """ 

4960 f = lambda x: f"{prefix}{x}" 

4961 

4962 axis_name = self._info_axis_name 

4963 if axis is not None: 

4964 axis_name = self._get_axis_name(axis) 

4965 

4966 mapper = {axis_name: f} 

4967 

4968 # error: Incompatible return value type (got "Optional[Self]", 

4969 # expected "Self") 

4970 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

4971 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

4972 # error: Keywords must be strings 

4973 return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] 

4974 

4975 @final 

4976 def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self: 

4977 """ 

4978 Suffix labels with string `suffix`. 

4979 

4980 For Series, the row labels are suffixed. 

4981 For DataFrame, the column labels are suffixed. 

4982 

4983 Parameters 

4984 ---------- 

4985 suffix : str 

4986 The string to add after each label. 

4987 axis : {0 or 'index', 1 or 'columns', None}, default None 

4988 Axis to add suffix on 

4989 

4990 .. versionadded:: 2.0.0 

4991 

4992 Returns 

4993 ------- 

4994 Series or DataFrame 

4995 New Series or DataFrame with updated labels. 

4996 

4997 See Also 

4998 -------- 

4999 Series.add_prefix: Prefix row labels with string `prefix`. 

5000 DataFrame.add_prefix: Prefix column labels with string `prefix`. 

5001 

5002 Examples 

5003 -------- 

5004 >>> s = pd.Series([1, 2, 3, 4]) 

5005 >>> s 

5006 0 1 

5007 1 2 

5008 2 3 

5009 3 4 

5010 dtype: int64 

5011 

5012 >>> s.add_suffix('_item') 

5013 0_item 1 

5014 1_item 2 

5015 2_item 3 

5016 3_item 4 

5017 dtype: int64 

5018 

5019 >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) 

5020 >>> df 

5021 A B 

5022 0 1 3 

5023 1 2 4 

5024 2 3 5 

5025 3 4 6 

5026 

5027 >>> df.add_suffix('_col') 

5028 A_col B_col 

5029 0 1 3 

5030 1 2 4 

5031 2 3 5 

5032 3 4 6 

5033 """ 

5034 f = lambda x: f"{x}{suffix}" 

5035 

5036 axis_name = self._info_axis_name 

5037 if axis is not None: 

5038 axis_name = self._get_axis_name(axis) 

5039 

5040 mapper = {axis_name: f} 

5041 # error: Incompatible return value type (got "Optional[Self]", 

5042 # expected "Self") 

5043 # error: Argument 1 to "rename" of "NDFrame" has incompatible type 

5044 # "**Dict[str, partial[str]]"; expected "Union[str, int, None]" 

5045 # error: Keywords must be strings 

5046 return self._rename(**mapper) # type: ignore[return-value, arg-type, misc] 

5047 

5048 @overload 

5049 def sort_values( 

5050 self, 

5051 *, 

5052 axis: Axis = ..., 

5053 ascending: bool_t | Sequence[bool_t] = ..., 

5054 inplace: Literal[False] = ..., 

5055 kind: SortKind = ..., 

5056 na_position: NaPosition = ..., 

5057 ignore_index: bool_t = ..., 

5058 key: ValueKeyFunc = ..., 

5059 ) -> Self: 

5060 ... 

5061 

5062 @overload 

5063 def sort_values( 

5064 self, 

5065 *, 

5066 axis: Axis = ..., 

5067 ascending: bool_t | Sequence[bool_t] = ..., 

5068 inplace: Literal[True], 

5069 kind: SortKind = ..., 

5070 na_position: NaPosition = ..., 

5071 ignore_index: bool_t = ..., 

5072 key: ValueKeyFunc = ..., 

5073 ) -> None: 

5074 ... 

5075 

5076 @overload 

5077 def sort_values( 

5078 self, 

5079 *, 

5080 axis: Axis = ..., 

5081 ascending: bool_t | Sequence[bool_t] = ..., 

5082 inplace: bool_t = ..., 

5083 kind: SortKind = ..., 

5084 na_position: NaPosition = ..., 

5085 ignore_index: bool_t = ..., 

5086 key: ValueKeyFunc = ..., 

5087 ) -> Self | None: 

5088 ... 

5089 

5090 def sort_values( 

5091 self, 

5092 *, 

5093 axis: Axis = 0, 

5094 ascending: bool_t | Sequence[bool_t] = True, 

5095 inplace: bool_t = False, 

5096 kind: SortKind = "quicksort", 

5097 na_position: NaPosition = "last", 

5098 ignore_index: bool_t = False, 

5099 key: ValueKeyFunc | None = None, 

5100 ) -> Self | None: 

5101 """ 

5102 Sort by the values along either axis. 

5103 

5104 Parameters 

5105 ----------%(optional_by)s 

5106 axis : %(axes_single_arg)s, default 0 

5107 Axis to be sorted. 

5108 ascending : bool or list of bool, default True 

5109 Sort ascending vs. descending. Specify list for multiple sort 

5110 orders. If this is a list of bools, it must match the length of 

5111 the `by` argument. 

5112 inplace : bool, default False 

5113 If True, perform operation in-place. 

5114 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' 

5115 Choice of sorting algorithm. See also :func:`numpy.sort` for more 

5116 information. `mergesort` and `stable` are the only stable algorithms. For 

5117 DataFrames, this option is only applied when sorting on a single 

5118 column or label. 

5119 na_position : {'first', 'last'}, default 'last' 

5120 Puts NaNs at the beginning if `first`; `last` puts NaNs at the 

5121 end. 

5122 ignore_index : bool, default False 

5123 If True, the resulting axis will be labeled 0, 1, …, n - 1. 

5124 key : callable, optional 

5125 Apply the key function to the values 

5126 before sorting. This is similar to the `key` argument in the 

5127 builtin :meth:`sorted` function, with the notable difference that 

5128 this `key` function should be *vectorized*. It should expect a 

5129 ``Series`` and return a Series with the same shape as the input. 

5130 It will be applied to each column in `by` independently. 

5131 

5132 Returns 

5133 ------- 

5134 DataFrame or None 

5135 DataFrame with sorted values or None if ``inplace=True``. 

5136 

5137 See Also 

5138 -------- 

5139 DataFrame.sort_index : Sort a DataFrame by the index. 

5140 Series.sort_values : Similar method for a Series. 

5141 

5142 Examples 

5143 -------- 

5144 >>> df = pd.DataFrame({ 

5145 ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], 

5146 ... 'col2': [2, 1, 9, 8, 7, 4], 

5147 ... 'col3': [0, 1, 9, 4, 2, 3], 

5148 ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] 

5149 ... }) 

5150 >>> df 

5151 col1 col2 col3 col4 

5152 0 A 2 0 a 

5153 1 A 1 1 B 

5154 2 B 9 9 c 

5155 3 NaN 8 4 D 

5156 4 D 7 2 e 

5157 5 C 4 3 F 

5158 

5159 Sort by col1 

5160 

5161 >>> df.sort_values(by=['col1']) 

5162 col1 col2 col3 col4 

5163 0 A 2 0 a 

5164 1 A 1 1 B 

5165 2 B 9 9 c 

5166 5 C 4 3 F 

5167 4 D 7 2 e 

5168 3 NaN 8 4 D 

5169 

5170 Sort by multiple columns 

5171 

5172 >>> df.sort_values(by=['col1', 'col2']) 

5173 col1 col2 col3 col4 

5174 1 A 1 1 B 

5175 0 A 2 0 a 

5176 2 B 9 9 c 

5177 5 C 4 3 F 

5178 4 D 7 2 e 

5179 3 NaN 8 4 D 

5180 

5181 Sort Descending 

5182 

5183 >>> df.sort_values(by='col1', ascending=False) 

5184 col1 col2 col3 col4 

5185 4 D 7 2 e 

5186 5 C 4 3 F 

5187 2 B 9 9 c 

5188 0 A 2 0 a 

5189 1 A 1 1 B 

5190 3 NaN 8 4 D 

5191 

5192 Putting NAs first 

5193 

5194 >>> df.sort_values(by='col1', ascending=False, na_position='first') 

5195 col1 col2 col3 col4 

5196 3 NaN 8 4 D 

5197 4 D 7 2 e 

5198 5 C 4 3 F 

5199 2 B 9 9 c 

5200 0 A 2 0 a 

5201 1 A 1 1 B 

5202 

5203 Sorting with a key function 

5204 

5205 >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) 

5206 col1 col2 col3 col4 

5207 0 A 2 0 a 

5208 1 A 1 1 B 

5209 2 B 9 9 c 

5210 3 NaN 8 4 D 

5211 4 D 7 2 e 

5212 5 C 4 3 F 

5213 

5214 Natural sort with the key argument, 

5215 using the `natsort <https://github.com/SethMMorton/natsort>`__ package. 

5216 

5217 >>> df = pd.DataFrame({ 

5218 ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], 

5219 ... "value": [10, 20, 30, 40, 50] 

5220 ... }) 

5221 >>> df 

5222 time value 

5223 0 0hr 10 

5224 1 128hr 20 

5225 2 72hr 30 

5226 3 48hr 40 

5227 4 96hr 50 

5228 >>> from natsort import index_natsorted 

5229 >>> df.sort_values( 

5230 ... by="time", 

5231 ... key=lambda x: np.argsort(index_natsorted(df["time"])) 

5232 ... ) 

5233 time value 

5234 0 0hr 10 

5235 3 48hr 40 

5236 2 72hr 30 

5237 4 96hr 50 

5238 1 128hr 20 

5239 """ 

5240 raise AbstractMethodError(self) 

5241 
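# A minimal sketch (not pandas source): ``kind`` only matters for ties; a
# stable sort preserves the original order of rows with equal keys.
import pandas as pd

df = pd.DataFrame({"k": [1, 1, 0], "v": ["x", "y", "z"]})
print(df.sort_values(by="k", kind="stable"))  # 'x' stays before 'y'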

5242 @overload 

5243 def sort_index( 

5244 self, 

5245 *, 

5246 axis: Axis = ..., 

5247 level: IndexLabel = ..., 

5248 ascending: bool_t | Sequence[bool_t] = ..., 

5249 inplace: Literal[True], 

5250 kind: SortKind = ..., 

5251 na_position: NaPosition = ..., 

5252 sort_remaining: bool_t = ..., 

5253 ignore_index: bool_t = ..., 

5254 key: IndexKeyFunc = ..., 

5255 ) -> None: 

5256 ... 

5257 

5258 @overload 

5259 def sort_index( 

5260 self, 

5261 *, 

5262 axis: Axis = ..., 

5263 level: IndexLabel = ..., 

5264 ascending: bool_t | Sequence[bool_t] = ..., 

5265 inplace: Literal[False] = ..., 

5266 kind: SortKind = ..., 

5267 na_position: NaPosition = ..., 

5268 sort_remaining: bool_t = ..., 

5269 ignore_index: bool_t = ..., 

5270 key: IndexKeyFunc = ..., 

5271 ) -> Self: 

5272 ... 

5273 

5274 @overload 

5275 def sort_index( 

5276 self, 

5277 *, 

5278 axis: Axis = ..., 

5279 level: IndexLabel = ..., 

5280 ascending: bool_t | Sequence[bool_t] = ..., 

5281 inplace: bool_t = ..., 

5282 kind: SortKind = ..., 

5283 na_position: NaPosition = ..., 

5284 sort_remaining: bool_t = ..., 

5285 ignore_index: bool_t = ..., 

5286 key: IndexKeyFunc = ..., 

5287 ) -> Self | None: 

5288 ... 

5289 

5290 def sort_index( 

5291 self, 

5292 *, 

5293 axis: Axis = 0, 

5294 level: IndexLabel | None = None, 

5295 ascending: bool_t | Sequence[bool_t] = True, 

5296 inplace: bool_t = False, 

5297 kind: SortKind = "quicksort", 

5298 na_position: NaPosition = "last", 

5299 sort_remaining: bool_t = True, 

5300 ignore_index: bool_t = False, 

5301 key: IndexKeyFunc | None = None, 

5302 ) -> Self | None: 

5303 inplace = validate_bool_kwarg(inplace, "inplace") 

5304 axis = self._get_axis_number(axis) 

5305 ascending = validate_ascending(ascending) 

5306 

5307 target = self._get_axis(axis) 

5308 

5309 indexer = get_indexer_indexer( 

5310 target, level, ascending, kind, na_position, sort_remaining, key 

5311 ) 

5312 

5313 if indexer is None: 

5314 if inplace: 

5315 result = self 

5316 else: 

5317 result = self.copy(deep=None) 

5318 

5319 if ignore_index: 

5320 result.index = default_index(len(self)) 

5321 if inplace: 

5322 return None 

5323 else: 

5324 return result 

5325 

5326 baxis = self._get_block_manager_axis(axis) 

5327 new_data = self._mgr.take(indexer, axis=baxis, verify=False) 

5328 

5329 # reconstruct axis if needed 

5330 if not ignore_index: 

5331 new_axis = new_data.axes[baxis]._sort_levels_monotonic() 

5332 else: 

5333 new_axis = default_index(len(indexer)) 

5334 new_data.set_axis(baxis, new_axis) 

5335 

5336 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

5337 

5338 if inplace: 

5339 return self._update_inplace(result) 

5340 else: 

5341 return result.__finalize__(self, method="sort_index") 

5342 
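# Supplemental sketch, not part of the original source: ``sort_index``
# computes an indexer over the target axis and takes it through the block
# manager, so a round trip looks like this (hypothetical data):
#
#   >>> df = pd.DataFrame({"x": [1, 2, 3]}, index=[3, 1, 2])
#   >>> df.sort_index()                                     # rows at labels 1, 2, 3
#   >>> df.sort_index(ascending=False, ignore_index=True)   # relabelled 0..n-1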

5343 @doc( 

5344 klass=_shared_doc_kwargs["klass"], 

5345 optional_reindex="", 

5346 ) 

5347 def reindex( 

5348 self, 

5349 labels=None, 

5350 *, 

5351 index=None, 

5352 columns=None, 

5353 axis: Axis | None = None, 

5354 method: ReindexMethod | None = None, 

5355 copy: bool_t | None = None, 

5356 level: Level | None = None, 

5357 fill_value: Scalar | None = np.nan, 

5358 limit: int | None = None, 

5359 tolerance=None, 

5360 ) -> Self: 

5361 """ 

5362 Conform {klass} to new index with optional filling logic. 

5363 

5364 Places NA/NaN in locations having no value in the previous index. A new object 

5365 is produced unless the new index is equivalent to the current one and 

5366 ``copy=False``. 

5367 

5368 Parameters 

5369 ---------- 

5370 {optional_reindex} 

5371 method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}} 

5372 Method to use for filling holes in reindexed DataFrame. 

5373 Please note: this is only applicable to DataFrames/Series with a 

5374 monotonically increasing/decreasing index. 

5375 

5376 * None (default): don't fill gaps 

5377 * pad / ffill: Propagate last valid observation forward to next 

5378 valid. 

5379 * backfill / bfill: Use next valid observation to fill gap. 

5380 * nearest: Use nearest valid observations to fill gap. 

5381 

5382 copy : bool, default True 

5383 Return a new object, even if the passed indexes are the same. 

5384 

5385 .. note:: 

5386 The `copy` keyword will change behavior in pandas 3.0. 

5387 `Copy-on-Write 

5388 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

5389 will be enabled by default, which means that all methods with a 

5390 `copy` keyword will use a lazy copy mechanism to defer the copy and 

5391 ignore the `copy` keyword. The `copy` keyword will be removed in a 

5392 future version of pandas. 

5393 

5394 You can already get the future behavior and improvements through 

5395 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

5396 level : int or name 

5397 Broadcast across a level, matching Index values on the 

5398 passed MultiIndex level. 

5399 fill_value : scalar, default np.nan 

5400 Value to use for missing values. Defaults to NaN, but can be any 

5401 "compatible" value. 

5402 limit : int, default None 

5403 Maximum number of consecutive elements to forward or backward fill. 

5404 tolerance : optional 

5405 Maximum distance between original and new labels for inexact 

5406 matches. The values of the index at the matching locations must 

5407 satisfy the equation ``abs(index[indexer] - target) <= tolerance``. 

5408 

5409 Tolerance may be a scalar value, which applies the same tolerance 

5410 to all values, or list-like, which applies variable tolerance per 

5411 element. List-like includes list, tuple, array, Series, and must be 

5412 the same size as the index and its dtype must exactly match the 

5413 index's type. 

5414 

5415 Returns 

5416 ------- 

5417 {klass} with changed index. 

5418 

5419 See Also 

5420 -------- 

5421 DataFrame.set_index : Set row labels. 

5422 DataFrame.reset_index : Remove row labels or move them to new columns. 

5423 DataFrame.reindex_like : Change to same indices as other DataFrame. 

5424 

5425 Examples 

5426 -------- 

5427 ``DataFrame.reindex`` supports two calling conventions 

5428 

5429 * ``(index=index_labels, columns=column_labels, ...)`` 

5430 * ``(labels, axis={{'index', 'columns'}}, ...)`` 

5431 

5432 We *highly* recommend using keyword arguments to clarify your 

5433 intent. 

5434 

5435 Create a dataframe with some fictional data. 

5436 

5437 >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] 

5438 >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], 

5439 ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, 

5440 ... index=index) 

5441 >>> df 

5442 http_status response_time 

5443 Firefox 200 0.04 

5444 Chrome 200 0.02 

5445 Safari 404 0.07 

5446 IE10 404 0.08 

5447 Konqueror 301 1.00 

5448 

5449 Create a new index and reindex the dataframe. By default 

5450 values in the new index that do not have corresponding 

5451 records in the dataframe are assigned ``NaN``. 

5452 

5453 >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 

5454 ... 'Chrome'] 

5455 >>> df.reindex(new_index) 

5456 http_status response_time 

5457 Safari 404.0 0.07 

5458 Iceweasel NaN NaN 

5459 Comodo Dragon NaN NaN 

5460 IE10 404.0 0.08 

5461 Chrome 200.0 0.02 

5462 

5463 We can fill in the missing values by passing a value to 

5464 the keyword ``fill_value``. Because the index is not monotonically 

5465 increasing or decreasing, we cannot use arguments to the keyword 

5466 ``method`` to fill the ``NaN`` values. 

5467 

5468 >>> df.reindex(new_index, fill_value=0) 

5469 http_status response_time 

5470 Safari 404 0.07 

5471 Iceweasel 0 0.00 

5472 Comodo Dragon 0 0.00 

5473 IE10 404 0.08 

5474 Chrome 200 0.02 

5475 

5476 >>> df.reindex(new_index, fill_value='missing') 

5477 http_status response_time 

5478 Safari 404 0.07 

5479 Iceweasel missing missing 

5480 Comodo Dragon missing missing 

5481 IE10 404 0.08 

5482 Chrome 200 0.02 

5483 

5484 We can also reindex the columns. 

5485 

5486 >>> df.reindex(columns=['http_status', 'user_agent']) 

5487 http_status user_agent 

5488 Firefox 200 NaN 

5489 Chrome 200 NaN 

5490 Safari 404 NaN 

5491 IE10 404 NaN 

5492 Konqueror 301 NaN 

5493 

5494 Or we can use "axis-style" keyword arguments 

5495 

5496 >>> df.reindex(['http_status', 'user_agent'], axis="columns") 

5497 http_status user_agent 

5498 Firefox 200 NaN 

5499 Chrome 200 NaN 

5500 Safari 404 NaN 

5501 IE10 404 NaN 

5502 Konqueror 301 NaN 

5503 

5504 To further illustrate the filling functionality in 

5505 ``reindex``, we will create a dataframe with a 

5506 monotonically increasing index (for example, a sequence 

5507 of dates). 

5508 

5509 >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') 

5510 >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, 

5511 ... index=date_index) 

5512 >>> df2 

5513 prices 

5514 2010-01-01 100.0 

5515 2010-01-02 101.0 

5516 2010-01-03 NaN 

5517 2010-01-04 100.0 

5518 2010-01-05 89.0 

5519 2010-01-06 88.0 

5520 

5521 Suppose we decide to expand the dataframe to cover a wider 

5522 date range. 

5523 

5524 >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') 

5525 >>> df2.reindex(date_index2) 

5526 prices 

5527 2009-12-29 NaN 

5528 2009-12-30 NaN 

5529 2009-12-31 NaN 

5530 2010-01-01 100.0 

5531 2010-01-02 101.0 

5532 2010-01-03 NaN 

5533 2010-01-04 100.0 

5534 2010-01-05 89.0 

5535 2010-01-06 88.0 

5536 2010-01-07 NaN 

5537 

5538 The index entries that did not have a value in the original data frame 

5539 (for example, '2009-12-29') are by default filled with ``NaN``. 

5540 If desired, we can fill in the missing values using one of several 

5541 options. 

5542 

5543 For example, to back-propagate the next valid value to fill the ``NaN`` 

5544 values, pass ``bfill`` as an argument to the ``method`` keyword. 

5545 

5546 >>> df2.reindex(date_index2, method='bfill') 

5547 prices 

5548 2009-12-29 100.0 

5549 2009-12-30 100.0 

5550 2009-12-31 100.0 

5551 2010-01-01 100.0 

5552 2010-01-02 101.0 

5553 2010-01-03 NaN 

5554 2010-01-04 100.0 

5555 2010-01-05 89.0 

5556 2010-01-06 88.0 

5557 2010-01-07 NaN 

5558 

5559 Please note that the ``NaN`` value present in the original dataframe 

5560 (at index value 2010-01-03) will not be filled by any of the 

5561 value propagation schemes. This is because filling while reindexing 

5562 does not look at dataframe values, but only compares the original and 

5563 desired indexes. If you do want to fill in the ``NaN`` values present 

5564 in the original dataframe, use the ``fillna()`` method. 

5565 

5566 See the :ref:`user guide <basics.reindexing>` for more. 
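
 As an illustrative sketch of the ``tolerance`` keyword (hypothetical
 values, output skipped): only new labels within one day of an existing
 date are matched; the rest stay ``NaN``.

 >>> df2.reindex(date_index2, method='nearest',
 ...             tolerance=pd.Timedelta('1 day'))  # doctest: +SKIP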

5567 """ 

5568 # TODO: Decide if we care about having different examples for different 

5569 # kinds 

5570 

5571 if index is not None and columns is not None and labels is not None: 

5572 raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.") 

5573 elif index is not None or columns is not None: 

5574 if axis is not None: 

5575 raise TypeError( 

5576 "Cannot specify both 'axis' and any of 'index' or 'columns'" 

5577 ) 

5578 if labels is not None: 

5579 if index is not None: 

5580 columns = labels 

5581 else: 

5582 index = labels 

5583 else: 

5584 if axis and self._get_axis_number(axis) == 1: 

5585 columns = labels 

5586 else: 

5587 index = labels 

5588 axes: dict[Literal["index", "columns"], Any] = { 

5589 "index": index, 

5590 "columns": columns, 

5591 } 

5592 method = clean_reindex_fill_method(method) 

5593 

5594 # if all axes that are requested to reindex are equal, then only copy 

5595 # if indicated must have index names equal here as well as values 

5596 if copy and using_copy_on_write(): 

5597 copy = False 

5598 if all( 

5599 self._get_axis(axis_name).identical(ax) 

5600 for axis_name, ax in axes.items() 

5601 if ax is not None 

5602 ): 

5603 return self.copy(deep=copy) 

5604 

5605 # check if we are a multi reindex 

5606 if self._needs_reindex_multi(axes, method, level): 

5607 return self._reindex_multi(axes, copy, fill_value) 

5608 

5609 # perform the reindex on the axes 

5610 return self._reindex_axes( 

5611 axes, level, limit, tolerance, method, fill_value, copy 

5612 ).__finalize__(self, method="reindex") 

5613 

5614 @final 

5615 def _reindex_axes( 

5616 self, 

5617 axes, 

5618 level: Level | None, 

5619 limit: int | None, 

5620 tolerance, 

5621 method, 

5622 fill_value: Scalar | None, 

5623 copy: bool_t | None, 

5624 ) -> Self: 

5625 """Perform the reindex for all the axes.""" 

5626 obj = self 

5627 for a in self._AXIS_ORDERS: 

5628 labels = axes[a] 

5629 if labels is None: 

5630 continue 

5631 

5632 ax = self._get_axis(a) 

5633 new_index, indexer = ax.reindex( 

5634 labels, level=level, limit=limit, tolerance=tolerance, method=method 

5635 ) 

5636 

5637 axis = self._get_axis_number(a) 

5638 obj = obj._reindex_with_indexers( 

5639 {axis: [new_index, indexer]}, 

5640 fill_value=fill_value, 

5641 copy=copy, 

5642 allow_dups=False, 

5643 ) 

5644 # If we've made a copy once, no need to make another one 

5645 copy = False 

5646 

5647 return obj 

5648 

5649 def _needs_reindex_multi(self, axes, method, level: Level | None) -> bool_t: 

5650 """Check if we do need a multi reindex.""" 

5651 return ( 

5652 (common.count_not_none(*axes.values()) == self._AXIS_LEN) 

5653 and method is None 

5654 and level is None 

5655 # reindex_multi calls self.values, so we only want to go 

5656 # down that path when doing so is cheap. 

5657 and self._can_fast_transpose 

5658 ) 

5659 

5660 def _reindex_multi(self, axes, copy, fill_value): 

5661 raise AbstractMethodError(self) 

5662 

5663 @final 

5664 def _reindex_with_indexers( 

5665 self, 

5666 reindexers, 

5667 fill_value=None, 

5668 copy: bool_t | None = False, 

5669 allow_dups: bool_t = False, 

5670 ) -> Self: 

5671 """allow_dups indicates an internal call here""" 

5672 # reindex doing multiple operations on different axes if indicated 

5673 new_data = self._mgr 

5674 for axis in sorted(reindexers.keys()): 

5675 index, indexer = reindexers[axis] 

5676 baxis = self._get_block_manager_axis(axis) 

5677 

5678 if index is None: 

5679 continue 

5680 

5681 index = ensure_index(index) 

5682 if indexer is not None: 

5683 indexer = ensure_platform_int(indexer) 

5684 

5685 # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi) 

5686 new_data = new_data.reindex_indexer( 

5687 index, 

5688 indexer, 

5689 axis=baxis, 

5690 fill_value=fill_value, 

5691 allow_dups=allow_dups, 

5692 copy=copy, 

5693 ) 

5694 # If we've made a copy once, no need to make another one 

5695 copy = False 

5696 

5697 if ( 

5698 (copy or copy is None) 

5699 and new_data is self._mgr 

5700 and not using_copy_on_write() 

5701 ): 

5702 new_data = new_data.copy(deep=copy) 

5703 elif using_copy_on_write() and new_data is self._mgr: 

5704 new_data = new_data.copy(deep=False) 

5705 

5706 return self._constructor_from_mgr(new_data, axes=new_data.axes).__finalize__( 

5707 self 

5708 ) 

5709 

5710 def filter( 

5711 self, 

5712 items=None, 

5713 like: str | None = None, 

5714 regex: str | None = None, 

5715 axis: Axis | None = None, 

5716 ) -> Self: 

5717 """ 

5718 Subset the dataframe rows or columns according to the specified index labels. 

5719 

5720 Note that this routine does not filter a dataframe on its 

5721 contents. The filter is applied to the labels of the index. 

5722 

5723 Parameters 

5724 ---------- 

5725 items : list-like 

5726 Keep labels from axis which are in items. 

5727 like : str 

5728 Keep labels from axis for which "like in label == True". 

5729 regex : str (regular expression) 

5730 Keep labels from axis for which re.search(regex, label) == True. 

5731 axis : {0 or 'index', 1 or 'columns', None}, default None 

5732 The axis to filter on, expressed either as an index (int) 

5733 or axis name (str). By default this is the info axis, 'columns' for 

5734 DataFrame. For `Series` this parameter is unused and defaults to `None`. 

5735 

5736 Returns 

5737 ------- 

5738 same type as input object 

5739 

5740 See Also 

5741 -------- 

5742 DataFrame.loc : Access a group of rows and columns 

5743 by label(s) or a boolean array. 

5744 

5745 Notes 

5746 ----- 

5747 The ``items``, ``like``, and ``regex`` parameters are 

5748 enforced to be mutually exclusive. 

5749 

5750 ``axis`` defaults to the info axis that is used when indexing 

5751 with ``[]``. 

5752 

5753 Examples 

5754 -------- 

5755 >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), 

5756 ... index=['mouse', 'rabbit'], 

5757 ... columns=['one', 'two', 'three']) 

5758 >>> df 

5759 one two three 

5760 mouse 1 2 3 

5761 rabbit 4 5 6 

5762 

5763 >>> # select columns by name 

5764 >>> df.filter(items=['one', 'three']) 

5765 one three 

5766 mouse 1 3 

5767 rabbit 4 6 

5768 

5769 >>> # select columns by regular expression 

5770 >>> df.filter(regex='e$', axis=1) 

5771 one three 

5772 mouse 1 3 

5773 rabbit 4 6 

5774 

5775 >>> # select rows containing 'bbi' 

5776 >>> df.filter(like='bbi', axis=0) 

5777 one two three 

5778 rabbit 4 5 6 

5779 """ 

5780 nkw = common.count_not_none(items, like, regex) 

5781 if nkw > 1: 

5782 raise TypeError( 

5783 "Keyword arguments `items`, `like`, or `regex` " 

5784 "are mutually exclusive" 

5785 ) 

5786 

5787 if axis is None: 

5788 axis = self._info_axis_name 

5789 labels = self._get_axis(axis) 

5790 

5791 if items is not None: 

5792 name = self._get_axis_name(axis) 

5793 items = Index(items).intersection(labels) 

5794 if len(items) == 0: 

5795 # Keep the dtype of labels when we are empty 

5796 items = items.astype(labels.dtype) 

5797 # error: Keywords must be strings 

5798 return self.reindex(**{name: items}) # type: ignore[misc] 

5799 elif like: 

5800 

5801 def f(x) -> bool_t: 

5802 assert like is not None # needed for mypy 

5803 return like in ensure_str(x) 

5804 

5805 values = labels.map(f) 

5806 return self.loc(axis=axis)[values] 

5807 elif regex: 

5808 

5809 def f(x) -> bool_t: 

5810 return matcher.search(ensure_str(x)) is not None 

5811 

5812 matcher = re.compile(regex) 

5813 values = labels.map(f) 

5814 return self.loc(axis=axis)[values] 

5815 else: 

5816 raise TypeError("Must pass either `items`, `like`, or `regex`") 

5817 
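# Supplemental sketch, not part of the original source: ``items``, ``like``
# and ``regex`` are mutually exclusive, so combining them raises
# (hypothetical call):
#
#   >>> df.filter(items=['one'], like='bbi')  # doctest: +SKIP
#   TypeError: Keyword arguments `items`, `like`, or `regex` are mutually exclusive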

5818 @final 

5819 def head(self, n: int = 5) -> Self: 

5820 """ 

5821 Return the first `n` rows. 

5822 

5823 This function returns the first `n` rows for the object based 

5824 on position. It is useful for quickly testing if your object 

5825 has the right type of data in it. 

5826 

5827 For negative values of `n`, this function returns all rows except 

5828 the last `|n|` rows, equivalent to ``df[:n]``. 

5829 

5830 If n is larger than the number of rows, this function returns all rows. 

5831 

5832 Parameters 

5833 ---------- 

5834 n : int, default 5 

5835 Number of rows to select. 

5836 

5837 Returns 

5838 ------- 

5839 same type as caller 

5840 The first `n` rows of the caller object. 

5841 

5842 See Also 

5843 -------- 

5844 DataFrame.tail: Returns the last `n` rows. 

5845 

5846 Examples 

5847 -------- 

5848 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5849 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5850 >>> df 

5851 animal 

5852 0 alligator 

5853 1 bee 

5854 2 falcon 

5855 3 lion 

5856 4 monkey 

5857 5 parrot 

5858 6 shark 

5859 7 whale 

5860 8 zebra 

5861 

5862 Viewing the first 5 lines 

5863 

5864 >>> df.head() 

5865 animal 

5866 0 alligator 

5867 1 bee 

5868 2 falcon 

5869 3 lion 

5870 4 monkey 

5871 

5872 Viewing the first `n` lines (three in this case) 

5873 

5874 >>> df.head(3) 

5875 animal 

5876 0 alligator 

5877 1 bee 

5878 2 falcon 

5879 

5880 For negative values of `n` 

5881 

5882 >>> df.head(-3) 

5883 animal 

5884 0 alligator 

5885 1 bee 

5886 2 falcon 

5887 3 lion 

5888 4 monkey 

5889 5 parrot 

5890 """ 

5891 if using_copy_on_write(): 

5892 return self.iloc[:n].copy() 

5893 return self.iloc[:n] 

5894 
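# Supplemental sketch, not part of the original source: under Copy-on-Write,
# ``head`` returns a copy, so writing to the result leaves the parent intact
# (hypothetical):
#
#   >>> with pd.option_context("mode.copy_on_write", True):
#   ...     top = df.head(2)
#   ...     top.iloc[0, 0] = "changed"   # df itself is unchanged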

5895 @final 

5896 def tail(self, n: int = 5) -> Self: 

5897 """ 

5898 Return the last `n` rows. 

5899 

5900 This function returns last `n` rows from the object based on 

5901 position. It is useful for quickly verifying data, for example, 

5902 after sorting or appending rows. 

5903 

5904 For negative values of `n`, this function returns all rows except 

5905 the first `|n|` rows, equivalent to ``df[|n|:]``. 

5906 

5907 If n is larger than the number of rows, this function returns all rows. 

5908 

5909 Parameters 

5910 ---------- 

5911 n : int, default 5 

5912 Number of rows to select. 

5913 

5914 Returns 

5915 ------- 

5916 type of caller 

5917 The last `n` rows of the caller object. 

5918 

5919 See Also 

5920 -------- 

5921 DataFrame.head : The first `n` rows of the caller object. 

5922 

5923 Examples 

5924 -------- 

5925 >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', 

5926 ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) 

5927 >>> df 

5928 animal 

5929 0 alligator 

5930 1 bee 

5931 2 falcon 

5932 3 lion 

5933 4 monkey 

5934 5 parrot 

5935 6 shark 

5936 7 whale 

5937 8 zebra 

5938 

5939 Viewing the last 5 lines 

5940 

5941 >>> df.tail() 

5942 animal 

5943 4 monkey 

5944 5 parrot 

5945 6 shark 

5946 7 whale 

5947 8 zebra 

5948 

5949 Viewing the last `n` lines (three in this case) 

5950 

5951 >>> df.tail(3) 

5952 animal 

5953 6 shark 

5954 7 whale 

5955 8 zebra 

5956 

5957 For negative values of `n` 

5958 

5959 >>> df.tail(-3) 

5960 animal 

5961 3 lion 

5962 4 monkey 

5963 5 parrot 

5964 6 shark 

5965 7 whale 

5966 8 zebra 

5967 """ 

5968 if using_copy_on_write(): 

5969 if n == 0: 

5970 return self.iloc[0:0].copy() 

5971 return self.iloc[-n:].copy() 

5972 if n == 0: 

5973 return self.iloc[0:0] 

5974 return self.iloc[-n:] 

5975 

5976 @final 

5977 def sample( 

5978 self, 

5979 n: int | None = None, 

5980 frac: float | None = None, 

5981 replace: bool_t = False, 

5982 weights=None, 

5983 random_state: RandomState | None = None, 

5984 axis: Axis | None = None, 

5985 ignore_index: bool_t = False, 

5986 ) -> Self: 

5987 """ 

5988 Return a random sample of items from an axis of object. 

5989 

5990 You can use `random_state` for reproducibility. 

5991 

5992 Parameters 

5993 ---------- 

5994 n : int, optional 

5995 Number of items from axis to return. Cannot be used with `frac`. 

5996 Default = 1 if `frac` is None. 

5997 frac : float, optional 

5998 Fraction of axis items to return. Cannot be used with `n`. 

5999 replace : bool, default False 

6000 Allow or disallow sampling of the same row more than once. 

6001 weights : str or ndarray-like, optional 

6002 Default 'None' results in equal probability weighting. 

6003 If passed a Series, will align with target object on index. Index 

6004 values in weights not found in sampled object will be ignored and 

6005 index values in sampled object not in weights will be assigned 

6006 weights of zero. 

6007 If called on a DataFrame, will accept the name of a column 

6008 when axis = 0. 

6009 Unless weights are a Series, weights must be same length as axis 

6010 being sampled. 

6011 If weights do not sum to 1, they will be normalized to sum to 1. 

6012 Missing values in the weights column will be treated as zero. 

6013 Infinite values not allowed. 

6014 random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional 

6015 If int, array-like, or BitGenerator, seed for random number generator. 

6016 If np.random.RandomState or np.random.Generator, use as given. 

6017 

6018 .. versionchanged:: 1.4.0 

6019 

6020 np.random.Generator objects now accepted 

6021 

6022 axis : {0 or 'index', 1 or 'columns', None}, default None 

6023 Axis to sample. Accepts axis number or name. Default is stat axis 

6024 for given data type. For `Series` this parameter is unused and defaults to `None`. 

6025 ignore_index : bool, default False 

6026 If True, the resulting index will be labeled 0, 1, …, n - 1. 

6027 

6028 .. versionadded:: 1.3.0 

6029 

6030 Returns 

6031 ------- 

6032 Series or DataFrame 

6033 A new object of same type as caller containing `n` items randomly 

6034 sampled from the caller object. 

6035 

6036 See Also 

6037 -------- 

6038 DataFrameGroupBy.sample: Generates random samples from each group of a 

6039 DataFrame object. 

6040 SeriesGroupBy.sample: Generates random samples from each group of a 

6041 Series object. 

6042 numpy.random.choice: Generates a random sample from a given 1-D numpy 

6043 array. 

6044 

6045 Notes 

6046 ----- 

6047 If `frac` > 1, `replace` should be set to `True`. 

6048 

6049 Examples 

6050 -------- 

6051 >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], 

6052 ... 'num_wings': [2, 0, 0, 0], 

6053 ... 'num_specimen_seen': [10, 2, 1, 8]}, 

6054 ... index=['falcon', 'dog', 'spider', 'fish']) 

6055 >>> df 

6056 num_legs num_wings num_specimen_seen 

6057 falcon 2 2 10 

6058 dog 4 0 2 

6059 spider 8 0 1 

6060 fish 0 0 8 

6061 

6062 Extract 3 random elements from the ``Series`` ``df['num_legs']``: 

6063 Note that we use `random_state` to ensure the reproducibility of 

6064 the examples. 

6065 

6066 >>> df['num_legs'].sample(n=3, random_state=1) 

6067 fish 0 

6068 spider 8 

6069 falcon 2 

6070 Name: num_legs, dtype: int64 

6071 

6072 A random 50% sample of the ``DataFrame`` with replacement: 

6073 

6074 >>> df.sample(frac=0.5, replace=True, random_state=1) 

6075 num_legs num_wings num_specimen_seen 

6076 dog 4 0 2 

6077 fish 0 0 8 

6078 

6079 An upsampled sample of the ``DataFrame`` with replacement: 

6080 Note that the `replace` parameter has to be `True` when `frac` > 1. 

6081 

6082 >>> df.sample(frac=2, replace=True, random_state=1) 

6083 num_legs num_wings num_specimen_seen 

6084 dog 4 0 2 

6085 fish 0 0 8 

6086 falcon 2 2 10 

6087 falcon 2 2 10 

6088 fish 0 0 8 

6089 dog 4 0 2 

6090 fish 0 0 8 

6091 dog 4 0 2 

6092 

6093 Using a DataFrame column as weights. Rows with larger value in the 

6094 `num_specimen_seen` column are more likely to be sampled. 

6095 

6096 >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) 

6097 num_legs num_wings num_specimen_seen 

6098 falcon 2 2 10 

6099 fish 0 0 8 

6100 """ # noqa: E501 

6101 if axis is None: 

6102 axis = 0 

6103 

6104 axis = self._get_axis_number(axis) 

6105 obj_len = self.shape[axis] 

6106 

6107 # Process random_state argument 

6108 rs = common.random_state(random_state) 

6109 

6110 size = sample.process_sampling_size(n, frac, replace) 

6111 if size is None: 

6112 assert frac is not None 

6113 size = round(frac * obj_len) 

6114 

6115 if weights is not None: 

6116 weights = sample.preprocess_weights(self, weights, axis) 

6117 

6118 sampled_indices = sample.sample(obj_len, size, replace, weights, rs) 

6119 result = self.take(sampled_indices, axis=axis) 

6120 

6121 if ignore_index: 

6122 result.index = default_index(len(result)) 

6123 

6124 return result 

6125 
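# Supplemental sketch, not part of the original source: when only ``frac``
# is given, the draw size is ``round(frac * len(obj))``, e.g. frac=0.5 on a
# 5-row frame draws round(2.5) == 2 rows (banker's rounding); frac > 1
# additionally requires replace=True (hypothetical call):
#
#   >>> df.sample(frac=1.5, replace=True, random_state=0)  # doctest: +SKIP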

6126 @final 

6127 @doc(klass=_shared_doc_kwargs["klass"]) 

6128 def pipe( 

6129 self, 

6130 func: Callable[..., T] | tuple[Callable[..., T], str], 

6131 *args, 

6132 **kwargs, 

6133 ) -> T: 

6134 r""" 

6135 Apply chainable functions that expect Series or DataFrames. 

6136 

6137 Parameters 

6138 ---------- 

6139 func : function 

6140 Function to apply to the {klass}. 

6141 ``args``, and ``kwargs`` are passed into ``func``. 

6142 Alternatively a ``(callable, data_keyword)`` tuple where 

6143 ``data_keyword`` is a string indicating the keyword of 

6144 ``callable`` that expects the {klass}. 

6145 *args : iterable, optional 

6146 Positional arguments passed into ``func``. 

6147 **kwargs : mapping, optional 

6148 A dictionary of keyword arguments passed into ``func``. 

6149 

6150 Returns 

6151 ------- 

6152 the return type of ``func``. 

6153 

6154 See Also 

6155 -------- 

6156 DataFrame.apply : Apply a function along input axis of DataFrame. 

6157 DataFrame.map : Apply a function elementwise on a whole DataFrame. 

6158 Series.map : Apply a mapping correspondence on a 

6159 :class:`~pandas.Series`. 

6160 

6161 Notes 

6162 ----- 

6163 Use ``.pipe`` when chaining together functions that expect 

6164 Series, DataFrames or GroupBy objects. 

6165 

6166 Examples 

6167 -------- 

6168 Constructing an income DataFrame from a dictionary. 

6169 

6170 >>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]] 

6171 >>> df = pd.DataFrame(data, columns=['Salary', 'Others']) 

6172 >>> df 

6173 Salary Others 

6174 0 8000 1000.0 

6175 1 9500 NaN 

6176 2 5000 2000.0 

6177 

6178 Functions that perform tax reductions on an income DataFrame. 

6179 

6180 >>> def subtract_federal_tax(df): 

6181 ... return df * 0.9 

6182 >>> def subtract_state_tax(df, rate): 

6183 ... return df * (1 - rate) 

6184 >>> def subtract_national_insurance(df, rate, rate_increase): 

6185 ... new_rate = rate + rate_increase 

6186 ... return df * (1 - new_rate) 

6187 

6188 Instead of writing 

6189 

6190 >>> subtract_national_insurance( 

6191 ... subtract_state_tax(subtract_federal_tax(df), rate=0.12), 

6192 ... rate=0.05, 

6193 ... rate_increase=0.02) # doctest: +SKIP 

6194 

6195 You can write 

6196 

6197 >>> ( 

6198 ... df.pipe(subtract_federal_tax) 

6199 ... .pipe(subtract_state_tax, rate=0.12) 

6200 ... .pipe(subtract_national_insurance, rate=0.05, rate_increase=0.02) 

6201 ... ) 

6202 Salary Others 

6203 0 5892.48 736.56 

6204 1 6997.32 NaN 

6205 2 3682.80 1473.12 

6206 

6207 If you have a function that takes the data as (say) the second 

6208 argument, pass a tuple indicating which keyword expects the 

6209 data. For example, suppose ``subtract_national_insurance`` takes its data as ``df`` 

6210 in the second argument: 

6211 

6212 >>> def subtract_national_insurance(rate, df, rate_increase): 

6213 ... new_rate = rate + rate_increase 

6214 ... return df * (1 - new_rate) 

6215 >>> ( 

6216 ... df.pipe(subtract_federal_tax) 

6217 ... .pipe(subtract_state_tax, rate=0.12) 

6218 ... .pipe( 

6219 ... (subtract_national_insurance, 'df'), 

6220 ... rate=0.05, 

6221 ... rate_increase=0.02 

6222 ... ) 

6223 ... ) 

6224 Salary Others 

6225 0 5892.48 736.56 

6226 1 6997.32 NaN 

6227 2 3682.80 1473.12 

6228 """ 

6229 if using_copy_on_write(): 

6230 return common.pipe(self.copy(deep=None), func, *args, **kwargs) 

6231 return common.pipe(self, func, *args, **kwargs) 

6232 

6233 # ---------------------------------------------------------------------- 

6234 # Attribute access 

6235 

6236 @final 

6237 def __finalize__(self, other, method: str | None = None, **kwargs) -> Self: 

6238 """ 

6239 Propagate metadata from other to self. 

6240 

6241 Parameters 

6242 ---------- 

6243 other : the object from which to get the attributes that we are going 

6244 to propagate 

6245 method : str, optional 

6246 A passed method name providing context on where ``__finalize__`` 

6247 was called. 

6248 

6249 .. warning:: 

6250 

6251 The value passed as `method` is not currently considered 

6252 stable across pandas releases. 

6253 """ 

6254 if isinstance(other, NDFrame): 

6255 if other.attrs: 

6256 # We want attrs propagation to have minimal performance 

6257 # impact if attrs are not used; i.e. attrs is an empty dict. 

6258 # One could make the deepcopy unconditionally, but a deepcopy 

6259 # of an empty dict is 50x more expensive than the empty check. 

6260 self.attrs = deepcopy(other.attrs) 

6261 

6262 self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels 

6263 # For subclasses using _metadata. 

6264 for name in set(self._metadata) & set(other._metadata): 

6265 assert isinstance(name, str) 

6266 object.__setattr__(self, name, getattr(other, name, None)) 

6267 

6268 if method == "concat": 

6269 # propagate attrs only if all concat arguments have the same attrs 

6270 if all(bool(obj.attrs) for obj in other.objs): 

6271 # all concatenate arguments have non-empty attrs 

6272 attrs = other.objs[0].attrs 

6273 have_same_attrs = all(obj.attrs == attrs for obj in other.objs[1:]) 

6274 if have_same_attrs: 

6275 self.attrs = deepcopy(attrs) 

6276 

6277 allows_duplicate_labels = all( 

6278 x.flags.allows_duplicate_labels for x in other.objs 

6279 ) 

6280 self.flags.allows_duplicate_labels = allows_duplicate_labels 

6281 

6282 return self 

6283 
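# Supplemental sketch, not part of the original source: ``attrs`` set on a
# frame survive operations that route through ``__finalize__`` (hypothetical):
#
#   >>> df = pd.DataFrame({"a": [1, 2]})
#   >>> df.attrs["source"] = "sensor-1"
#   >>> df.head(1).attrs  # doctest: +SKIP
#   {'source': 'sensor-1'}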

6284 @final 

6285 def __getattr__(self, name: str): 

6286 """ 

6287 After regular attribute access, try looking up the name. 

6288 This allows simpler access to columns for interactive use. 

6289 """ 

6290 # Note: obj.x will always call obj.__getattribute__('x') prior to 

6291 # calling obj.__getattr__('x'). 

6292 if ( 

6293 name not in self._internal_names_set 

6294 and name not in self._metadata 

6295 and name not in self._accessors 

6296 and self._info_axis._can_hold_identifiers_and_holds_name(name) 

6297 ): 

6298 return self[name] 

6299 return object.__getattribute__(self, name) 

6300 

6301 @final 

6302 def __setattr__(self, name: str, value) -> None: 

6303 """ 

6304 After regular attribute access, try setting the name. 

6305 This allows simpler access to columns for interactive use. 

6306 """ 

6307 # first try regular attribute access via __getattribute__, so that 

6308 # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify 

6309 # the same attribute. 

6310 

6311 try: 

6312 object.__getattribute__(self, name) 

6313 return object.__setattr__(self, name, value) 

6314 except AttributeError: 

6315 pass 

6316 

6317 # if this fails, go on to more involved attribute setting 

6318 # (note that this matches __getattr__, above). 

6319 if name in self._internal_names_set: 

6320 object.__setattr__(self, name, value) 

6321 elif name in self._metadata: 

6322 object.__setattr__(self, name, value) 

6323 else: 

6324 try: 

6325 existing = getattr(self, name) 

6326 if isinstance(existing, Index): 

6327 object.__setattr__(self, name, value) 

6328 elif name in self._info_axis: 

6329 self[name] = value 

6330 else: 

6331 object.__setattr__(self, name, value) 

6332 except (AttributeError, TypeError): 

6333 if isinstance(self, ABCDataFrame) and (is_list_like(value)): 

6334 warnings.warn( 

6335 "Pandas doesn't allow columns to be " 

6336 "created via a new attribute name - see " 

6337 "https://pandas.pydata.org/pandas-docs/" 

6338 "stable/indexing.html#attribute-access", 

6339 stacklevel=find_stack_level(), 

6340 ) 

6341 object.__setattr__(self, name, value) 

6342 
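# Supplemental sketch, not part of the original source: attribute access can
# read an existing column, but assigning to a new attribute name does not
# create one (hypothetical):
#
#   >>> df = pd.DataFrame({"col": [1, 2]})
#   >>> df.col              # resolves to df['col']
#   >>> df.new = [3, 4]     # warns; sets a plain attribute, not a column
#   >>> df["new"] = [3, 4]  # the supported way to add a column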

6343 @final 

6344 def _dir_additions(self) -> set[str]: 

6345 """ 

6346 add the string-like attributes from the info_axis. 

6347 If info_axis is a MultiIndex, its first level values are used. 

6348 """ 

6349 additions = super()._dir_additions() 

6350 if self._info_axis._can_hold_strings: 

6351 additions.update(self._info_axis._dir_additions_for_owner) 

6352 return additions 

6353 

6354 # ---------------------------------------------------------------------- 

6355 # Consolidation of internals 

6356 

6357 @final 

6358 def _protect_consolidate(self, f): 

6359 """ 

6360 Consolidate _mgr -- if the blocks have changed, then clear the 

6361 cache 

6362 """ 

6363 if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): 

6364 return f() 

6365 blocks_before = len(self._mgr.blocks) 

6366 result = f() 

6367 if len(self._mgr.blocks) != blocks_before: 

6368 self._clear_item_cache() 

6369 return result 

6370 

6371 @final 

6372 def _consolidate_inplace(self) -> None: 

6373 """Consolidate data in place and return None""" 

6374 

6375 def f() -> None: 

6376 self._mgr = self._mgr.consolidate() 

6377 

6378 self._protect_consolidate(f) 

6379 

6380 @final 

6381 def _consolidate(self): 

6382 """ 

6383 Compute NDFrame with "consolidated" internals (data of each dtype 

6384 grouped together in a single ndarray). 

6385 

6386 Returns 

6387 ------- 

6388 consolidated : same type as caller 

6389 """ 

6390 f = lambda: self._mgr.consolidate() 

6391 cons_data = self._protect_consolidate(f) 

6392 return self._constructor_from_mgr(cons_data, axes=cons_data.axes).__finalize__( 

6393 self 

6394 ) 

6395 
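# Supplemental sketch, not part of the original source: inserting columns
# one by one can leave one block per insertion; consolidation merges blocks
# of the same dtype (hypothetical counts):
#
#   >>> df = pd.DataFrame({"a": [1]}); df["b"] = 2.0; df["c"] = 3
#   >>> len(df._mgr.blocks)                 # doctest: +SKIP
#   3
#   >>> len(df._consolidate()._mgr.blocks)  # doctest: +SKIP
#   2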

6396 @final 

6397 @property 

6398 def _is_mixed_type(self) -> bool_t: 

6399 if self._mgr.is_single_block: 

6400 # Includes all Series cases 

6401 return False 

6402 

6403 if self._mgr.any_extension_types: 

6404 # Even if they have the same dtype, we can't consolidate them, 

6405 # so we pretend this is "mixed" 

6406 return True 

6407 

6408 return self.dtypes.nunique() > 1 

6409 

6410 @final 

6411 def _get_numeric_data(self) -> Self: 

6412 new_mgr = self._mgr.get_numeric_data() 

6413 return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) 

6414 

6415 @final 

6416 def _get_bool_data(self): 

6417 new_mgr = self._mgr.get_bool_data() 

6418 return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) 

6419 

6420 # ---------------------------------------------------------------------- 

6421 # Internal Interface Methods 

6422 

6423 @property 

6424 def values(self): 

6425 raise AbstractMethodError(self) 

6426 

6427 @property 

6428 def _values(self) -> ArrayLike: 

6429 """internal implementation""" 

6430 raise AbstractMethodError(self) 

6431 

6432 @property 

6433 def dtypes(self): 

6434 """ 

6435 Return the dtypes in the DataFrame. 

6436 

6437 This returns a Series with the data type of each column. 

6438 The result's index is the original DataFrame's columns. Columns 

6439 with mixed types are stored with the ``object`` dtype. See 

6440 :ref:`the User Guide <basics.dtypes>` for more. 

6441 

6442 Returns 

6443 ------- 

6444 pandas.Series 

6445 The data type of each column. 

6446 

6447 Examples 

6448 -------- 

6449 >>> df = pd.DataFrame({'float': [1.0], 

6450 ... 'int': [1], 

6451 ... 'datetime': [pd.Timestamp('20180310')], 

6452 ... 'string': ['foo']}) 

6453 >>> df.dtypes 

6454 float float64 

6455 int int64 

6456 datetime datetime64[ns] 

6457 string object 

6458 dtype: object 

6459 """ 

6460 data = self._mgr.get_dtypes() 

6461 return self._constructor_sliced(data, index=self._info_axis, dtype=np.object_) 

6462 

6463 @final 

6464 def astype( 

6465 self, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise" 

6466 ) -> Self: 

6467 """ 

6468 Cast a pandas object to a specified dtype ``dtype``. 

6469 

6470 Parameters 

6471 ---------- 

6472 dtype : str, data type, Series or Mapping of column name -> data type 

6473 Use a str, numpy.dtype, pandas.ExtensionDtype or Python type to 

6474 cast entire pandas object to the same type. Alternatively, use a 

6475 mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is 

6476 a numpy.dtype or Python type to cast one or more of the DataFrame's 

6477 columns to column-specific types. 

6478 copy : bool, default True 

6479 Return a copy when ``copy=True`` (be very careful setting 

6480 ``copy=False`` as changes to values then may propagate to other 

6481 pandas objects). 

6482 

6483 .. note:: 

6484 The `copy` keyword will change behavior in pandas 3.0. 

6485 `Copy-on-Write 

6486 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

6487 will be enabled by default, which means that all methods with a 

6488 `copy` keyword will use a lazy copy mechanism to defer the copy and 

6489 ignore the `copy` keyword. The `copy` keyword will be removed in a 

6490 future version of pandas. 

6491 

6492 You can already get the future behavior and improvements through 

6493 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

6494 errors : {'raise', 'ignore'}, default 'raise' 

6495 Control raising of exceptions on invalid data for provided dtype. 

6496 

6497 - ``raise`` : allow exceptions to be raised 

6498 - ``ignore`` : suppress exceptions. On error return original object. 

6499 

6500 Returns 

6501 ------- 

6502 same type as caller 

6503 

6504 See Also 

6505 -------- 

6506 to_datetime : Convert argument to datetime. 

6507 to_timedelta : Convert argument to timedelta. 

6508 to_numeric : Convert argument to a numeric type. 

6509 numpy.ndarray.astype : Cast a numpy array to a specified type. 

6510 

6511 Notes 

6512 ----- 

6513 .. versionchanged:: 2.0.0 

6514 

6515 Using ``astype`` to convert from timezone-naive dtype to 

6516 timezone-aware dtype will raise an exception. 

6517 Use :meth:`Series.dt.tz_localize` instead. 

6518 

6519 Examples 

6520 -------- 

6521 Create a DataFrame: 

6522 

6523 >>> d = {'col1': [1, 2], 'col2': [3, 4]} 

6524 >>> df = pd.DataFrame(data=d) 

6525 >>> df.dtypes 

6526 col1 int64 

6527 col2 int64 

6528 dtype: object 

6529 

6530 Cast all columns to int32: 

6531 

6532 >>> df.astype('int32').dtypes 

6533 col1 int32 

6534 col2 int32 

6535 dtype: object 

6536 

6537 Cast col1 to int32 using a dictionary: 

6538 

6539 >>> df.astype({'col1': 'int32'}).dtypes 

6540 col1 int32 

6541 col2 int64 

6542 dtype: object 

6543 

6544 Create a series: 

6545 

6546 >>> ser = pd.Series([1, 2], dtype='int32') 

6547 >>> ser 

6548 0 1 

6549 1 2 

6550 dtype: int32 

6551 >>> ser.astype('int64') 

6552 0 1 

6553 1 2 

6554 dtype: int64 

6555 

6556 Convert to categorical type: 

6557 

6558 >>> ser.astype('category') 

6559 0 1 

6560 1 2 

6561 dtype: category 

6562 Categories (2, int32): [1, 2] 

6563 

6564 Convert to ordered categorical type with custom ordering: 

6565 

6566 >>> from pandas.api.types import CategoricalDtype 

6567 >>> cat_dtype = CategoricalDtype( 

6568 ... categories=[2, 1], ordered=True) 

6569 >>> ser.astype(cat_dtype) 

6570 0 1 

6571 1 2 

6572 dtype: category 

6573 Categories (2, int64): [2 < 1] 

6574 

6575 Create a series of dates: 

6576 

6577 >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) 

6578 >>> ser_date 

6579 0 2020-01-01 

6580 1 2020-01-02 

6581 2 2020-01-03 

6582 dtype: datetime64[ns] 

6583 """ 

6584 if copy and using_copy_on_write(): 

6585 copy = False 

6586 

6587 if is_dict_like(dtype): 

6588 if self.ndim == 1: # i.e. Series 

6589 if len(dtype) > 1 or self.name not in dtype: 

6590 raise KeyError( 

6591 "Only the Series name can be used for " 

6592 "the key in Series dtype mappings." 

6593 ) 

6594 new_type = dtype[self.name] 

6595 return self.astype(new_type, copy, errors) 

6596 

6597 # GH#44417 cast to Series so we can use .iat below, which will be 

6598 # robust in case we have duplicate column names. 

6599 from pandas import Series 

6600 

6601 dtype_ser = Series(dtype, dtype=object) 

6602 

6603 for col_name in dtype_ser.index: 

6604 if col_name not in self: 

6605 raise KeyError( 

6606 "Only a column name can be used for the " 

6607 "key in a dtype mappings argument. " 

6608 f"'{col_name}' not found in columns." 

6609 ) 

6610 

6611 dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False) 

6612 

6613 results = [] 

6614 for i, (col_name, col) in enumerate(self.items()): 

6615 cdt = dtype_ser.iat[i] 

6616 if isna(cdt): 

6617 res_col = col.copy(deep=copy) 

6618 else: 

6619 try: 

6620 res_col = col.astype(dtype=cdt, copy=copy, errors=errors) 

6621 except ValueError as ex: 

6622 ex.args = ( 

6623 f"{ex}: Error while type casting for column '{col_name}'", 

6624 ) 

6625 raise 

6626 results.append(res_col) 

6627 

6628 elif is_extension_array_dtype(dtype) and self.ndim > 1: 

6629 # TODO(EA2D): special case not needed with 2D EAs 

6630 dtype = pandas_dtype(dtype) 

6631 if isinstance(dtype, ExtensionDtype) and all( 

6632 arr.dtype == dtype for arr in self._mgr.arrays 

6633 ): 

6634 return self.copy(deep=copy) 

6635 # GH 18099/22869: columnwise conversion to extension dtype 

6636 # GH 24704: self.items handles duplicate column names 

6637 results = [ 

6638 ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items() 

6639 ] 

6640 

6641 else: 

6642 # else, only a single dtype is given 

6643 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) 

6644 res = self._constructor_from_mgr(new_data, axes=new_data.axes) 

6645 return res.__finalize__(self, method="astype") 

6646 

6647 # GH 33113: handle empty frame or series 

6648 if not results: 

6649 return self.copy(deep=None) 

6650 

6651 # GH 19920: retain column metadata after concat 

6652 result = concat(results, axis=1, copy=False) 

6653 # GH#40810 retain subclass 

6654 # error: Incompatible types in assignment 

6655 # (expression has type "Self", variable has type "DataFrame") 

6656 result = self._constructor(result) # type: ignore[assignment] 

6657 result.columns = self.columns 

6658 result = result.__finalize__(self, method="astype") 

6659 # https://github.com/python/mypy/issues/8354 

6660 return cast(Self, result) 

6661 
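# Supplemental sketch, not part of the original source: a dict ``dtype`` may
# only name existing columns; an unknown key raises per the check above
# (hypothetical call):
#
#   >>> df.astype({'nope': 'int32'})  # doctest: +SKIP
#   KeyError: "Only a column name can be used for the key in a dtype
#   mappings argument. 'nope' not found in columns."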

6662 @final 

6663 def copy(self, deep: bool_t | None = True) -> Self: 

6664 """ 

6665 Make a copy of this object's indices and data. 

6666 

6667 When ``deep=True`` (default), a new object will be created with a 

6668 copy of the calling object's data and indices. Modifications to 

6669 the data or indices of the copy will not be reflected in the 

6670 original object (see notes below). 

6671 

6672 When ``deep=False``, a new object will be created without copying 

6673 the calling object's data or index (only references to the data 

6674 and index are copied). Any changes to the data of the original 

6675 will be reflected in the shallow copy (and vice versa). 

6676 

6677 .. note:: 

6678 The ``deep=False`` behaviour as described above will change 

6679 in pandas 3.0. `Copy-on-Write 

6680 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

6681 will be enabled by default, which means that the "shallow" copy 

6682 that is returned with ``deep=False`` will still avoid making 

6683 an eager copy, but changes to the data of the original will *no* 

6684 longer be reflected in the shallow copy (or vice versa). Instead, 

6685 it makes use of a lazy (deferred) copy mechanism that will copy 

6686 the data only when any changes to the original or shallow copy is 

6687 made. 

6688 

6689 You can already get the future behavior and improvements through 

6690 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

6691 

6692 Parameters 

6693 ---------- 

6694 deep : bool, default True 

6695 Make a deep copy, including a copy of the data and the indices. 

6696 With ``deep=False`` neither the indices nor the data are copied. 

6697 

6698 Returns 

6699 ------- 

6700 Series or DataFrame 

6701 Object type matches caller. 

6702 

6703 Notes 

6704 ----- 

6705 When ``deep=True``, data is copied but actual Python objects 

6706 will not be copied recursively, only the reference to the object. 

6707 This is in contrast to `copy.deepcopy` in the Standard Library, 

6708 which recursively copies object data (see examples below). 

6709 

6710 While ``Index`` objects are copied when ``deep=True``, the underlying 

6711 numpy array is not copied for performance reasons. Since ``Index`` is 

6712 immutable, the underlying data can be safely shared and a copy 

6713 is not needed. 

6714 

6715 Since pandas is not thread safe, see the 

6716 :ref:`gotchas <gotchas.thread-safety>` when copying in a threading 

6717 environment. 

6718 

6719 When ``copy_on_write`` in pandas config is set to ``True``, the 

6720 ``copy_on_write`` config takes effect even when ``deep=False``. 

6721 This means that any changes to the copied data would make a new copy 

6722 of the data upon write (and vice versa). Changes made to either the 

6723 original or copied variable would not be reflected in the counterpart. 

6724 See :ref:`Copy_on_Write <copy_on_write>` for more information. 

6725 

6726 Examples 

6727 -------- 

6728 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6729 >>> s 

6730 a 1 

6731 b 2 

6732 dtype: int64 

6733 

6734 >>> s_copy = s.copy() 

6735 >>> s_copy 

6736 a 1 

6737 b 2 

6738 dtype: int64 

6739 

6740 **Shallow copy versus default (deep) copy:** 

6741 

6742 >>> s = pd.Series([1, 2], index=["a", "b"]) 

6743 >>> deep = s.copy() 

6744 >>> shallow = s.copy(deep=False) 

6745 

6746 Shallow copy shares data and index with original. 

6747 

6748 >>> s is shallow 

6749 False 

6750 >>> s.values is shallow.values and s.index is shallow.index 

6751 True 

6752 

6753 Deep copy has own copy of data and index. 

6754 

6755 >>> s is deep 

6756 False 

6757 >>> s.values is deep.values or s.index is deep.index 

6758 False 

6759 

6760 Updates to the data shared by shallow copy and original are reflected 

6761 in both (NOTE: this will no longer be true for pandas >= 3.0); 

6762 deep copy remains unchanged. 

6763 

6764 >>> s.iloc[0] = 3 

6765 >>> shallow.iloc[1] = 4 

6766 >>> s 

6767 a 3 

6768 b 4 

6769 dtype: int64 

6770 >>> shallow 

6771 a 3 

6772 b 4 

6773 dtype: int64 

6774 >>> deep 

6775 a 1 

6776 b 2 

6777 dtype: int64 

6778 

6779 Note that when copying an object containing Python objects, a deep copy 

6780 will copy the data, but will not do so recursively. Updating a nested 

6781 data object will be reflected in the deep copy. 

6782 

6783 >>> s = pd.Series([[1, 2], [3, 4]]) 

6784 >>> deep = s.copy() 

6785 >>> s[0][0] = 10 

6786 >>> s 

6787 0 [10, 2] 

6788 1 [3, 4] 

6789 dtype: object 

6790 >>> deep 

6791 0 [10, 2] 

6792 1 [3, 4] 

6793 dtype: object 

6794 

6795 **When Copy-on-Write is set to true**, the shallow copy is not modified 

6796 when the original data is changed: 

6797 

6798 >>> with pd.option_context("mode.copy_on_write", True): 

6799 ... s = pd.Series([1, 2], index=["a", "b"]) 

6800 ... copy = s.copy(deep=False) 

6801 ... s.iloc[0] = 100 

6802 ... s 

6803 a 100 

6804 b 2 

6805 dtype: int64 

6806 >>> copy 

6807 a 1 

6808 b 2 

6809 dtype: int64 

6810 """ 

6811 data = self._mgr.copy(deep=deep) 

6812 self._clear_item_cache() 

6813 return self._constructor_from_mgr(data, axes=data.axes).__finalize__( 

6814 self, method="copy" 

6815 ) 

6816 

6817 @final 

6818 def __copy__(self, deep: bool_t = True) -> Self: 

6819 return self.copy(deep=deep) 

6820 

6821 @final 

6822 def __deepcopy__(self, memo=None) -> Self: 

6823 """ 

6824 Parameters 

6825 ---------- 

6826 memo, default None 

6827 Standard signature. Unused 

6828 """ 

6829 return self.copy(deep=True) 

6830 

6831 @final 

6832 def infer_objects(self, copy: bool_t | None = None) -> Self: 

6833 """ 

6834 Attempt to infer better dtypes for object columns. 

6835 

6836 Attempts soft conversion of object-dtyped 

6837 columns, leaving non-object and unconvertible 

6838 columns unchanged. The inference rules are the 

6839 same as during normal Series/DataFrame construction. 

6840 

6841 Parameters 

6842 ---------- 

6843 copy : bool, default True 

6844 Whether to make a copy for non-object or non-inferable columns 

6845 or Series. 

6846 

6847 .. note:: 

6848 The `copy` keyword will change behavior in pandas 3.0. 

6849 `Copy-on-Write 

6850 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

6851 will be enabled by default, which means that all methods with a 

6852 `copy` keyword will use a lazy copy mechanism to defer the copy and 

6853 ignore the `copy` keyword. The `copy` keyword will be removed in a 

6854 future version of pandas. 

6855 

6856 You can already get the future behavior and improvements through 

6857 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

6858 

6859 Returns 

6860 ------- 

6861 same type as input object 

6862 

6863 See Also 

6864 -------- 

6865 to_datetime : Convert argument to datetime. 

6866 to_timedelta : Convert argument to timedelta. 

6867 to_numeric : Convert argument to numeric type. 

6868 convert_dtypes : Convert argument to best possible dtype. 

6869 

6870 Examples 

6871 -------- 

6872 >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]}) 

6873 >>> df = df.iloc[1:] 

6874 >>> df 

6875 A 

6876 1 1 

6877 2 2 

6878 3 3 

6879 

6880 >>> df.dtypes 

6881 A object 

6882 dtype: object 

6883 

6884 >>> df.infer_objects().dtypes 

6885 A int64 

6886 dtype: object 

6887 """ 

6888 new_mgr = self._mgr.convert(copy=copy) 

6889 res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

6890 return res.__finalize__(self, method="infer_objects") 

6891 

6892 @final 

6893 def convert_dtypes( 

6894 self, 

6895 infer_objects: bool_t = True, 

6896 convert_string: bool_t = True, 

6897 convert_integer: bool_t = True, 

6898 convert_boolean: bool_t = True, 

6899 convert_floating: bool_t = True, 

6900 dtype_backend: DtypeBackend = "numpy_nullable", 

6901 ) -> Self: 

6902 """ 

6903 Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``. 

6904 

6905 Parameters 

6906 ---------- 

6907 infer_objects : bool, default True 

6908 Whether object dtypes should be converted to the best possible types. 

6909 convert_string : bool, default True 

6910 Whether object dtypes should be converted to ``StringDtype()``. 

6911 convert_integer : bool, default True 

6912 Whether, if possible, conversion can be done to integer extension types. 

6913 convert_boolean : bool, default True 

6914 Whether object dtypes should be converted to ``BooleanDtype()``. 

6915 convert_floating : bool, default True 

6916 Whether, if possible, conversion can be done to floating extension types. 

6917 If `convert_integer` is also True, preference will be given to integer 

6918 dtypes if the floats can be faithfully cast to integers. 

6919 dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' 

6920 Back-end data type applied to the resultant :class:`DataFrame` 

6921 (still experimental). Behaviour is as follows: 

6922 

6923 * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 

6924 (default). 

6925 * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 

6926 DataFrame. 

6927 

6928 .. versionadded:: 2.0 

6929 

6930 Returns 

6931 ------- 

6932 Series or DataFrame 

6933 Copy of input object with new dtype. 

6934 

6935 See Also 

6936 -------- 

6937 infer_objects : Infer dtypes of objects. 

6938 to_datetime : Convert argument to datetime. 

6939 to_timedelta : Convert argument to timedelta. 

6940 to_numeric : Convert argument to a numeric type. 

6941 

6942 Notes 

6943 ----- 

6944 By default, ``convert_dtypes`` will attempt to convert a Series (or each 

6945 Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options 

6946 ``convert_string``, ``convert_integer``, ``convert_boolean`` and 

6947 ``convert_floating``, it is possible to turn off individual conversions 

6948 to ``StringDtype``, the integer extension types, ``BooleanDtype`` 

6949 or floating extension types, respectively. 

6950 

6951 For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference 

6952 rules as during normal Series/DataFrame construction. Then, if possible, 

6953 convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer 

6954 or floating extension type, otherwise leave as ``object``. 

6955 

6956 If the dtype is integer, convert to an appropriate integer extension type. 

6957 

6958 If the dtype is numeric, and consists of all integers, convert to an 

6959 appropriate integer extension type. Otherwise, convert to an 

6960 appropriate floating extension type. 

6961 

6962 In the future, as new dtypes are added that support ``pd.NA``, the results 

6963 of this method will change to support those new dtypes. 

6964 

6965 Examples 

6966 -------- 

6967 >>> df = pd.DataFrame( 

6968 ... { 

6969 ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), 

6970 ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), 

6971 ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")), 

6972 ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")), 

6973 ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")), 

6974 ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), 

6975 ... } 

6976 ... ) 

6977 

6978 Start with a DataFrame with default dtypes. 

6979 

6980 >>> df 

6981 a b c d e f 

6982 0 1 x True h 10.0 NaN 

6983 1 2 y False i NaN 100.5 

6984 2 3 z NaN NaN 20.0 200.0 

6985 

6986 >>> df.dtypes 

6987 a int32 

6988 b object 

6989 c object 

6990 d object 

6991 e float64 

6992 f float64 

6993 dtype: object 

6994 

6995 Convert the DataFrame to use best possible dtypes. 

6996 

6997 >>> dfn = df.convert_dtypes() 

6998 >>> dfn 

6999 a b c d e f 

7000 0 1 x True h 10 <NA> 

7001 1 2 y False i <NA> 100.5 

7002 2 3 z <NA> <NA> 20 200.0 

7003 

7004 >>> dfn.dtypes 

7005 a Int32 

7006 b string[python] 

7007 c boolean 

7008 d string[python] 

7009 e Int64 

7010 f Float64 

7011 dtype: object 

7012 

7013 Start with a Series of strings and missing data represented by ``np.nan``. 

7014 

7015 >>> s = pd.Series(["a", "b", np.nan]) 

7016 >>> s 

7017 0 a 

7018 1 b 

7019 2 NaN 

7020 dtype: object 

7021 

7022 Obtain a Series with dtype ``StringDtype``. 

7023 

7024 >>> s.convert_dtypes() 

7025 0 a 

7026 1 b 

7027 2 <NA> 

7028 dtype: string 

7029 """ 

7030 check_dtype_backend(dtype_backend) 

7031 new_mgr = self._mgr.convert_dtypes( # type: ignore[union-attr] 

7032 infer_objects=infer_objects, 

7033 convert_string=convert_string, 

7034 convert_integer=convert_integer, 

7035 convert_boolean=convert_boolean, 

7036 convert_floating=convert_floating, 

7037 dtype_backend=dtype_backend, 

7038 ) 

7039 res = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

7040 return res.__finalize__(self, method="convert_dtypes") 
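
A minimal usage sketch of the ``dtype_backend`` option documented above; it assumes pyarrow is installed (the backend is still experimental), and exact dtype reprs may vary by version:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, None], "b": ["x", None, "z"]})
    converted = df.convert_dtypes(dtype_backend="pyarrow")
    # "a" becomes a pyarrow-backed integer dtype (e.g. int64[pyarrow]) and
    # "b" a pyarrow-backed string dtype, instead of the numpy_nullable defaults.
    print(converted.dtypes)
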

7041 

7042 # ---------------------------------------------------------------------- 

7043 # Filling NA's 

7044 

7045 def _deprecate_downcast(self, downcast, method_name: str): 

7046 # GH#40988 

7047 if downcast is not lib.no_default: 

7048 warnings.warn( 

7049 f"The 'downcast' keyword in {method_name} is deprecated and " 

7050 "will be removed in a future version. Use " 

7051 "res.infer_objects(copy=False) to infer non-object dtype, or " 

7052 "pd.to_numeric with the 'downcast' keyword to downcast numeric " 

7053 "results.", 

7054 FutureWarning, 

7055 stacklevel=find_stack_level(), 

7056 ) 

7057 else: 

7058 downcast = None 

7059 return downcast 
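
A sketch of the migration path the warning above recommends (not part of this module): fill and downcast in two explicit steps instead of passing ``downcast``.

    import pandas as pd

    s = pd.Series([1.0, None, 3.0])
    filled = s.fillna(0)  # instead of s.fillna(0, downcast="infer")
    # pd.to_numeric downcasts losslessly; float64 -> int8 for these values.
    downcast = pd.to_numeric(filled, downcast="integer")
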

7060 

7061 @final 

7062 def _pad_or_backfill( 

7063 self, 

7064 method: Literal["ffill", "bfill", "pad", "backfill"], 

7065 *, 

7066 axis: None | Axis = None, 

7067 inplace: bool_t = False, 

7068 limit: None | int = None, 

7069 limit_area: Literal["inside", "outside"] | None = None, 

7070 downcast: dict | None = None, 

7071 ): 

7072 if axis is None: 

7073 axis = 0 

7074 axis = self._get_axis_number(axis) 

7075 method = clean_fill_method(method) 

7076 

7077 if not self._mgr.is_single_block and axis == 1: 

7078 # e.g. test_align_fill_method 

7079 # TODO(3.0): once downcast is removed, we can do the .T 

7080 # in all axis=1 cases, and remove axis kwarg from mgr.pad_or_backfill. 

7081 if inplace: 

7082 raise NotImplementedError() 

7083 result = self.T._pad_or_backfill( 

7084 method=method, limit=limit, limit_area=limit_area 

7085 ).T 

7086 

7087 return result 

7088 

7089 new_mgr = self._mgr.pad_or_backfill( 

7090 method=method, 

7091 axis=self._get_block_manager_axis(axis), 

7092 limit=limit, 

7093 limit_area=limit_area, 

7094 inplace=inplace, 

7095 downcast=downcast, 

7096 ) 

7097 result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

7098 if inplace: 

7099 return self._update_inplace(result) 

7100 else: 

7101 return result.__finalize__(self, method="fillna") 
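
A sketch of the ``axis=1`` path above: from the public API, filling along columns is equivalent to transposing, filling down, and transposing back.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan], "b": [np.nan, 5.0]})
    left_to_right = df.ffill(axis=1)
    # What the multi-block axis=1 branch does internally (transpose round-trip).
    same = df.T.ffill().T
    assert left_to_right.equals(same)
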

7102 

7103 @overload 

7104 def fillna( 

7105 self, 

7106 value: Hashable | Mapping | Series | DataFrame = ..., 

7107 *, 

7108 method: FillnaOptions | None = ..., 

7109 axis: Axis | None = ..., 

7110 inplace: Literal[False] = ..., 

7111 limit: int | None = ..., 

7112 downcast: dict | None = ..., 

7113 ) -> Self: 

7114 ... 

7115 

7116 @overload 

7117 def fillna( 

7118 self, 

7119 value: Hashable | Mapping | Series | DataFrame = ..., 

7120 *, 

7121 method: FillnaOptions | None = ..., 

7122 axis: Axis | None = ..., 

7123 inplace: Literal[True], 

7124 limit: int | None = ..., 

7125 downcast: dict | None = ..., 

7126 ) -> None: 

7127 ... 

7128 

7129 @overload 

7130 def fillna( 

7131 self, 

7132 value: Hashable | Mapping | Series | DataFrame = ..., 

7133 *, 

7134 method: FillnaOptions | None = ..., 

7135 axis: Axis | None = ..., 

7136 inplace: bool_t = ..., 

7137 limit: int | None = ..., 

7138 downcast: dict | None = ..., 

7139 ) -> Self | None: 

7140 ... 

7141 

7142 @final 

7143 @doc( 

7144 klass=_shared_doc_kwargs["klass"], 

7145 axes_single_arg=_shared_doc_kwargs["axes_single_arg"], 

7146 ) 

7147 def fillna( 

7148 self, 

7149 value: Hashable | Mapping | Series | DataFrame | None = None, 

7150 *, 

7151 method: FillnaOptions | None = None, 

7152 axis: Axis | None = None, 

7153 inplace: bool_t = False, 

7154 limit: int | None = None, 

7155 downcast: dict | None | lib.NoDefault = lib.no_default, 

7156 ) -> Self | None: 

7157 """ 

7158 Fill NA/NaN values using the specified method. 

7159 

7160 Parameters 

7161 ---------- 

7162 value : scalar, dict, Series, or DataFrame 

7163 Value to use to fill holes (e.g. 0), alternately a 

7164 dict/Series/DataFrame of values specifying which value to use for 

7165 each index (for a Series) or column (for a DataFrame). Values not 

7166 in the dict/Series/DataFrame will not be filled. This value cannot 

7167 be a list. 

7168 method : {{'backfill', 'bfill', 'ffill', None}}, default None 

7169 Method to use for filling holes in reindexed Series: 

7170 

7171 * ffill: propagate last valid observation forward to next valid. 

7172 * backfill / bfill: use next valid observation to fill gap. 

7173 

7174 .. deprecated:: 2.1.0 

7175 Use ffill or bfill instead. 

7176 

7177 axis : {axes_single_arg} 

7178 Axis along which to fill missing values. For `Series` 

7179 this parameter is unused and defaults to 0. 

7180 inplace : bool, default False 

7181 If True, fill in-place. Note: this will modify any 

7182 other views on this object (e.g., a no-copy slice for a column in a 

7183 DataFrame). 

7184 limit : int, default None 

7185 If method is specified, this is the maximum number of consecutive 

7186 NaN values to forward/backward fill. In other words, if there is 

7187 a gap with more than this number of consecutive NaNs, it will only 

7188 be partially filled. If method is not specified, this is the 

7189 maximum number of entries along the entire axis where NaNs will be 

7190 filled. Must be greater than 0 if not None. 

7191 downcast : dict, default None 

7192 A dict of item->dtype of what to downcast if possible, 

7193 or the string 'infer' which will try to downcast to an appropriate 

7194 equal type (e.g. float64 to int64 if possible). 

7195 

7196 .. deprecated:: 2.2.0 

7197 

7198 Returns 

7199 ------- 

7200 {klass} or None 

7201 Object with missing values filled or None if ``inplace=True``. 

7202 

7203 See Also 

7204 -------- 

7205 ffill : Fill values by propagating the last valid observation to next valid. 

7206 bfill : Fill values by using the next valid observation to fill the gap. 

7207 interpolate : Fill NaN values using interpolation. 

7208 reindex : Conform object to new index. 

7209 asfreq : Convert TimeSeries to specified frequency. 

7210 

7211 Examples 

7212 -------- 

7213 >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], 

7214 ... [3, 4, np.nan, 1], 

7215 ... [np.nan, np.nan, np.nan, np.nan], 

7216 ... [np.nan, 3, np.nan, 4]], 

7217 ... columns=list("ABCD")) 

7218 >>> df 

7219 A B C D 

7220 0 NaN 2.0 NaN 0.0 

7221 1 3.0 4.0 NaN 1.0 

7222 2 NaN NaN NaN NaN 

7223 3 NaN 3.0 NaN 4.0 

7224 

7225 Replace all NaN elements with 0s. 

7226 

7227 >>> df.fillna(0) 

7228 A B C D 

7229 0 0.0 2.0 0.0 0.0 

7230 1 3.0 4.0 0.0 1.0 

7231 2 0.0 0.0 0.0 0.0 

7232 3 0.0 3.0 0.0 4.0 

7233 

7234 Replace all NaN elements in columns 'A', 'B', 'C', and 'D' with 0, 1, 

7235 2, and 3 respectively. 

7236 

7237 >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}} 

7238 >>> df.fillna(value=values) 

7239 A B C D 

7240 0 0.0 2.0 2.0 0.0 

7241 1 3.0 4.0 2.0 1.0 

7242 2 0.0 1.0 2.0 3.0 

7243 3 0.0 3.0 2.0 4.0 

7244 

7245 Only replace the first NaN element in each column. 

7246 

7247 >>> df.fillna(value=values, limit=1) 

7248 A B C D 

7249 0 0.0 2.0 2.0 0.0 

7250 1 3.0 4.0 NaN 1.0 

7251 2 NaN 1.0 NaN 3.0 

7252 3 NaN 3.0 NaN 4.0 

7253 

7254 When filling using a DataFrame, replacement happens along 

7255 the same column names and same indices. 

7256 

7257 >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE")) 

7258 >>> df.fillna(df2) 

7259 A B C D 

7260 0 0.0 2.0 0.0 0.0 

7261 1 3.0 4.0 0.0 1.0 

7262 2 0.0 0.0 0.0 NaN 

7263 3 0.0 3.0 0.0 4.0 

7264 

7265 Note that column D is not affected since it is not present in df2. 

7266 """ 

7267 inplace = validate_bool_kwarg(inplace, "inplace") 

7268 if inplace: 

7269 if not PYPY and using_copy_on_write(): 

7270 if sys.getrefcount(self) <= REF_COUNT: 

7271 warnings.warn( 

7272 _chained_assignment_method_msg, 

7273 ChainedAssignmentError, 

7274 stacklevel=2, 

7275 ) 

7276 elif ( 

7277 not PYPY 

7278 and not using_copy_on_write() 

7279 and self._is_view_after_cow_rules() 

7280 ): 

7281 ctr = sys.getrefcount(self) 

7282 ref_count = REF_COUNT 

7283 if isinstance(self, ABCSeries) and _check_cacher(self): 

7284 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

7285 ref_count += 1 

7286 if ctr <= ref_count: 

7287 warnings.warn( 

7288 _chained_assignment_warning_method_msg, 

7289 FutureWarning, 

7290 stacklevel=2, 

7291 ) 

7292 

7293 value, method = validate_fillna_kwargs(value, method) 

7294 if method is not None: 

7295 warnings.warn( 

7296 f"{type(self).__name__}.fillna with 'method' is deprecated and " 

7297 "will raise in a future version. Use obj.ffill() or obj.bfill() " 

7298 "instead.", 

7299 FutureWarning, 

7300 stacklevel=find_stack_level(), 

7301 ) 

7302 

7303 was_no_default = downcast is lib.no_default 

7304 downcast = self._deprecate_downcast(downcast, "fillna") 

7305 

7306 # set the default here, so functions examining the signature 

7307 # can detect if something was set (e.g. in groupby) (GH9221) 

7308 if axis is None: 

7309 axis = 0 

7310 axis = self._get_axis_number(axis) 

7311 

7312 if value is None: 

7313 return self._pad_or_backfill( 

7314 # error: Argument 1 to "_pad_or_backfill" of "NDFrame" has 

7315 # incompatible type "Optional[Literal['backfill', 'bfill', 'ffill', 

7316 # 'pad']]"; expected "Literal['ffill', 'bfill', 'pad', 'backfill']" 

7317 method, # type: ignore[arg-type] 

7318 axis=axis, 

7319 limit=limit, 

7320 inplace=inplace, 

7321 # error: Argument "downcast" to "_fillna_with_method" of "NDFrame" 

7322 # has incompatible type "Union[Dict[Any, Any], None, 

7323 # Literal[_NoDefault.no_default]]"; expected 

7324 # "Optional[Dict[Any, Any]]" 

7325 downcast=downcast, # type: ignore[arg-type] 

7326 ) 

7327 else: 

7328 if self.ndim == 1: 

7329 if isinstance(value, (dict, ABCSeries)): 

7330 if not len(value): 

7331 # test_fillna_nonscalar 

7332 if inplace: 

7333 return None 

7334 return self.copy(deep=None) 

7335 from pandas import Series 

7336 

7337 value = Series(value) 

7338 value = value.reindex(self.index, copy=False) 

7339 value = value._values 

7340 elif not is_list_like(value): 

7341 pass 

7342 else: 

7343 raise TypeError( 

7344 '"value" parameter must be a scalar, dict ' 

7345 "or Series, but you passed a " 

7346 f'"{type(value).__name__}"' 

7347 ) 

7348 

7349 new_data = self._mgr.fillna( 

7350 value=value, limit=limit, inplace=inplace, downcast=downcast 

7351 ) 

7352 

7353 elif isinstance(value, (dict, ABCSeries)): 

7354 if axis == 1: 

7355 raise NotImplementedError( 

7356 "Currently only can fill " 

7357 "with dict/Series column " 

7358 "by column" 

7359 ) 

7360 if using_copy_on_write(): 

7361 result = self.copy(deep=None) 

7362 else: 

7363 result = self if inplace else self.copy() 

7364 is_dict = isinstance(downcast, dict) 

7365 for k, v in value.items(): 

7366 if k not in result: 

7367 continue 

7368 

7369 if was_no_default: 

7370 downcast_k = lib.no_default 

7371 else: 

7372 downcast_k = ( 

7373 # error: Incompatible types in assignment (expression 

7374 # has type "Union[Dict[Any, Any], None, 

7375 # Literal[_NoDefault.no_default], Any]", variable has 

7376 # type "_NoDefault") 

7377 downcast # type: ignore[assignment] 

7378 if not is_dict 

7379 # error: Item "None" of "Optional[Dict[Any, Any]]" has 

7380 # no attribute "get" 

7381 else downcast.get(k) # type: ignore[union-attr] 

7382 ) 

7383 

7384 res_k = result[k].fillna(v, limit=limit, downcast=downcast_k) 

7385 

7386 if not inplace: 

7387 result[k] = res_k 

7388 else: 

7389 # We can write into our existing column(s) iff dtype 

7390 # was preserved. 

7391 if isinstance(res_k, ABCSeries): 

7392 # i.e. 'k' only shows up once in self.columns 

7393 if res_k.dtype == result[k].dtype: 

7394 result.loc[:, k] = res_k 

7395 else: 

7396 # Different dtype -> no way to do inplace. 

7397 result[k] = res_k 

7398 else: 

7399 # see test_fillna_dict_inplace_nonunique_columns 

7400 locs = result.columns.get_loc(k) 

7401 if isinstance(locs, slice): 

7402 locs = np.arange(self.shape[1])[locs] 

7403 elif ( 

7404 isinstance(locs, np.ndarray) and locs.dtype.kind == "b" 

7405 ): 

7406 locs = locs.nonzero()[0] 

7407 elif not ( 

7408 isinstance(locs, np.ndarray) and locs.dtype.kind == "i" 

7409 ): 

7410 # Should never be reached, but let's cover our bases 

7411 raise NotImplementedError( 

7412 "Unexpected get_loc result, please report a bug at " 

7413 "https://github.com/pandas-dev/pandas" 

7414 ) 

7415 

7416 for i, loc in enumerate(locs): 

7417 res_loc = res_k.iloc[:, i] 

7418 target = self.iloc[:, loc] 

7419 

7420 if res_loc.dtype == target.dtype: 

7421 result.iloc[:, loc] = res_loc 

7422 else: 

7423 result.isetitem(loc, res_loc) 

7424 if inplace: 

7425 return self._update_inplace(result) 

7426 else: 

7427 return result 

7428 

7429 elif not is_list_like(value): 

7430 if axis == 1: 

7431 result = self.T.fillna(value=value, limit=limit).T 

7432 new_data = result._mgr 

7433 else: 

7434 new_data = self._mgr.fillna( 

7435 value=value, limit=limit, inplace=inplace, downcast=downcast 

7436 ) 

7437 elif isinstance(value, ABCDataFrame) and self.ndim == 2: 

7438 new_data = self.where(self.notna(), value)._mgr 

7439 else: 

7440 raise ValueError(f"invalid fill value with a {type(value)}") 

7441 

7442 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

7443 if inplace: 

7444 return self._update_inplace(result) 

7445 else: 

7446 return result.__finalize__(self, method="fillna") 
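
A short sketch of the dict branch above: values are applied column by column, keys missing from the frame are skipped, and columns absent from the dict are left untouched.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": [np.nan, 2.0], "B": [np.nan, np.nan]})
    print(df.fillna({"A": 0, "Z": 9}))  # "Z" is ignored; "B" is untouched
    #      A   B
    # 0  0.0 NaN
    # 1  2.0 NaN
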

7447 

7448 @overload 

7449 def ffill( 

7450 self, 

7451 *, 

7452 axis: None | Axis = ..., 

7453 inplace: Literal[False] = ..., 

7454 limit: None | int = ..., 

7455 limit_area: Literal["inside", "outside"] | None = ..., 

7456 downcast: dict | None | lib.NoDefault = ..., 

7457 ) -> Self: 

7458 ... 

7459 

7460 @overload 

7461 def ffill( 

7462 self, 

7463 *, 

7464 axis: None | Axis = ..., 

7465 inplace: Literal[True], 

7466 limit: None | int = ..., 

7467 limit_area: Literal["inside", "outside"] | None = ..., 

7468 downcast: dict | None | lib.NoDefault = ..., 

7469 ) -> None: 

7470 ... 

7471 

7472 @overload 

7473 def ffill( 

7474 self, 

7475 *, 

7476 axis: None | Axis = ..., 

7477 inplace: bool_t = ..., 

7478 limit: None | int = ..., 

7479 limit_area: Literal["inside", "outside"] | None = ..., 

7480 downcast: dict | None | lib.NoDefault = ..., 

7481 ) -> Self | None: 

7482 ... 

7483 

7484 @final 

7485 @doc( 

7486 klass=_shared_doc_kwargs["klass"], 

7487 axes_single_arg=_shared_doc_kwargs["axes_single_arg"], 

7488 ) 

7489 def ffill( 

7490 self, 

7491 *, 

7492 axis: None | Axis = None, 

7493 inplace: bool_t = False, 

7494 limit: None | int = None, 

7495 limit_area: Literal["inside", "outside"] | None = None, 

7496 downcast: dict | None | lib.NoDefault = lib.no_default, 

7497 ) -> Self | None: 

7498 """ 

7499 Fill NA/NaN values by propagating the last valid observation to next valid. 

7500 

7501 Parameters 

7502 ---------- 

7503 axis : {axes_single_arg} 

7504 Axis along which to fill missing values. For `Series` 

7505 this parameter is unused and defaults to 0. 

7506 inplace : bool, default False 

7507 If True, fill in-place. Note: this will modify any 

7508 other views on this object (e.g., a no-copy slice for a column in a 

7509 DataFrame). 

7510 limit : int, default None 

7511 If method is specified, this is the maximum number of consecutive 

7512 NaN values to forward/backward fill. In other words, if there is 

7513 a gap with more than this number of consecutive NaNs, it will only 

7514 be partially filled. If method is not specified, this is the 

7515 maximum number of entries along the entire axis where NaNs will be 

7516 filled. Must be greater than 0 if not None. 

7517 limit_area : {{`None`, 'inside', 'outside'}}, default None 

7518 If limit is specified, consecutive NaNs will be filled with this 

7519 restriction. 

7520 

7521 * ``None``: No fill restriction. 

7522 * 'inside': Only fill NaNs surrounded by valid values 

7523 (interpolate). 

7524 * 'outside': Only fill NaNs outside valid values (extrapolate). 

7525 

7526 .. versionadded:: 2.2.0 

7527 

7528 downcast : dict, default None 

7529 A dict of item->dtype of what to downcast if possible, 

7530 or the string 'infer' which will try to downcast to an appropriate 

7531 equal type (e.g. float64 to int64 if possible). 

7532 

7533 .. deprecated:: 2.2.0 

7534 

7535 Returns 

7536 ------- 

7537 {klass} or None 

7538 Object with missing values filled or None if ``inplace=True``. 

7539 

7540 Examples 

7541 -------- 

7542 >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], 

7543 ... [3, 4, np.nan, 1], 

7544 ... [np.nan, np.nan, np.nan, np.nan], 

7545 ... [np.nan, 3, np.nan, 4]], 

7546 ... columns=list("ABCD")) 

7547 >>> df 

7548 A B C D 

7549 0 NaN 2.0 NaN 0.0 

7550 1 3.0 4.0 NaN 1.0 

7551 2 NaN NaN NaN NaN 

7552 3 NaN 3.0 NaN 4.0 

7553 

7554 >>> df.ffill() 

7555 A B C D 

7556 0 NaN 2.0 NaN 0.0 

7557 1 3.0 4.0 NaN 1.0 

7558 2 3.0 4.0 NaN 1.0 

7559 3 3.0 3.0 NaN 4.0 

7560 

7561 >>> ser = pd.Series([1, np.nan, 2, 3]) 

7562 >>> ser.ffill() 

7563 0 1.0 

7564 1 1.0 

7565 2 2.0 

7566 3 3.0 

7567 dtype: float64 

7568 """ 

7569 downcast = self._deprecate_downcast(downcast, "ffill") 

7570 inplace = validate_bool_kwarg(inplace, "inplace") 

7571 if inplace: 

7572 if not PYPY and using_copy_on_write(): 

7573 if sys.getrefcount(self) <= REF_COUNT: 

7574 warnings.warn( 

7575 _chained_assignment_method_msg, 

7576 ChainedAssignmentError, 

7577 stacklevel=2, 

7578 ) 

7579 elif ( 

7580 not PYPY 

7581 and not using_copy_on_write() 

7582 and self._is_view_after_cow_rules() 

7583 ): 

7584 ctr = sys.getrefcount(self) 

7585 ref_count = REF_COUNT 

7586 if isinstance(self, ABCSeries) and _check_cacher(self): 

7587 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

7588 ref_count += 1 

7589 if ctr <= ref_count: 

7590 warnings.warn( 

7591 _chained_assignment_warning_method_msg, 

7592 FutureWarning, 

7593 stacklevel=2, 

7594 ) 

7595 

7596 return self._pad_or_backfill( 

7597 "ffill", 

7598 axis=axis, 

7599 inplace=inplace, 

7600 limit=limit, 

7601 limit_area=limit_area, 

7602 # error: Argument "downcast" to "_fillna_with_method" of "NDFrame" 

7603 # has incompatible type "Union[Dict[Any, Any], None, 

7604 # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]" 

7605 downcast=downcast, # type: ignore[arg-type] 

7606 ) 
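
A sketch of the ``limit_area`` keyword documented above (added in 2.2.0): with ``"inside"``, only gaps surrounded by valid values are filled, so leading and trailing NaNs survive.

    import numpy as np
    import pandas as pd

    s = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
    print(s.ffill(limit_area="inside").tolist())
    # [nan, 1.0, 1.0, 3.0, nan]
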

7607 

7608 @final 

7609 @doc(klass=_shared_doc_kwargs["klass"]) 

7610 def pad( 

7611 self, 

7612 *, 

7613 axis: None | Axis = None, 

7614 inplace: bool_t = False, 

7615 limit: None | int = None, 

7616 downcast: dict | None | lib.NoDefault = lib.no_default, 

7617 ) -> Self | None: 

7618 """ 

7619 Fill NA/NaN values by propagating the last valid observation to next valid. 

7620 

7621 .. deprecated:: 2.0 

7622 

7623 {klass}.pad is deprecated. Use {klass}.ffill instead. 

7624 

7625 Returns 

7626 ------- 

7627 {klass} or None 

7628 Object with missing values filled or None if ``inplace=True``. 

7629 

7630 Examples 

7631 -------- 

7632 Please see examples for :meth:`DataFrame.ffill` or :meth:`Series.ffill`. 

7633 """ 

7634 warnings.warn( 

7635 "DataFrame.pad/Series.pad is deprecated. Use " 

7636 "DataFrame.ffill/Series.ffill instead", 

7637 FutureWarning, 

7638 stacklevel=find_stack_level(), 

7639 ) 

7640 return self.ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) 

7641 

7642 @overload 

7643 def bfill( 

7644 self, 

7645 *, 

7646 axis: None | Axis = ..., 

7647 inplace: Literal[False] = ..., 

7648 limit: None | int = ..., 

7649 limit_area: Literal["inside", "outside"] | None = ..., 

7650 downcast: dict | None | lib.NoDefault = ..., 

7651 ) -> Self: 

7652 ... 

7653 

7654 @overload 

7655 def bfill( 

7656 self, 

7657 *, 

7658 axis: None | Axis = ..., 

7659 inplace: Literal[True], 

7660 limit: None | int = ..., 

 limit_area: Literal["inside", "outside"] | None = ..., 

7661 downcast: dict | None | lib.NoDefault = ..., 

7662 ) -> None: 

7663 ... 

7664 

7665 @overload 

7666 def bfill( 

7667 self, 

7668 *, 

7669 axis: None | Axis = ..., 

7670 inplace: bool_t = ..., 

7671 limit: None | int = ..., 

7672 limit_area: Literal["inside", "outside"] | None = ..., 

7673 downcast: dict | None | lib.NoDefault = ..., 

7674 ) -> Self | None: 

7675 ... 

7676 

7677 @final 

7678 @doc( 

7679 klass=_shared_doc_kwargs["klass"], 

7680 axes_single_arg=_shared_doc_kwargs["axes_single_arg"], 

7681 ) 

7682 def bfill( 

7683 self, 

7684 *, 

7685 axis: None | Axis = None, 

7686 inplace: bool_t = False, 

7687 limit: None | int = None, 

7688 limit_area: Literal["inside", "outside"] | None = None, 

7689 downcast: dict | None | lib.NoDefault = lib.no_default, 

7690 ) -> Self | None: 

7691 """ 

7692 Fill NA/NaN values by using the next valid observation to fill the gap. 

7693 

7694 Parameters 

7695 ---------- 

7696 axis : {axes_single_arg} 

7697 Axis along which to fill missing values. For `Series` 

7698 this parameter is unused and defaults to 0. 

7699 inplace : bool, default False 

7700 If True, fill in-place. Note: this will modify any 

7701 other views on this object (e.g., a no-copy slice for a column in a 

7702 DataFrame). 

7703 limit : int, default None 

7704 If method is specified, this is the maximum number of consecutive 

7705 NaN values to forward/backward fill. In other words, if there is 

7706 a gap with more than this number of consecutive NaNs, it will only 

7707 be partially filled. If method is not specified, this is the 

7708 maximum number of entries along the entire axis where NaNs will be 

7709 filled. Must be greater than 0 if not None. 

7710 limit_area : {{`None`, 'inside', 'outside'}}, default None 

7711 If limit is specified, consecutive NaNs will be filled with this 

7712 restriction. 

7713 

7714 * ``None``: No fill restriction. 

7715 * 'inside': Only fill NaNs surrounded by valid values 

7716 (interpolate). 

7717 * 'outside': Only fill NaNs outside valid values (extrapolate). 

7718 

7719 .. versionadded:: 2.2.0 

7720 

7721 downcast : dict, default None 

7722 A dict of item->dtype of what to downcast if possible, 

7723 or the string 'infer' which will try to downcast to an appropriate 

7724 equal type (e.g. float64 to int64 if possible). 

7725 

7726 .. deprecated:: 2.2.0 

7727 

7728 Returns 

7729 ------- 

7730 {klass} or None 

7731 Object with missing values filled or None if ``inplace=True``. 

7732 

7733 Examples 

7734 -------- 

7735 For Series: 

7736 

7737 >>> s = pd.Series([1, None, None, 2]) 

7738 >>> s.bfill() 

7739 0 1.0 

7740 1 2.0 

7741 2 2.0 

7742 3 2.0 

7743 dtype: float64 

7744 >>> s.bfill(limit=1) 

7745 0 1.0 

7746 1 NaN 

7747 2 2.0 

7748 3 2.0 

7749 dtype: float64 

7750 

7751 With DataFrame: 

7752 

7753 >>> df = pd.DataFrame({{'A': [1, None, None, 4], 'B': [None, 5, None, 7]}}) 

7754 >>> df 

7755 A B 

7756 0 1.0 NaN 

7757 1 NaN 5.0 

7758 2 NaN NaN 

7759 3 4.0 7.0 

7760 >>> df.bfill() 

7761 A B 

7762 0 1.0 5.0 

7763 1 4.0 5.0 

7764 2 4.0 7.0 

7765 3 4.0 7.0 

7766 >>> df.bfill(limit=1) 

7767 A B 

7768 0 1.0 5.0 

7769 1 NaN 5.0 

7770 2 4.0 7.0 

7771 3 4.0 7.0 

7772 """ 

7773 downcast = self._deprecate_downcast(downcast, "bfill") 

7774 inplace = validate_bool_kwarg(inplace, "inplace") 

7775 if inplace: 

7776 if not PYPY and using_copy_on_write(): 

7777 if sys.getrefcount(self) <= REF_COUNT: 

7778 warnings.warn( 

7779 _chained_assignment_method_msg, 

7780 ChainedAssignmentError, 

7781 stacklevel=2, 

7782 ) 

7783 elif ( 

7784 not PYPY 

7785 and not using_copy_on_write() 

7786 and self._is_view_after_cow_rules() 

7787 ): 

7788 ctr = sys.getrefcount(self) 

7789 ref_count = REF_COUNT 

7790 if isinstance(self, ABCSeries) and _check_cacher(self): 

7791 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

7792 ref_count += 1 

7793 if ctr <= ref_count: 

7794 warnings.warn( 

7795 _chained_assignment_warning_method_msg, 

7796 FutureWarning, 

7797 stacklevel=2, 

7798 ) 

7799 

7800 return self._pad_or_backfill( 

7801 "bfill", 

7802 axis=axis, 

7803 inplace=inplace, 

7804 limit=limit, 

7805 limit_area=limit_area, 

7806 # error: Argument "downcast" to "_fillna_with_method" of "NDFrame" 

7807 # has incompatible type "Union[Dict[Any, Any], None, 

7808 # Literal[_NoDefault.no_default]]"; expected "Optional[Dict[Any, Any]]" 

7809 downcast=downcast, # type: ignore[arg-type] 

7810 ) 
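
The complementary case for ``bfill``: with ``limit_area="outside"`` only the NaNs before the first valid value (and after the last) are eligible, so the interior gap is left alone.

    import numpy as np
    import pandas as pd

    s = pd.Series([np.nan, 1.0, np.nan, 3.0])
    print(s.bfill(limit_area="outside").tolist())
    # [1.0, 1.0, nan, 3.0]
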

7811 

7812 @final 

7813 @doc(klass=_shared_doc_kwargs["klass"]) 

7814 def backfill( 

7815 self, 

7816 *, 

7817 axis: None | Axis = None, 

7818 inplace: bool_t = False, 

7819 limit: None | int = None, 

7820 downcast: dict | None | lib.NoDefault = lib.no_default, 

7821 ) -> Self | None: 

7822 """ 

7823 Fill NA/NaN values by using the next valid observation to fill the gap. 

7824 

7825 .. deprecated:: 2.0 

7826 

7827 {klass}.backfill is deprecated. Use {klass}.bfill instead. 

7828 

7829 Returns 

7830 ------- 

7831 {klass} or None 

7832 Object with missing values filled or None if ``inplace=True``. 

7833 

7834 Examples 

7835 -------- 

7836 Please see examples for :meth:`DataFrame.bfill` or :meth:`Series.bfill`. 

7837 """ 

7838 warnings.warn( 

7839 "DataFrame.backfill/Series.backfill is deprecated. Use " 

7840 "DataFrame.bfill/Series.bfill instead", 

7841 FutureWarning, 

7842 stacklevel=find_stack_level(), 

7843 ) 

7844 return self.bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) 

7845 

7846 @overload 

7847 def replace( 

7848 self, 

7849 to_replace=..., 

7850 value=..., 

7851 *, 

7852 inplace: Literal[False] = ..., 

7853 limit: int | None = ..., 

7854 regex: bool_t = ..., 

7855 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7856 ) -> Self: 

7857 ... 

7858 

7859 @overload 

7860 def replace( 

7861 self, 

7862 to_replace=..., 

7863 value=..., 

7864 *, 

7865 inplace: Literal[True], 

7866 limit: int | None = ..., 

7867 regex: bool_t = ..., 

7868 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7869 ) -> None: 

7870 ... 

7871 

7872 @overload 

7873 def replace( 

7874 self, 

7875 to_replace=..., 

7876 value=..., 

7877 *, 

7878 inplace: bool_t = ..., 

7879 limit: int | None = ..., 

7880 regex: bool_t = ..., 

7881 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ..., 

7882 ) -> Self | None: 

7883 ... 

7884 

7885 @final 

7886 @doc( 

7887 _shared_docs["replace"], 

7888 klass=_shared_doc_kwargs["klass"], 

7889 inplace=_shared_doc_kwargs["inplace"], 

7890 ) 

7891 def replace( 

7892 self, 

7893 to_replace=None, 

7894 value=lib.no_default, 

7895 *, 

7896 inplace: bool_t = False, 

7897 limit: int | None = None, 

7898 regex: bool_t = False, 

7899 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, 

7900 ) -> Self | None: 

7901 if method is not lib.no_default: 

7902 warnings.warn( 

7903 # GH#33302 

7904 f"The 'method' keyword in {type(self).__name__}.replace is " 

7905 "deprecated and will be removed in a future version.", 

7906 FutureWarning, 

7907 stacklevel=find_stack_level(), 

7908 ) 

7909 elif limit is not None: 

7910 warnings.warn( 

7911 # GH#33302 

7912 f"The 'limit' keyword in {type(self).__name__}.replace is " 

7913 "deprecated and will be removed in a future version.", 

7914 FutureWarning, 

7915 stacklevel=find_stack_level(), 

7916 ) 

7917 if ( 

7918 value is lib.no_default 

7919 and method is lib.no_default 

7920 and not is_dict_like(to_replace) 

7921 and regex is False 

7922 ): 

7923 # case that goes through _replace_single and defaults to method="pad" 

7924 warnings.warn( 

7925 # GH#33302 

7926 f"{type(self).__name__}.replace without 'value' and with " 

7927 "non-dict-like 'to_replace' is deprecated " 

7928 "and will raise in a future version. " 

7929 "Explicitly specify the new values instead.", 

7930 FutureWarning, 

7931 stacklevel=find_stack_level(), 

7932 ) 

7933 

7934 if not ( 

7935 is_scalar(to_replace) 

7936 or is_re_compilable(to_replace) 

7937 or is_list_like(to_replace) 

7938 ): 

7939 raise TypeError( 

7940 "Expecting 'to_replace' to be either a scalar, array-like, " 

7941 "dict or None, got invalid type " 

7942 f"{repr(type(to_replace).__name__)}" 

7943 ) 

7944 

7945 inplace = validate_bool_kwarg(inplace, "inplace") 

7946 if inplace: 

7947 if not PYPY and using_copy_on_write(): 

7948 if sys.getrefcount(self) <= REF_COUNT: 

7949 warnings.warn( 

7950 _chained_assignment_method_msg, 

7951 ChainedAssignmentError, 

7952 stacklevel=2, 

7953 ) 

7954 elif ( 

7955 not PYPY 

7956 and not using_copy_on_write() 

7957 and self._is_view_after_cow_rules() 

7958 ): 

7959 ctr = sys.getrefcount(self) 

7960 ref_count = REF_COUNT 

7961 if isinstance(self, ABCSeries) and _check_cacher(self): 

7962 # in non-CoW mode, chained Series access will populate the 

7963 # `_item_cache` which results in an increased ref count not below 

7964 # the threshold, while we still need to warn. We detect this case 

7965 # of a Series derived from a DataFrame by checking for the 

7966 # presence of the `_cacher` attribute. 

7967 ref_count += 1 

7968 if ctr <= ref_count: 

7969 warnings.warn( 

7970 _chained_assignment_warning_method_msg, 

7971 FutureWarning, 

7972 stacklevel=2, 

7973 ) 

7974 

7975 if not is_bool(regex) and to_replace is not None: 

7976 raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") 

7977 

7978 if value is lib.no_default or method is not lib.no_default: 

7979 # GH#36984 if the user explicitly passes value=None we want to 

7980 # respect that. We have the corner case where the user explicitly 

7981 # passes value=None *and* a method, which we interpret as meaning 

7982 # they want the (documented) default behavior. 

7983 if method is lib.no_default: 

7984 # TODO: get this to show up as the default in the docs? 

7985 method = "pad" 

7986 

7987 # passing a single value that is scalar like 

7988 # when value is None (GH5319), for compat 

7989 if not is_dict_like(to_replace) and not is_dict_like(regex): 

7990 to_replace = [to_replace] 

7991 

7992 if isinstance(to_replace, (tuple, list)): 

7993 # TODO: Consider copy-on-write for non-replaced columns here 

7994 if isinstance(self, ABCDataFrame): 

7995 from pandas import Series 

7996 

7997 result = self.apply( 

7998 Series._replace_single, 

7999 args=(to_replace, method, inplace, limit), 

8000 ) 

8001 if inplace: 

8002 return None 

8003 return result 

8004 return self._replace_single(to_replace, method, inplace, limit) 

8005 

8006 if not is_dict_like(to_replace): 

8007 if not is_dict_like(regex): 

8008 raise TypeError( 

8009 'If "to_replace" and "value" are both None ' 

8010 'and "to_replace" is not a list, then ' 

8011 "regex must be a mapping" 

8012 ) 

8013 to_replace = regex 

8014 regex = True 

8015 

8016 items = list(to_replace.items()) 

8017 if items: 

8018 keys, values = zip(*items) 

8019 else: 

8020 # error: Incompatible types in assignment (expression has type 

8021 # "list[Never]", variable has type "tuple[Any, ...]") 

8022 keys, values = ([], []) # type: ignore[assignment] 

8023 

8024 are_mappings = [is_dict_like(v) for v in values] 

8025 

8026 if any(are_mappings): 

8027 if not all(are_mappings): 

8028 raise TypeError( 

8029 "If a nested mapping is passed, all values " 

8030 "of the top level mapping must be mappings" 

8031 ) 

8032 # passed a nested dict/Series 

8033 to_rep_dict = {} 

8034 value_dict = {} 

8035 

8036 for k, v in items: 

8037 # error: Incompatible types in assignment (expression has type 

8038 # "list[Never]", variable has type "tuple[Any, ...]") 

8039 keys, values = list(zip(*v.items())) or ( # type: ignore[assignment] 

8040 [], 

8041 [], 

8042 ) 

8043 

8044 to_rep_dict[k] = list(keys) 

8045 value_dict[k] = list(values) 

8046 

8047 to_replace, value = to_rep_dict, value_dict 

8048 else: 

8049 to_replace, value = keys, values 

8050 

8051 return self.replace( 

8052 to_replace, value, inplace=inplace, limit=limit, regex=regex 

8053 ) 

8054 else: 

8055 # need a non-zero len on all axes 

8056 if not self.size: 

8057 if inplace: 

8058 return None 

8059 return self.copy(deep=None) 

8060 

8061 if is_dict_like(to_replace): 

8062 if is_dict_like(value): # {'A' : NA} -> {'A' : 0} 

8063 # Note: Checking below for `in foo.keys()` instead of 

8064 # `in foo` is needed for when we have a Series and not dict 

8065 mapping = { 

8066 col: (to_replace[col], value[col]) 

8067 for col in to_replace.keys() 

8068 if col in value.keys() and col in self 

8069 } 

8070 return self._replace_columnwise(mapping, inplace, regex) 

8071 

8072 # {'A': NA} -> 0 

8073 elif not is_list_like(value): 

8074 # Operate column-wise 

8075 if self.ndim == 1: 

8076 raise ValueError( 

8077 "Series.replace cannot use dict-like to_replace " 

8078 "and non-None value" 

8079 ) 

8080 mapping = { 

8081 col: (to_rep, value) for col, to_rep in to_replace.items() 

8082 } 

8083 return self._replace_columnwise(mapping, inplace, regex) 

8084 else: 

8085 raise TypeError("value argument must be scalar, dict, or Series") 

8086 

8087 elif is_list_like(to_replace): 

8088 if not is_list_like(value): 

8089 # e.g. to_replace = [NA, ''] and value is 0, 

8090 # so we replace NA with 0 and then replace '' with 0 

8091 value = [value] * len(to_replace) 

8092 

8093 # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] 

8094 if len(to_replace) != len(value): 

8095 raise ValueError( 

8096 f"Replacement lists must match in length. " 

8097 f"Expecting {len(to_replace)} got {len(value)} " 

8098 ) 

8099 new_data = self._mgr.replace_list( 

8100 src_list=to_replace, 

8101 dest_list=value, 

8102 inplace=inplace, 

8103 regex=regex, 

8104 ) 

8105 

8106 elif to_replace is None: 

8107 if not ( 

8108 is_re_compilable(regex) 

8109 or is_list_like(regex) 

8110 or is_dict_like(regex) 

8111 ): 

8112 raise TypeError( 

8113 f"'regex' must be a string or a compiled regular expression " 

8114 f"or a list or dict of strings or regular expressions, " 

8115 f"you passed a {repr(type(regex).__name__)}" 

8116 ) 

8117 return self.replace( 

8118 regex, value, inplace=inplace, limit=limit, regex=True 

8119 ) 

8120 else: 

8121 # dest iterable dict-like 

8122 if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} 

8123 # Operate column-wise 

8124 if self.ndim == 1: 

8125 raise ValueError( 

8126 "Series.replace cannot use dict-value and " 

8127 "non-None to_replace" 

8128 ) 

8129 mapping = {col: (to_replace, val) for col, val in value.items()} 

8130 return self._replace_columnwise(mapping, inplace, regex) 

8131 

8132 elif not is_list_like(value): # NA -> 0 

8133 regex = should_use_regex(regex, to_replace) 

8134 if regex: 

8135 new_data = self._mgr.replace_regex( 

8136 to_replace=to_replace, 

8137 value=value, 

8138 inplace=inplace, 

8139 ) 

8140 else: 

8141 new_data = self._mgr.replace( 

8142 to_replace=to_replace, value=value, inplace=inplace 

8143 ) 

8144 else: 

8145 raise TypeError( 

8146 f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' 

8147 ) 

8148 

8149 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

8150 if inplace: 

8151 return self._update_inplace(result) 

8152 else: 

8153 return result.__finalize__(self, method="replace") 
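
A sketch of the nested-mapping branch above: a dict of dicts is split into per-column ``to_replace``/``value`` lists and re-dispatched through ``replace``.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [1, 2]})
    # Equivalent to df.replace(to_replace={"a": [1]}, value={"a": [100]}):
    print(df.replace({"a": {1: 100}}))
    #      a  b
    # 0  100  1
    # 1    2  2
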

8154 

8155 @overload 

8156 def interpolate( 

8157 self, 

8158 method: InterpolateOptions = ..., 

8159 *, 

8160 axis: Axis = ..., 

8161 limit: int | None = ..., 

8162 inplace: Literal[False] = ..., 

8163 limit_direction: Literal["forward", "backward", "both"] | None = ..., 

8164 limit_area: Literal["inside", "outside"] | None = ..., 

8165 downcast: Literal["infer"] | None | lib.NoDefault = ..., 

8166 **kwargs, 

8167 ) -> Self: 

8168 ... 

8169 

8170 @overload 

8171 def interpolate( 

8172 self, 

8173 method: InterpolateOptions = ..., 

8174 *, 

8175 axis: Axis = ..., 

8176 limit: int | None = ..., 

8177 inplace: Literal[True], 

8178 limit_direction: Literal["forward", "backward", "both"] | None = ..., 

8179 limit_area: Literal["inside", "outside"] | None = ..., 

8180 downcast: Literal["infer"] | None | lib.NoDefault = ..., 

8181 **kwargs, 

8182 ) -> None: 

8183 ... 

8184 

8185 @overload 

8186 def interpolate( 

8187 self, 

8188 method: InterpolateOptions = ..., 

8189 *, 

8190 axis: Axis = ..., 

8191 limit: int | None = ..., 

8192 inplace: bool_t = ..., 

8193 limit_direction: Literal["forward", "backward", "both"] | None = ..., 

8194 limit_area: Literal["inside", "outside"] | None = ..., 

8195 downcast: Literal["infer"] | None | lib.NoDefault = ..., 

8196 **kwargs, 

8197 ) -> Self | None: 

8198 ... 

8199 

8200 @final 

8201 def interpolate( 

8202 self, 

8203 method: InterpolateOptions = "linear", 

8204 *, 

8205 axis: Axis = 0, 

8206 limit: int | None = None, 

8207 inplace: bool_t = False, 

8208 limit_direction: Literal["forward", "backward", "both"] | None = None, 

8209 limit_area: Literal["inside", "outside"] | None = None, 

8210 downcast: Literal["infer"] | None | lib.NoDefault = lib.no_default, 

8211 **kwargs, 

8212 ) -> Self | None: 

8213 """ 

8214 Fill NaN values using an interpolation method. 

8215 

8216 Please note that only ``method='linear'`` is supported for 

8217 DataFrame/Series with a MultiIndex. 

8218 

8219 Parameters 

8220 ---------- 

8221 method : str, default 'linear' 

8222 Interpolation technique to use. One of: 

8223 

8224 * 'linear': Ignore the index and treat the values as equally 

8225 spaced. This is the only method supported on MultiIndexes. 

8226 * 'time': Works on daily and higher resolution data to interpolate 

8227 over a given length of interval. 

8228 * 'index', 'values': use the actual numerical values of the index. 

8229 * 'pad': Fill in NaNs using existing values. 

8230 * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 

8231 'barycentric', 'polynomial': Passed to 

8232 `scipy.interpolate.interp1d`, whereas 'spline' is passed to 

8233 `scipy.interpolate.UnivariateSpline`. These methods use the numerical 

8234 values of the index. Both 'polynomial' and 'spline' require that 

8235 you also specify an `order` (int), e.g. 

8236 ``df.interpolate(method='polynomial', order=5)``. Note that the 

8237 `slinear` method in pandas refers to the SciPy first-order `spline` 

8238 rather than the pandas first-order `spline`. 

8239 * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 

8240 'cubicspline': Wrappers around the SciPy interpolation methods of 

8241 similar names. See `Notes`. 

8242 * 'from_derivatives': Refers to 

8243 `scipy.interpolate.BPoly.from_derivatives`. 

8244 

8245 axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

8246 Axis to interpolate along. For `Series` this parameter is unused 

8247 and defaults to 0. 

8248 limit : int, optional 

8249 Maximum number of consecutive NaNs to fill. Must be greater than 

8250 0. 

8251 inplace : bool, default False 

8252 Update the data in place if possible. 

8253 limit_direction : {{'forward', 'backward', 'both'}}, optional 

8254 Consecutive NaNs will be filled in this direction. 

8255 

8256 If limit is specified: 

8257 * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'. 

8258 * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be 

8259 'backward'. 

8260 

8261 If 'limit' is not specified: 

8262 * If 'method' is 'backfill' or 'bfill', the default is 'backward' 

8263 * else the default is 'forward' 

8264 

8265 Raises ValueError if `limit_direction` is 'forward' or 'both' and 

8266 method is 'backfill' or 'bfill'. 

8267 Raises ValueError if `limit_direction` is 'backward' or 'both' and 

8268 method is 'pad' or 'ffill'. 

8269 

8270 limit_area : {{`None`, 'inside', 'outside'}}, default None 

8271 If limit is specified, consecutive NaNs will be filled with this 

8272 restriction. 

8273 

8274 * ``None``: No fill restriction. 

8275 * 'inside': Only fill NaNs surrounded by valid values 

8276 (interpolate). 

8277 * 'outside': Only fill NaNs outside valid values (extrapolate). 

8278 

8279 downcast : 'infer' or None, default None 

8280 Downcast dtypes if possible. 

8281 

8282 .. deprecated:: 2.1.0 

8283 

8284 ``**kwargs`` : optional 

8285 Keyword arguments to pass on to the interpolating function. 

8286 

8287 Returns 

8288 ------- 

8289 Series or DataFrame or None 

8290 Returns the same object type as the caller, interpolated at 

8291 some or all ``NaN`` values or None if ``inplace=True``. 

8292 

8293 See Also 

8294 -------- 

8295 fillna : Fill missing values using different methods. 

8296 scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials 

8297 (Akima interpolator). 

8298 scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the 

8299 Bernstein basis. 

8300 scipy.interpolate.interp1d : Interpolate a 1-D function. 

8301 scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh 

8302 interpolator). 

8303 scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic 

8304 interpolation. 

8305 scipy.interpolate.CubicSpline : Cubic spline data interpolator. 

8306 

8307 Notes 

8308 ----- 

8309 The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' 

8310 methods are wrappers around the respective SciPy implementations of 

8311 similar names. These use the actual numerical values of the index. 

8312 For more information on their behavior, see the 

8313 `SciPy documentation 

8314 <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__. 

8315 

8316 Examples 

8317 -------- 

8318 Filling in ``NaN`` in a :class:`~pandas.Series` via linear 

8319 interpolation. 

8320 

8321 >>> s = pd.Series([0, 1, np.nan, 3]) 

8322 >>> s 

8323 0 0.0 

8324 1 1.0 

8325 2 NaN 

8326 3 3.0 

8327 dtype: float64 

8328 >>> s.interpolate() 

8329 0 0.0 

8330 1 1.0 

8331 2 2.0 

8332 3 3.0 

8333 dtype: float64 

8334 

8335 Filling in ``NaN`` in a Series via polynomial interpolation or splines: 

8336 Both 'polynomial' and 'spline' methods require that you also specify 

8337 an ``order`` (int). 

8338 

8339 >>> s = pd.Series([0, 2, np.nan, 8]) 

8340 >>> s.interpolate(method='polynomial', order=2) 

8341 0 0.000000 

8342 1 2.000000 

8343 2 4.666667 

8344 3 8.000000 

8345 dtype: float64 

8346 

8347 Fill the DataFrame forward (that is, going down) along each column 

8348 using linear interpolation. 

8349 

8350 Note how the last entry in column 'a' is interpolated differently, 

8351 because there is no entry after it to use for interpolation. 

8352 Note how the first entry in column 'b' remains ``NaN``, because there 

8353 is no entry before it to use for interpolation. 

8354 

8355 >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), 

8356 ... (np.nan, 2.0, np.nan, np.nan), 

8357 ... (2.0, 3.0, np.nan, 9.0), 

8358 ... (np.nan, 4.0, -4.0, 16.0)], 

8359 ... columns=list('abcd')) 

8360 >>> df 

8361 a b c d 

8362 0 0.0 NaN -1.0 1.0 

8363 1 NaN 2.0 NaN NaN 

8364 2 2.0 3.0 NaN 9.0 

8365 3 NaN 4.0 -4.0 16.0 

8366 >>> df.interpolate(method='linear', limit_direction='forward', axis=0) 

8367 a b c d 

8368 0 0.0 NaN -1.0 1.0 

8369 1 1.0 2.0 -2.0 5.0 

8370 2 2.0 3.0 -3.0 9.0 

8371 3 2.0 4.0 -4.0 16.0 

8372 

8373 Using polynomial interpolation. 

8374 

8375 >>> df['d'].interpolate(method='polynomial', order=2) 

8376 0 1.0 

8377 1 4.0 

8378 2 9.0 

8379 3 16.0 

8380 Name: d, dtype: float64 

8381 """ 

8382 if downcast is not lib.no_default: 

8383 # GH#40988 

8384 warnings.warn( 

8385 f"The 'downcast' keyword in {type(self).__name__}.interpolate " 

8386 "is deprecated and will be removed in a future version. " 

8387 "Call result.infer_objects(copy=False) on the result instead.", 

8388 FutureWarning, 

8389 stacklevel=find_stack_level(), 

8390 ) 

8391 else: 

8392 downcast = None 

8393 if downcast is not None and downcast != "infer": 

8394 raise ValueError("downcast must be either None or 'infer'") 

8395 

8396 inplace = validate_bool_kwarg(inplace, "inplace") 

8397 

8398 if inplace: 

8399 if not PYPY and using_copy_on_write(): 

8400 if sys.getrefcount(self) <= REF_COUNT: 

8401 warnings.warn( 

8402 _chained_assignment_method_msg, 

8403 ChainedAssignmentError, 

8404 stacklevel=2, 

8405 ) 

8406 elif ( 

8407 not PYPY 

8408 and not using_copy_on_write() 

8409 and self._is_view_after_cow_rules() 

8410 ): 

8411 ctr = sys.getrefcount(self) 

8412 ref_count = REF_COUNT 

8413 if isinstance(self, ABCSeries) and _check_cacher(self): 

8414 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

8415 ref_count += 1 

8416 if ctr <= ref_count: 

8417 warnings.warn( 

8418 _chained_assignment_warning_method_msg, 

8419 FutureWarning, 

8420 stacklevel=2, 

8421 ) 

8422 

8423 axis = self._get_axis_number(axis) 

8424 

8425 if self.empty: 

8426 if inplace: 

8427 return None 

8428 return self.copy() 

8429 

8430 if not isinstance(method, str): 

8431 raise ValueError("'method' should be a string, not None.") 

8432 

8433 fillna_methods = ["ffill", "bfill", "pad", "backfill"] 

8434 if method.lower() in fillna_methods: 

8435 # GH#53581 

8436 warnings.warn( 

8437 f"{type(self).__name__}.interpolate with method={method} is " 

8438 "deprecated and will raise in a future version. " 

8439 "Use obj.ffill() or obj.bfill() instead.", 

8440 FutureWarning, 

8441 stacklevel=find_stack_level(), 

8442 ) 

8443 obj, should_transpose = self, False 

8444 else: 

8445 obj, should_transpose = (self.T, True) if axis == 1 else (self, False) 

8446 if np.any(obj.dtypes == object): 

8447 # GH#53631 

8448 if not (obj.ndim == 2 and np.all(obj.dtypes == object)): 

8449 # don't warn in cases that already raise 

8450 warnings.warn( 

8451 f"{type(self).__name__}.interpolate with object dtype is " 

8452 "deprecated and will raise in a future version. Call " 

8453 "obj.infer_objects(copy=False) before interpolating instead.", 

8454 FutureWarning, 

8455 stacklevel=find_stack_level(), 

8456 ) 

8457 

8458 if method in fillna_methods and "fill_value" in kwargs: 

8459 raise ValueError( 

8460 "'fill_value' is not a valid keyword for " 

8461 f"{type(self).__name__}.interpolate with method from " 

8462 f"{fillna_methods}" 

8463 ) 

8464 

8465 if isinstance(obj.index, MultiIndex) and method != "linear": 

8466 raise ValueError( 

8467 "Only `method=linear` interpolation is supported on MultiIndexes." 

8468 ) 

8469 

8470 limit_direction = missing.infer_limit_direction(limit_direction, method) 

8471 

8472 if obj.ndim == 2 and np.all(obj.dtypes == object): 

8473 raise TypeError( 

8474 "Cannot interpolate with all object-dtype columns " 

8475 "in the DataFrame. Try setting at least one " 

8476 "column to a numeric dtype." 

8477 ) 

8478 

8479 if method.lower() in fillna_methods: 

8480 # TODO(3.0): remove this case 

8481 # TODO: warn/raise on limit_direction or kwargs which are ignored? 

8482 # as of 2023-06-26 no tests get here with either 

8483 if not self._mgr.is_single_block and axis == 1: 

8484 # GH#53898 

8485 if inplace: 

8486 raise NotImplementedError() 

8487 obj, axis, should_transpose = self.T, 1 - axis, True 

8488 

8489 new_data = obj._mgr.pad_or_backfill( 

8490 method=method, 

8491 axis=self._get_block_manager_axis(axis), 

8492 limit=limit, 

8493 limit_area=limit_area, 

8494 inplace=inplace, 

8495 downcast=downcast, 

8496 ) 

8497 else: 

8498 index = missing.get_interp_index(method, obj.index) 

8499 new_data = obj._mgr.interpolate( 

8500 method=method, 

8501 index=index, 

8502 limit=limit, 

8503 limit_direction=limit_direction, 

8504 limit_area=limit_area, 

8505 inplace=inplace, 

8506 downcast=downcast, 

8507 **kwargs, 

8508 ) 

8509 

8510 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

8511 if should_transpose: 

8512 result = result.T 

8513 if inplace: 

8514 return self._update_inplace(result) 

8515 else: 

8516 return result.__finalize__(self, method="interpolate") 
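
A sketch contrasting the default with ``method="index"``, which uses the numerical index values rather than equal spacing:

    import numpy as np
    import pandas as pd

    s = pd.Series([0.0, np.nan, 10.0], index=[0, 1, 10])
    print(s.interpolate().tolist())                # [0.0, 5.0, 10.0] (equal spacing)
    print(s.interpolate(method="index").tolist())  # [0.0, 1.0, 10.0] (index-aware)
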

8517 

8518 # ---------------------------------------------------------------------- 

8519 # Timeseries Methods 

8520 

8521 @final 

8522 def asof(self, where, subset=None): 

8523 """ 

8524 Return the last row(s) without any NaNs before `where`. 

8525 

8526 The last row (for each element in `where`, if list) without any 

8527 NaN is taken. 

8528 In the case of a :class:`~pandas.DataFrame`, the last row without NaN 

8529 is taken, considering only the subset of columns (if not `None`). 

8530 

8531 If there is no good value, NaN is returned for a Series, or 

8532 a Series of NaN values for a DataFrame. 

8533 

8534 Parameters 

8535 ---------- 

8536 where : date or array-like of dates 

8537 Date(s) before which the last row(s) are returned. 

8538 subset : str or array-like of str, default `None` 

8539 For DataFrame, if not `None`, only use these columns to 

8540 check for NaNs. 

8541 

8542 Returns 

8543 ------- 

8544 scalar, Series, or DataFrame 

8545 

8546 The return can be: 

8547 

8548 * scalar : when `self` is a Series and `where` is a scalar 

8549 * Series: when `self` is a Series and `where` is an array-like, 

8550 or when `self` is a DataFrame and `where` is a scalar 

8551 * DataFrame : when `self` is a DataFrame and `where` is an 

8552 array-like 

8553 

8554 See Also 

8555 -------- 

8556 merge_asof : Perform an asof merge. Similar to a left join. 

8557 

8558 Notes 

8559 ----- 

8560 Dates are assumed to be sorted. Raises if this is not the case. 

8561 

8562 Examples 

8563 -------- 

8564 A Series and a scalar `where`. 

8565 

8566 >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) 

8567 >>> s 

8568 10 1.0 

8569 20 2.0 

8570 30 NaN 

8571 40 4.0 

8572 dtype: float64 

8573 

8574 >>> s.asof(20) 

8575 2.0 

8576 

8577 For a sequence `where`, a Series is returned. The first value is 

8578 NaN, because the first element of `where` is before the first 

8579 index value. 

8580 

8581 >>> s.asof([5, 20]) 

8582 5 NaN 

8583 20 2.0 

8584 dtype: float64 

8585 

8586 Missing values are not considered. The following is ``2.0``, not 

8587 NaN, even though NaN is at the index location for ``30``. 

8588 

8589 >>> s.asof(30) 

8590 2.0 

8591 

8592 Take all columns into consideration 

8593 

8594 >>> df = pd.DataFrame({'a': [10., 20., 30., 40., 50.], 

8595 ... 'b': [None, None, None, None, 500]}, 

8596 ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', 

8597 ... '2018-02-27 09:02:00', 

8598 ... '2018-02-27 09:03:00', 

8599 ... '2018-02-27 09:04:00', 

8600 ... '2018-02-27 09:05:00'])) 

8601 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

8602 ... '2018-02-27 09:04:30'])) 

8603 a b 

8604 2018-02-27 09:03:30 NaN NaN 

8605 2018-02-27 09:04:30 NaN NaN 

8606 

8607 Take a single column into consideration 

8608 

8609 >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', 

8610 ... '2018-02-27 09:04:30']), 

8611 ... subset=['a']) 

8612 a b 

8613 2018-02-27 09:03:30 30.0 NaN 

8614 2018-02-27 09:04:30 40.0 NaN 

8615 """ 

8616 if isinstance(where, str): 

8617 where = Timestamp(where) 

8618 

8619 if not self.index.is_monotonic_increasing: 

8620 raise ValueError("asof requires a sorted index") 

8621 

8622 is_series = isinstance(self, ABCSeries) 

8623 if is_series: 

8624 if subset is not None: 

8625 raise ValueError("subset is not valid for Series") 

8626 else: 

8627 if subset is None: 

8628 subset = self.columns 

8629 if not is_list_like(subset): 

8630 subset = [subset] 

8631 

8632 is_list = is_list_like(where) 

8633 if not is_list: 

8634 start = self.index[0] 

8635 if isinstance(self.index, PeriodIndex): 

8636 where = Period(where, freq=self.index.freq) 

8637 

8638 if where < start: 

8639 if not is_series: 

8640 return self._constructor_sliced( 

8641 index=self.columns, name=where, dtype=np.float64 

8642 ) 

8643 return np.nan 

8644 

8645 # It's always much faster to use a *while* loop here for 

8646 # Series than pre-computing all the NAs. However a 

8647 # *while* loop is extremely expensive for DataFrame 

8648 # so we later pre-compute all the NAs and use the same 

8649 # code path whether *where* is a scalar or list. 

8650 # See PR: https://github.com/pandas-dev/pandas/pull/14476 

8651 if is_series: 

8652 loc = self.index.searchsorted(where, side="right") 

8653 if loc > 0: 

8654 loc -= 1 

8655 

8656 values = self._values 

8657 while loc > 0 and isna(values[loc]): 

8658 loc -= 1 

8659 return values[loc] 

8660 

8661 if not isinstance(where, Index): 

8662 where = Index(where) if is_list else Index([where]) 

8663 

8664 nulls = self.isna() if is_series else self[subset].isna().any(axis=1) 

8665 if nulls.all(): 

8666 if is_series: 

8667 self = cast("Series", self) 

8668 return self._constructor(np.nan, index=where, name=self.name) 

8669 elif is_list: 

8670 self = cast("DataFrame", self) 

8671 return self._constructor(np.nan, index=where, columns=self.columns) 

8672 else: 

8673 self = cast("DataFrame", self) 

8674 return self._constructor_sliced( 

8675 np.nan, index=self.columns, name=where[0] 

8676 ) 

8677 

8678 locs = self.index.asof_locs(where, ~(nulls._values)) 

8679 

8680 # mask the missing 

8681 mask = locs == -1 

8682 data = self.take(locs) 

8683 data.index = where 

8684 if mask.any(): 

8685 # GH#16063 only do this setting when necessary, otherwise 

8686 # we'd cast e.g. bools to floats 

8687 data.loc[mask] = np.nan 

8688 return data if is_list else data.iloc[-1] 
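
A rough pure-NumPy restatement of the scalar Series path above (searchsorted, then walk back past NaNs); this is an illustrative sketch, not part of this module:

    import numpy as np
    import pandas as pd

    s = pd.Series([1.0, np.nan, 3.0], index=[10, 20, 30])
    loc = s.index.searchsorted(25, side="right") - 1  # last position at/before 25
    while loc > 0 and np.isnan(s.iloc[loc]):          # walk back past NaNs
        loc -= 1
    assert s.iloc[loc] == s.asof(25) == 1.0
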

8689 

8690 # ---------------------------------------------------------------------- 

8691 # Action Methods 

8692 

8693 @doc(klass=_shared_doc_kwargs["klass"]) 

8694 def isna(self) -> Self: 

8695 """ 

8696 Detect missing values. 

8697 

8698 Return a boolean same-sized object indicating if the values are NA. 

8699 NA values, such as None or :attr:`numpy.NaN`, get mapped to True 

8700 values. 

8701 Everything else gets mapped to False values. Values such as empty 

8702 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

8703 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

8704 

8705 Returns 

8706 ------- 

8707 {klass} 

8708 Mask of bool values for each element in {klass} that 

8709 indicates whether an element is an NA value. 

8710 

8711 See Also 

8712 -------- 

8713 {klass}.isnull : Alias of isna. 

8714 {klass}.notna : Boolean inverse of isna. 

8715 {klass}.dropna : Omit axes labels with missing values. 

8716 isna : Top-level isna. 

8717 

8718 Examples 

8719 -------- 

8720 Show which entries in a DataFrame are NA. 

8721 

8722 >>> df = pd.DataFrame(dict(age=[5, 6, np.nan], 

8723 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

8724 ... pd.Timestamp('1940-04-25')], 

8725 ... name=['Alfred', 'Batman', ''], 

8726 ... toy=[None, 'Batmobile', 'Joker'])) 

8727 >>> df 

8728 age born name toy 

8729 0 5.0 NaT Alfred None 

8730 1 6.0 1939-05-27 Batman Batmobile 

8731 2 NaN 1940-04-25 Joker 

8732 

8733 >>> df.isna() 

8734 age born name toy 

8735 0 False True False True 

8736 1 False False False False 

8737 2 True False False False 

8738 

8739 Show which entries in a Series are NA. 

8740 

8741 >>> ser = pd.Series([5, 6, np.nan]) 

8742 >>> ser 

8743 0 5.0 

8744 1 6.0 

8745 2 NaN 

8746 dtype: float64 

8747 

8748 >>> ser.isna() 

8749 0 False 

8750 1 False 

8751 2 True 

8752 dtype: bool 

8753 """ 

8754 return isna(self).__finalize__(self, method="isna") 

8755 

8756 @doc(isna, klass=_shared_doc_kwargs["klass"]) 

8757 def isnull(self) -> Self: 

8758 return isna(self).__finalize__(self, method="isnull") 

8759 

8760 @doc(klass=_shared_doc_kwargs["klass"]) 

8761 def notna(self) -> Self: 

8762 """ 

8763 Detect existing (non-missing) values. 

8764 

8765 Return a boolean same-sized object indicating if the values are not NA. 

8766 Non-missing values get mapped to True. Values such as empty 

8767 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

8768 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

8769 NA values, such as None or :attr:`numpy.NaN`, get mapped to False 

8770 values. 

8771 

8772 Returns 

8773 ------- 

8774 {klass} 

8775 Mask of bool values for each element in {klass} that 

8776 indicates whether an element is not an NA value. 

8777 

8778 See Also 

8779 -------- 

8780 {klass}.notnull : Alias of notna. 

8781 {klass}.isna : Boolean inverse of notna. 

8782 {klass}.dropna : Omit axes labels with missing values. 

8783 notna : Top-level notna. 

8784 

8785 Examples 

8786 -------- 

8787 Show which entries in a DataFrame are not NA. 

8788 

8789 >>> df = pd.DataFrame(dict(age=[5, 6, np.nan], 

8790 ... born=[pd.NaT, pd.Timestamp('1939-05-27'), 

8791 ... pd.Timestamp('1940-04-25')], 

8792 ... name=['Alfred', 'Batman', ''], 

8793 ... toy=[None, 'Batmobile', 'Joker'])) 

8794 >>> df 

8795 age born name toy 

8796 0 5.0 NaT Alfred None 

8797 1 6.0 1939-05-27 Batman Batmobile 

8798 2 NaN 1940-04-25 Joker 

8799 

8800 >>> df.notna() 

8801 age born name toy 

8802 0 True False True False 

8803 1 True True True True 

8804 2 False True True True 

8805 

8806 Show which entries in a Series are not NA. 

8807 

8808 >>> ser = pd.Series([5, 6, np.nan]) 

8809 >>> ser 

8810 0 5.0 

8811 1 6.0 

8812 2 NaN 

8813 dtype: float64 

8814 

8815 >>> ser.notna() 

8816 0 True 

8817 1 True 

8818 2 False 

8819 dtype: bool 

8820 """ 

8821 return notna(self).__finalize__(self, method="notna") 

8822 

8823 @doc(notna, klass=_shared_doc_kwargs["klass"]) 

8824 def notnull(self) -> Self: 

8825 return notna(self).__finalize__(self, method="notnull") 

8826 

8827 @final 

8828 def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): 

8829 if (lower is not None and np.any(isna(lower))) or ( 

8830 upper is not None and np.any(isna(upper)) 

8831 ): 

8832 raise ValueError("Cannot use an NA value as a clip threshold") 

8833 

8834 result = self 

8835 mask = self.isna() 

8836 

8837 if lower is not None: 

8838 cond = mask | (self >= lower) 

8839 result = result.where( 

8840 cond, lower, inplace=inplace 

8841 ) # type: ignore[assignment] 

8842 if upper is not None: 

8843 cond = mask | (self <= upper) 
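# with inplace=True the first ``where`` call above returned None, so
# restart from ``self``, which already holds the lower-clipped values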

8844 result = self if inplace else result 

8845 result = result.where( 

8846 cond, upper, inplace=inplace 

8847 ) # type: ignore[assignment] 

8848 

8849 return result 

8850 
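# The ``mask = self.isna()`` above keeps missing values out of the bound
# comparisons, so NaNs pass through clipping unchanged; a small sketch
# (values assumed):
#
#   >>> pd.Series([1.0, np.nan, 10.0]).clip(lower=2, upper=5)
#   0    2.0
#   1    NaN
#   2    5.0
#   dtype: float64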

8851 @final 

8852 def _clip_with_one_bound(self, threshold, method, axis, inplace): 

8853 if axis is not None: 

8854 axis = self._get_axis_number(axis) 

8855 

8856 # method is self.le for upper bound and self.ge for lower bound 

8857 if is_scalar(threshold) and is_number(threshold): 

8858 if method.__name__ == "le": 

8859 return self._clip_with_scalar(None, threshold, inplace=inplace) 

8860 return self._clip_with_scalar(threshold, None, inplace=inplace) 

8861 

8862 # GH #15390 

8863 # In order for where method to work, the threshold must 

8864 # be transformed to NDFrame from other array like structure. 

8865 if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): 

8866 if isinstance(self, ABCSeries): 

8867 threshold = self._constructor(threshold, index=self.index) 

8868 else: 

8869 threshold = self._align_for_op(threshold, axis, flex=None)[1] 

8870 

8871 # GH 40420 

8872 # Treat missing thresholds as no bounds, not clipping the values 

8873 if is_list_like(threshold): 

8874 fill_value = np.inf if method.__name__ == "le" else -np.inf 

8875 threshold_inf = threshold.fillna(fill_value) 

8876 else: 

8877 threshold_inf = threshold 

8878 

8879 subset = method(threshold_inf, axis=axis) | isna(self) 

8880 

8881 # GH 40420 

8882 return self.where(subset, threshold, axis=axis, inplace=inplace) 

8883 
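# GH 40420 above: a NaN inside a list-like threshold is filled with
# -inf/+inf, i.e. treated as "no bound" for that element; sketch
# (values assumed):
#
#   >>> pd.Series([1, 2, 3]).clip(lower=pd.Series([2, np.nan, np.nan]))
#   0    2.0
#   1    2.0
#   2    3.0
#   dtype: float64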

8884 @overload 

8885 def clip( 

8886 self, 

8887 lower=..., 

8888 upper=..., 

8889 *, 

8890 axis: Axis | None = ..., 

8891 inplace: Literal[False] = ..., 

8892 **kwargs, 

8893 ) -> Self: 

8894 ... 

8895 

8896 @overload 

8897 def clip( 

8898 self, 

8899 lower=..., 

8900 upper=..., 

8901 *, 

8902 axis: Axis | None = ..., 

8903 inplace: Literal[True], 

8904 **kwargs, 

8905 ) -> None: 

8906 ... 

8907 

8908 @overload 

8909 def clip( 

8910 self, 

8911 lower=..., 

8912 upper=..., 

8913 *, 

8914 axis: Axis | None = ..., 

8915 inplace: bool_t = ..., 

8916 **kwargs, 

8917 ) -> Self | None: 

8918 ... 

8919 

8920 @final 

8921 def clip( 

8922 self, 

8923 lower=None, 

8924 upper=None, 

8925 *, 

8926 axis: Axis | None = None, 

8927 inplace: bool_t = False, 

8928 **kwargs, 

8929 ) -> Self | None: 

8930 """ 

8931 Trim values at input threshold(s). 

8932 

8933 Assigns values outside boundary to boundary values. Thresholds 

8934 can be singular values or array like, and in the latter case 

8935 the clipping is performed element-wise in the specified axis. 

8936 

8937 Parameters 

8938 ---------- 

8939 lower : float or array-like, default None 

8940 Minimum threshold value. All values below this 

8941 threshold will be set to it. A missing 

8942 threshold (e.g. `NA`) will not clip the value. 

8943 upper : float or array-like, default None 

8944 Maximum threshold value. All values above this 

8945 threshold will be set to it. A missing 

8946 threshold (e.g. `NA`) will not clip the value. 

8947 axis : {{0 or 'index', 1 or 'columns', None}}, default None 

8948 Align object with lower and upper along the given axis. 

8949 For `Series` this parameter is unused and defaults to `None`. 

8950 inplace : bool, default False 

8951 Whether to perform the operation in place on the data. 

8952 **kwargs 

8953 Additional keywords have no effect but might be accepted 

8954 for compatibility with numpy. 

8955 

8956 Returns 

8957 ------- 

8958 Series or DataFrame or None 

8959 Same type as calling object with the values outside the 

8960 clip boundaries replaced or None if ``inplace=True``. 

8961 

8962 See Also 

8963 -------- 

8964 Series.clip : Trim values at input threshold in series. 

8965 DataFrame.clip : Trim values at input threshold in dataframe. 

8966 numpy.clip : Clip (limit) the values in an array. 

8967 

8968 Examples 

8969 -------- 

8970 >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} 

8971 >>> df = pd.DataFrame(data) 

8972 >>> df 

8973 col_0 col_1 

8974 0 9 -2 

8975 1 -3 -7 

8976 2 0 6 

8977 3 -1 8 

8978 4 5 -5 

8979 

8980 Clips per column using lower and upper thresholds: 

8981 

8982 >>> df.clip(-4, 6) 

8983 col_0 col_1 

8984 0 6 -2 

8985 1 -3 -4 

8986 2 0 6 

8987 3 -1 6 

8988 4 5 -4 

8989 

8990 Clips using specific lower and upper thresholds per column: 

8991 

8992 >>> df.clip([-2, -1], [4, 5]) 

8993 col_0 col_1 

8994 0 4 -1 

8995 1 -2 -1 

8996 2 0 5 

8997 3 -1 5 

8998 4 4 -1 

8999 

9000 Clips using specific lower and upper thresholds per column element: 

9001 

9002 >>> t = pd.Series([2, -4, -1, 6, 3]) 

9003 >>> t 

9004 0 2 

9005 1 -4 

9006 2 -1 

9007 3 6 

9008 4 3 

9009 dtype: int64 

9010 

9011 >>> df.clip(t, t + 4, axis=0) 

9012 col_0 col_1 

9013 0 6 2 

9014 1 -3 -4 

9015 2 0 3 

9016 3 6 8 

9017 4 5 3 

9018 

9019 Clips using specific lower threshold per column element, with missing values: 

9020 

9021 >>> t = pd.Series([2, -4, np.nan, 6, 3]) 

9022 >>> t 

9023 0 2.0 

9024 1 -4.0 

9025 2 NaN 

9026 3 6.0 

9027 4 3.0 

9028 dtype: float64 

9029 

9030 >>> df.clip(t, axis=0) 

9031 col_0 col_1 

9032 0 9 2 

9033 1 -3 -4 

9034 2 0 6 

9035 3 6 8 

9036 4 5 3 

9037 """ 

9038 inplace = validate_bool_kwarg(inplace, "inplace") 

9039 

9040 if inplace: 

9041 if not PYPY and using_copy_on_write(): 

9042 if sys.getrefcount(self) <= REF_COUNT: 

9043 warnings.warn( 

9044 _chained_assignment_method_msg, 

9045 ChainedAssignmentError, 

9046 stacklevel=2, 

9047 ) 

9048 elif ( 

9049 not PYPY 

9050 and not using_copy_on_write() 

9051 and self._is_view_after_cow_rules() 

9052 ): 

9053 ctr = sys.getrefcount(self) 

9054 ref_count = REF_COUNT 

9055 if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): 

9056 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

9057 ref_count += 1 

9058 if ctr <= ref_count: 

9059 warnings.warn( 

9060 _chained_assignment_warning_method_msg, 

9061 FutureWarning, 

9062 stacklevel=2, 

9063 ) 

9064 
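# Sketch of what the refcount checks above guard against: under
# copy-on-write, a chained call such as ``df["col"].clip(0, 5,
# inplace=True)`` operates on a temporary whose reference count is at
# most REF_COUNT, so the modification could never be reflected in
# ``df``; the warnings above flag exactly that situation.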

9065 axis = nv.validate_clip_with_axis(axis, (), kwargs) 

9066 if axis is not None: 

9067 axis = self._get_axis_number(axis) 

9068 

9069 # GH 17276 

9070 # numpy doesn't like NaN as a clip value 

9071 # so ignore 

9072 # GH 19992 

9073 # numpy doesn't drop a list-like bound containing NaN 

9074 isna_lower = isna(lower) 

9075 if not is_list_like(lower): 

9076 if np.any(isna_lower): 

9077 lower = None 

9078 elif np.all(isna_lower): 

9079 lower = None 

9080 isna_upper = isna(upper) 

9081 if not is_list_like(upper): 

9082 if np.any(isna_upper): 

9083 upper = None 

9084 elif np.all(isna_upper): 

9085 upper = None 

9086 

9087 # GH 2747 (arguments were reversed) 

9088 if ( 

9089 lower is not None 

9090 and upper is not None 

9091 and is_scalar(lower) 

9092 and is_scalar(upper) 

9093 ): 

9094 lower, upper = min(lower, upper), max(lower, upper) 

9095 

9096 # fast-path for scalars 

9097 if (lower is None or is_number(lower)) and (upper is None or is_number(upper)): 

9098 return self._clip_with_scalar(lower, upper, inplace=inplace) 

9099 

9100 result = self 

9101 if lower is not None: 

9102 result = result._clip_with_one_bound( 

9103 lower, method=self.ge, axis=axis, inplace=inplace 

9104 ) 

9105 if upper is not None: 

9106 if inplace: 

9107 result = self 

9108 result = result._clip_with_one_bound( 

9109 upper, method=self.le, axis=axis, inplace=inplace 

9110 ) 

9111 

9112 return result 

9113 
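# Because scalar bounds are re-ordered above (GH 2747), accidentally
# reversed arguments behave the same as correctly ordered ones; sketch
# (values assumed):
#
#   >>> s = pd.Series([1, 5, 10])
#   >>> s.clip(8, 2).equals(s.clip(2, 8))
#   True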

9114 @final 

9115 @doc(klass=_shared_doc_kwargs["klass"]) 

9116 def asfreq( 

9117 self, 

9118 freq: Frequency, 

9119 method: FillnaOptions | None = None, 

9120 how: Literal["start", "end"] | None = None, 

9121 normalize: bool_t = False, 

9122 fill_value: Hashable | None = None, 

9123 ) -> Self: 

9124 """ 

9125 Convert time series to specified frequency. 

9126 

9127 Returns the original data conformed to a new index with the specified 

9128 frequency. 

9129 

9130 If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index 

9131 is the result of transforming the original index with 

9132 :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index 

9133 will map one-to-one to the new index). 

9134 

9135 Otherwise, the new index will be equivalent to ``pd.date_range(start, end, 

9136 freq=freq)`` where ``start`` and ``end`` are, respectively, the first and 

9137 last entries in the original index (see :func:`pandas.date_range`). The 

9138 values corresponding to any timesteps in the new index which were not present 

9139 in the original index will be null (``NaN``), unless a method for filling 

9140 such unknowns is provided (see the ``method`` parameter below). 

9141 

9142 The :meth:`resample` method is more appropriate if an operation on each group of 

9143 timesteps (such as an aggregate) is necessary to represent the data at the new 

9144 frequency. 

9145 

9146 Parameters 

9147 ---------- 

9148 freq : DateOffset or str 

9149 Frequency DateOffset or string. 

9150 method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None 

9151 Method to use for filling holes in reindexed Series (note this 

9152 does not fill NaNs that already were present): 

9153 

9154 * 'pad' / 'ffill': propagate last valid observation forward to next 

9155 valid 

9156 * 'backfill' / 'bfill': use NEXT valid observation to fill. 

9157 how : {{'start', 'end'}}, default 'end' 

9158 For PeriodIndex only (see PeriodIndex.asfreq). 

9159 normalize : bool, default False 

9160 Whether to reset output index to midnight. 

9161 fill_value : scalar, optional 

9162 Value to use for missing values, applied during upsampling (note 

9163 this does not fill NaNs that already were present). 

9164 

9165 Returns 

9166 ------- 

9167 {klass} 

9168 {klass} object reindexed to the specified frequency. 

9169 

9170 See Also 

9171 -------- 

9172 reindex : Conform DataFrame to new index with optional filling logic. 

9173 

9174 Notes 

9175 ----- 

9176 To learn more about the frequency strings, please see `this link 

9177 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

9178 

9179 Examples 

9180 -------- 

9181 Start by creating a series with 4 one minute timestamps. 

9182 

9183 >>> index = pd.date_range('1/1/2000', periods=4, freq='min') 

9184 >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) 

9185 >>> df = pd.DataFrame({{'s': series}}) 

9186 >>> df 

9187 s 

9188 2000-01-01 00:00:00 0.0 

9189 2000-01-01 00:01:00 NaN 

9190 2000-01-01 00:02:00 2.0 

9191 2000-01-01 00:03:00 3.0 

9192 

9193 Upsample the series into 30 second bins. 

9194 

9195 >>> df.asfreq(freq='30s') 

9196 s 

9197 2000-01-01 00:00:00 0.0 

9198 2000-01-01 00:00:30 NaN 

9199 2000-01-01 00:01:00 NaN 

9200 2000-01-01 00:01:30 NaN 

9201 2000-01-01 00:02:00 2.0 

9202 2000-01-01 00:02:30 NaN 

9203 2000-01-01 00:03:00 3.0 

9204 

9205 Upsample again, providing a ``fill_value``. 

9206 

9207 >>> df.asfreq(freq='30s', fill_value=9.0) 

9208 s 

9209 2000-01-01 00:00:00 0.0 

9210 2000-01-01 00:00:30 9.0 

9211 2000-01-01 00:01:00 NaN 

9212 2000-01-01 00:01:30 9.0 

9213 2000-01-01 00:02:00 2.0 

9214 2000-01-01 00:02:30 9.0 

9215 2000-01-01 00:03:00 3.0 

9216 

9217 Upsample again, providing a ``method``. 

9218 

9219 >>> df.asfreq(freq='30s', method='bfill') 

9220 s 

9221 2000-01-01 00:00:00 0.0 

9222 2000-01-01 00:00:30 NaN 

9223 2000-01-01 00:01:00 NaN 

9224 2000-01-01 00:01:30 2.0 

9225 2000-01-01 00:02:00 2.0 

9226 2000-01-01 00:02:30 3.0 

9227 2000-01-01 00:03:00 3.0 

9228 """ 

9229 from pandas.core.resample import asfreq 

9230 

9231 return asfreq( 

9232 self, 

9233 freq, 

9234 method=method, 

9235 how=how, 

9236 normalize=normalize, 

9237 fill_value=fill_value, 

9238 ) 

9239 
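# For a PeriodIndex, ``asfreq`` maps the original index one-to-one rather
# than building a new date_range; a minimal sketch (values assumed, using
# the default ``how`` of 'end'):
#
#   >>> ps = pd.Series([1, 2], index=pd.period_range('2023-01', periods=2, freq='M'))
#   >>> ps.asfreq('D')
#   2023-01-31    1
#   2023-02-28    2
#   Freq: D, dtype: int64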

9240 @final 

9241 def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self: 

9242 """ 

9243 Select values at particular time of day (e.g., 9:30AM). 

9244 

9245 Parameters 

9246 ---------- 

9247 time : datetime.time or str 

9248 The values to select. 

9249 axis : {0 or 'index', 1 or 'columns'}, default 0 

9250 For `Series` this parameter is unused and defaults to 0. 

9251 

9252 Returns 

9253 ------- 

9254 Series or DataFrame 

9255 

9256 Raises 

9257 ------ 

9258 TypeError 

9259 If the index is not a :class:`DatetimeIndex` 

9260 

9261 See Also 

9262 -------- 

9263 between_time : Select values between particular times of the day. 

9264 first : Select initial periods of time series based on a date offset. 

9265 last : Select final periods of time series based on a date offset. 

9266 DatetimeIndex.indexer_at_time : Get just the index locations for 

9267 values at particular time of the day. 

9268 

9269 Examples 

9270 -------- 

9271 >>> i = pd.date_range('2018-04-09', periods=4, freq='12h') 

9272 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

9273 >>> ts 

9274 A 

9275 2018-04-09 00:00:00 1 

9276 2018-04-09 12:00:00 2 

9277 2018-04-10 00:00:00 3 

9278 2018-04-10 12:00:00 4 

9279 

9280 >>> ts.at_time('12:00') 

9281 A 

9282 2018-04-09 12:00:00 2 

9283 2018-04-10 12:00:00 4 

9284 """ 

9285 if axis is None: 

9286 axis = 0 

9287 axis = self._get_axis_number(axis) 

9288 

9289 index = self._get_axis(axis) 

9290 

9291 if not isinstance(index, DatetimeIndex): 

9292 raise TypeError("Index must be DatetimeIndex") 

9293 

9294 indexer = index.indexer_at_time(time, asof=asof) 

9295 return self._take_with_is_copy(indexer, axis=axis) 

9296 

9297 @final 

9298 def between_time( 

9299 self, 

9300 start_time, 

9301 end_time, 

9302 inclusive: IntervalClosedType = "both", 

9303 axis: Axis | None = None, 

9304 ) -> Self: 

9305 """ 

9306 Select values between particular times of the day (e.g., 9:00-9:30 AM). 

9307 

9308 By setting ``start_time`` to be later than ``end_time``, 

9309 you can get the times that are *not* between the two times. 

9310 

9311 Parameters 

9312 ---------- 

9313 start_time : datetime.time or str 

9314 Initial time as a time filter limit. 

9315 end_time : datetime.time or str 

9316 End time as a time filter limit. 

9317 inclusive : {"both", "neither", "left", "right"}, default "both" 

9318 Include boundaries; whether to set each bound as closed or open. 

9319 axis : {0 or 'index', 1 or 'columns'}, default 0 

9320 Determine range time on index or columns value. 

9321 For `Series` this parameter is unused and defaults to 0. 

9322 

9323 Returns 

9324 ------- 

9325 Series or DataFrame 

9326 Data from the original object filtered to the specified dates range. 

9327 

9328 Raises 

9329 ------ 

9330 TypeError 

9331 If the index is not a :class:`DatetimeIndex` 

9332 

9333 See Also 

9334 -------- 

9335 at_time : Select values at a particular time of the day. 

9336 first : Select initial periods of time series based on a date offset. 

9337 last : Select final periods of time series based on a date offset. 

9338 DatetimeIndex.indexer_between_time : Get just the index locations for 

9339 values between particular times of the day. 

9340 

9341 Examples 

9342 -------- 

9343 >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') 

9344 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

9345 >>> ts 

9346 A 

9347 2018-04-09 00:00:00 1 

9348 2018-04-10 00:20:00 2 

9349 2018-04-11 00:40:00 3 

9350 2018-04-12 01:00:00 4 

9351 

9352 >>> ts.between_time('0:15', '0:45') 

9353 A 

9354 2018-04-10 00:20:00 2 

9355 2018-04-11 00:40:00 3 

9356 

9357 You get the times that are *not* between two times by setting 

9358 ``start_time`` later than ``end_time``: 

9359 

9360 >>> ts.between_time('0:45', '0:15') 

9361 A 

9362 2018-04-09 00:00:00 1 

9363 2018-04-12 01:00:00 4 

9364 """ 

9365 if axis is None: 

9366 axis = 0 

9367 axis = self._get_axis_number(axis) 

9368 

9369 index = self._get_axis(axis) 

9370 if not isinstance(index, DatetimeIndex): 

9371 raise TypeError("Index must be DatetimeIndex") 

9372 

9373 left_inclusive, right_inclusive = validate_inclusive(inclusive) 

9374 indexer = index.indexer_between_time( 

9375 start_time, 

9376 end_time, 

9377 include_start=left_inclusive, 

9378 include_end=right_inclusive, 

9379 ) 

9380 return self._take_with_is_copy(indexer, axis=axis) 

9381 
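# How ``inclusive`` interacts with timestamps that fall exactly on a
# boundary; sketch (values assumed):
#
#   >>> idx = pd.date_range('2018-04-09 09:00', periods=3, freq='30min')
#   >>> s = pd.Series([1, 2, 3], index=idx)
#   >>> len(s.between_time('09:00', '09:30'))
#   2
#   >>> len(s.between_time('09:00', '09:30', inclusive='left'))
#   1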

9382 @final 

9383 @doc(klass=_shared_doc_kwargs["klass"]) 

9384 def resample( 

9385 self, 

9386 rule, 

9387 axis: Axis | lib.NoDefault = lib.no_default, 

9388 closed: Literal["right", "left"] | None = None, 

9389 label: Literal["right", "left"] | None = None, 

9390 convention: Literal["start", "end", "s", "e"] | lib.NoDefault = lib.no_default, 

9391 kind: Literal["timestamp", "period"] | None | lib.NoDefault = lib.no_default, 

9392 on: Level | None = None, 

9393 level: Level | None = None, 

9394 origin: str | TimestampConvertibleTypes = "start_day", 

9395 offset: TimedeltaConvertibleTypes | None = None, 

9396 group_keys: bool_t = False, 

9397 ) -> Resampler: 

9398 """ 

9399 Resample time-series data. 

9400 

9401 Convenience method for frequency conversion and resampling of time series. 

9402 The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`, 

9403 or `TimedeltaIndex`), or the caller must pass the label of a datetime-like 

9404 series/index to the ``on``/``level`` keyword parameter. 

9405 

9406 Parameters 

9407 ---------- 

9408 rule : DateOffset, Timedelta or str 

9409 The offset string or object representing target conversion. 

9410 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

9411 Which axis to use for up- or down-sampling. For `Series` this parameter 

9412 is unused and defaults to 0. Must be 

9413 `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. 

9414 

9415 .. deprecated:: 2.0.0 

9416 Use frame.T.resample(...) instead. 

9417 closed : {{'right', 'left'}}, default None 

9418 Which side of the bin interval is closed. The default is 'left' 

9419 for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 

9420 'BA', 'BQE', and 'W', which all have a default of 'right'. 

9421 label : {{'right', 'left'}}, default None 

9422 Which bin edge label to label the bucket with. The default is 'left' 

9423 for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', 

9424 'BA', 'BQE', and 'W', which all have a default of 'right'. 

9425 convention : {{'start', 'end', 's', 'e'}}, default 'start' 

9426 For `PeriodIndex` only, controls whether to use the start or 

9427 end of `rule`. 

9428 

9429 .. deprecated:: 2.2.0 

9430 Convert PeriodIndex to DatetimeIndex before resampling instead. 

9431 kind : {{'timestamp', 'period'}}, optional, default None 

9432 Pass 'timestamp' to convert the resulting index to a 

9433 `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`. 

9434 By default the input representation is retained. 

9435 

9436 .. deprecated:: 2.2.0 

9437 Convert index to desired type explicitly instead. 

9438 

9439 on : str, optional 

9440 For a DataFrame, column to use instead of index for resampling. 

9441 Column must be datetime-like. 

9442 level : str or int, optional 

9443 For a MultiIndex, level (name or number) to use for 

9444 resampling. `level` must be datetime-like. 

9445 origin : Timestamp or str, default 'start_day' 

9446 The timestamp on which to adjust the grouping. The timezone of origin 

9447 must match the timezone of the index. 

9448 If string, must be one of the following: 

9449 

9450 - 'epoch': `origin` is 1970-01-01 

9451 - 'start': `origin` is the first value of the timeseries 

9452 - 'start_day': `origin` is the first day at midnight of the timeseries 

9453 

9454 - 'end': `origin` is the last value of the timeseries 

9455 - 'end_day': `origin` is the ceiling midnight of the last day 

9456 

9457 .. versionadded:: 1.3.0 

9458 

9459 .. note:: 

9460 

9461 Only takes effect for Tick-frequencies (i.e. fixed frequencies like 

9462 days, hours, and minutes, rather than months or quarters). 

9463 offset : Timedelta or str, default None 

9464 An offset timedelta added to the origin. 

9465 

9466 group_keys : bool, default False 

9467 Whether to include the group keys in the result index when using 

9468 ``.apply()`` on the resampled object. 

9469 

9470 .. versionadded:: 1.5.0 

9471 

9472 Not specifying ``group_keys`` will retain values-dependent behavior 

9473 from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes 

9474 <whatsnew_150.enhancements.resample_group_keys>` for examples). 

9475 

9476 .. versionchanged:: 2.0.0 

9477 

9478 ``group_keys`` now defaults to ``False``. 

9479 

9480 Returns 

9481 ------- 

9482 pandas.api.typing.Resampler 

9483 :class:`~pandas.core.Resampler` object. 

9484 

9485 See Also 

9486 -------- 

9487 Series.resample : Resample a Series. 

9488 DataFrame.resample : Resample a DataFrame. 

9489 groupby : Group {klass} by mapping, function, label, or list of labels. 

9490 asfreq : Reindex a {klass} with the given frequency without grouping. 

9491 

9492 Notes 

9493 ----- 

9494 See the `user guide 

9495 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__ 

9496 for more. 

9497 

9498 To learn more about the offset strings, please see `this link 

9499 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__. 

9500 

9501 Examples 

9502 -------- 

9503 Start by creating a series with 9 one minute timestamps. 

9504 

9505 >>> index = pd.date_range('1/1/2000', periods=9, freq='min') 

9506 >>> series = pd.Series(range(9), index=index) 

9507 >>> series 

9508 2000-01-01 00:00:00 0 

9509 2000-01-01 00:01:00 1 

9510 2000-01-01 00:02:00 2 

9511 2000-01-01 00:03:00 3 

9512 2000-01-01 00:04:00 4 

9513 2000-01-01 00:05:00 5 

9514 2000-01-01 00:06:00 6 

9515 2000-01-01 00:07:00 7 

9516 2000-01-01 00:08:00 8 

9517 Freq: min, dtype: int64 

9518 

9519 Downsample the series into 3 minute bins and sum the values 

9520 of the timestamps falling into a bin. 

9521 

9522 >>> series.resample('3min').sum() 

9523 2000-01-01 00:00:00 3 

9524 2000-01-01 00:03:00 12 

9525 2000-01-01 00:06:00 21 

9526 Freq: 3min, dtype: int64 

9527 

9528 Downsample the series into 3 minute bins as above, but label each 

9529 bin using the right edge instead of the left. Please note that the 

9530 value whose timestamp is used as the label is not included in the 

9531 bucket it labels. For example, in the original series the 

9532 bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed 

9533 value in the resampled bucket with the label ``2000-01-01 00:03:00`` 

9534 does not include 3 (if it did, the summed value would be 6, not 3). 

9535 

9536 >>> series.resample('3min', label='right').sum() 

9537 2000-01-01 00:03:00 3 

9538 2000-01-01 00:06:00 12 

9539 2000-01-01 00:09:00 21 

9540 Freq: 3min, dtype: int64 

9541 

9542 To include this value, close the right side of the bin interval, 

9543 as shown below. 

9544 

9545 >>> series.resample('3min', label='right', closed='right').sum() 

9546 2000-01-01 00:00:00 0 

9547 2000-01-01 00:03:00 6 

9548 2000-01-01 00:06:00 15 

9549 2000-01-01 00:09:00 15 

9550 Freq: 3min, dtype: int64 

9551 

9552 Upsample the series into 30 second bins. 

9553 

9554 >>> series.resample('30s').asfreq()[0:5] # Select first 5 rows 

9555 2000-01-01 00:00:00 0.0 

9556 2000-01-01 00:00:30 NaN 

9557 2000-01-01 00:01:00 1.0 

9558 2000-01-01 00:01:30 NaN 

9559 2000-01-01 00:02:00 2.0 

9560 Freq: 30s, dtype: float64 

9561 

9562 Upsample the series into 30 second bins and fill the ``NaN`` 

9563 values using the ``ffill`` method. 

9564 

9565 >>> series.resample('30s').ffill()[0:5] 

9566 2000-01-01 00:00:00 0 

9567 2000-01-01 00:00:30 0 

9568 2000-01-01 00:01:00 1 

9569 2000-01-01 00:01:30 1 

9570 2000-01-01 00:02:00 2 

9571 Freq: 30s, dtype: int64 

9572 

9573 Upsample the series into 30 second bins and fill the 

9574 ``NaN`` values using the ``bfill`` method. 

9575 

9576 >>> series.resample('30s').bfill()[0:5] 

9577 2000-01-01 00:00:00 0 

9578 2000-01-01 00:00:30 1 

9579 2000-01-01 00:01:00 1 

9580 2000-01-01 00:01:30 2 

9581 2000-01-01 00:02:00 2 

9582 Freq: 30s, dtype: int64 

9583 

9584 Pass a custom function via ``apply`` 

9585 

9586 >>> def custom_resampler(arraylike): 

9587 ... return np.sum(arraylike) + 5 

9588 ... 

9589 >>> series.resample('3min').apply(custom_resampler) 

9590 2000-01-01 00:00:00 8 

9591 2000-01-01 00:03:00 17 

9592 2000-01-01 00:06:00 26 

9593 Freq: 3min, dtype: int64 

9594 

9595 For DataFrame objects, the keyword `on` can be used to specify the 

9596 column instead of the index for resampling. 

9597 

9598 >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

9599 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

9600 >>> df = pd.DataFrame(d) 

9601 >>> df['week_starting'] = pd.date_range('01/01/2018', 

9602 ... periods=8, 

9603 ... freq='W') 

9604 >>> df 

9605 price volume week_starting 

9606 0 10 50 2018-01-07 

9607 1 11 60 2018-01-14 

9608 2 9 40 2018-01-21 

9609 3 13 100 2018-01-28 

9610 4 14 50 2018-02-04 

9611 5 18 100 2018-02-11 

9612 6 17 40 2018-02-18 

9613 7 19 50 2018-02-25 

9614 >>> df.resample('ME', on='week_starting').mean() 

9615 price volume 

9616 week_starting 

9617 2018-01-31 10.75 62.5 

9618 2018-02-28 17.00 60.0 

9619 

9620 For a DataFrame with MultiIndex, the keyword `level` can be used to 

9621 specify on which level the resampling needs to take place. 

9622 

9623 >>> days = pd.date_range('1/1/2000', periods=4, freq='D') 

9624 >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], 

9625 ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} 

9626 >>> df2 = pd.DataFrame( 

9627 ... d2, 

9628 ... index=pd.MultiIndex.from_product( 

9629 ... [days, ['morning', 'afternoon']] 

9630 ... ) 

9631 ... ) 

9632 >>> df2 

9633 price volume 

9634 2000-01-01 morning 10 50 

9635 afternoon 11 60 

9636 2000-01-02 morning 9 40 

9637 afternoon 13 100 

9638 2000-01-03 morning 14 50 

9639 afternoon 18 100 

9640 2000-01-04 morning 17 40 

9641 afternoon 19 50 

9642 >>> df2.resample('D', level=0).sum() 

9643 price volume 

9644 2000-01-01 21 110 

9645 2000-01-02 22 140 

9646 2000-01-03 32 150 

9647 2000-01-04 36 90 

9648 

9649 If you want to adjust the start of the bins based on a fixed timestamp: 

9650 

9651 >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' 

9652 >>> rng = pd.date_range(start, end, freq='7min') 

9653 >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) 

9654 >>> ts 

9655 2000-10-01 23:30:00 0 

9656 2000-10-01 23:37:00 3 

9657 2000-10-01 23:44:00 6 

9658 2000-10-01 23:51:00 9 

9659 2000-10-01 23:58:00 12 

9660 2000-10-02 00:05:00 15 

9661 2000-10-02 00:12:00 18 

9662 2000-10-02 00:19:00 21 

9663 2000-10-02 00:26:00 24 

9664 Freq: 7min, dtype: int64 

9665 

9666 >>> ts.resample('17min').sum() 

9667 2000-10-01 23:14:00 0 

9668 2000-10-01 23:31:00 9 

9669 2000-10-01 23:48:00 21 

9670 2000-10-02 00:05:00 54 

9671 2000-10-02 00:22:00 24 

9672 Freq: 17min, dtype: int64 

9673 

9674 >>> ts.resample('17min', origin='epoch').sum() 

9675 2000-10-01 23:18:00 0 

9676 2000-10-01 23:35:00 18 

9677 2000-10-01 23:52:00 27 

9678 2000-10-02 00:09:00 39 

9679 2000-10-02 00:26:00 24 

9680 Freq: 17min, dtype: int64 

9681 

9682 >>> ts.resample('17min', origin='2000-01-01').sum() 

9683 2000-10-01 23:24:00 3 

9684 2000-10-01 23:41:00 15 

9685 2000-10-01 23:58:00 45 

9686 2000-10-02 00:15:00 45 

9687 Freq: 17min, dtype: int64 

9688 

9689 If you want to adjust the start of the bins with an `offset` Timedelta, the two 

9690 following lines are equivalent: 

9691 

9692 >>> ts.resample('17min', origin='start').sum() 

9693 2000-10-01 23:30:00 9 

9694 2000-10-01 23:47:00 21 

9695 2000-10-02 00:04:00 54 

9696 2000-10-02 00:21:00 24 

9697 Freq: 17min, dtype: int64 

9698 

9699 >>> ts.resample('17min', offset='23h30min').sum() 

9700 2000-10-01 23:30:00 9 

9701 2000-10-01 23:47:00 21 

9702 2000-10-02 00:04:00 54 

9703 2000-10-02 00:21:00 24 

9704 Freq: 17min, dtype: int64 

9705 

9706 If you want to take the largest Timestamp as the end of the bins: 

9707 

9708 >>> ts.resample('17min', origin='end').sum() 

9709 2000-10-01 23:35:00 0 

9710 2000-10-01 23:52:00 18 

9711 2000-10-02 00:09:00 27 

9712 2000-10-02 00:26:00 63 

9713 Freq: 17min, dtype: int64 

9714 

9715 In contrast with `start_day`, you can use `end_day` to take the ceiling 

9716 midnight of the largest Timestamp as the end of the bins and drop the bins 

9717 not containing data: 

9718 

9719 >>> ts.resample('17min', origin='end_day').sum() 

9720 2000-10-01 23:38:00 3 

9721 2000-10-01 23:55:00 15 

9722 2000-10-02 00:12:00 45 

9723 2000-10-02 00:29:00 45 

9724 Freq: 17min, dtype: int64 

9725 """ 

9726 from pandas.core.resample import get_resampler 

9727 

9728 if axis is not lib.no_default: 

9729 axis = self._get_axis_number(axis) 

9730 if axis == 1: 

9731 warnings.warn( 

9732 "DataFrame.resample with axis=1 is deprecated. Do " 

9733 "`frame.T.resample(...)` without axis instead.", 

9734 FutureWarning, 

9735 stacklevel=find_stack_level(), 

9736 ) 

9737 else: 

9738 warnings.warn( 

9739 f"The 'axis' keyword in {type(self).__name__}.resample is " 

9740 "deprecated and will be removed in a future version.", 

9741 FutureWarning, 

9742 stacklevel=find_stack_level(), 

9743 ) 

9744 else: 

9745 axis = 0 

9746 

9747 if kind is not lib.no_default: 

9748 # GH#55895 

9749 warnings.warn( 

9750 f"The 'kind' keyword in {type(self).__name__}.resample is " 

9751 "deprecated and will be removed in a future version. " 

9752 "Explicitly cast the index to the desired type instead", 

9753 FutureWarning, 

9754 stacklevel=find_stack_level(), 

9755 ) 

9756 else: 

9757 kind = None 

9758 

9759 if convention is not lib.no_default: 

9760 warnings.warn( 

9761 f"The 'convention' keyword in {type(self).__name__}.resample is " 

9762 "deprecated and will be removed in a future version. " 

9763 "Explicitly cast PeriodIndex to DatetimeIndex before resampling " 

9764 "instead.", 

9765 FutureWarning, 

9766 stacklevel=find_stack_level(), 

9767 ) 

9768 else: 

9769 convention = "start" 

9770 

9771 return get_resampler( 

9772 cast("Series | DataFrame", self), 

9773 freq=rule, 

9774 label=label, 

9775 closed=closed, 

9776 axis=axis, 

9777 kind=kind, 

9778 convention=convention, 

9779 key=on, 

9780 level=level, 

9781 origin=origin, 

9782 offset=offset, 

9783 group_keys=group_keys, 

9784 ) 

9785 
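# When no aggregation is needed, ``resample(...).asfreq()`` reduces to the
# plain ``asfreq`` reindexing mentioned in See Also; sketch (values
# assumed):
#
#   >>> s = pd.Series([1.0, 2.0], index=pd.date_range('2000-01-01', periods=2, freq='min'))
#   >>> s.resample('30s').asfreq().equals(s.asfreq('30s'))
#   True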

9786 @final 

9787 def first(self, offset) -> Self: 

9788 """ 

9789 Select initial periods of time series data based on a date offset. 

9790 

9791 .. deprecated:: 2.1 

9792 :meth:`.first` is deprecated and will be removed in a future version. 

9793 Please create a mask and filter using `.loc` instead. 

9794 

9795 For a DataFrame with a sorted DatetimeIndex, this function can 

9796 select the first few rows based on a date offset. 

9797 

9798 Parameters 

9799 ---------- 

9800 offset : str, DateOffset or dateutil.relativedelta 

9801 The offset length of the data that will be selected. For instance, 

9802 '1ME' will display all the rows having their index within the first month. 

9803 

9804 Returns 

9805 ------- 

9806 Series or DataFrame 

9807 A subset of the caller. 

9808 

9809 Raises 

9810 ------ 

9811 TypeError 

9812 If the index is not a :class:`DatetimeIndex` 

9813 

9814 See Also 

9815 -------- 

9816 last : Select final periods of time series based on a date offset. 

9817 at_time : Select values at a particular time of the day. 

9818 between_time : Select values between particular times of the day. 

9819 

9820 Examples 

9821 -------- 

9822 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

9823 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

9824 >>> ts 

9825 A 

9826 2018-04-09 1 

9827 2018-04-11 2 

9828 2018-04-13 3 

9829 2018-04-15 4 

9830 

9831 Get the rows for the first 3 days: 

9832 

9833 >>> ts.first('3D') 

9834 A 

9835 2018-04-09 1 

9836 2018-04-11 2 

9837 

9838 Notice that the data for the first 3 calendar days were returned, not 

9839 the first 3 observed days in the dataset, and therefore data for 

9840 2018-04-13 was not returned. 

9841 """ 

9842 warnings.warn( 

9843 "first is deprecated and will be removed in a future version. " 

9844 "Please create a mask and filter using `.loc` instead", 

9845 FutureWarning, 

9846 stacklevel=find_stack_level(), 

9847 ) 

9848 if not isinstance(self.index, DatetimeIndex): 

9849 raise TypeError("'first' only supports a DatetimeIndex index") 

9850 

9851 if len(self.index) == 0: 

9852 return self.copy(deep=False) 

9853 

9854 offset = to_offset(offset) 

9855 if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): 

9856 # GH#29623 if first value is end of period, remove offset with n = 1 

9857 # before adding the real offset 

9858 end_date = end = self.index[0] - offset.base + offset 

9859 else: 

9860 end_date = end = self.index[0] + offset 

9861 

9862 # Tick-like, e.g. 3 weeks 

9863 if isinstance(offset, Tick) and end_date in self.index: 

9864 end = self.index.searchsorted(end_date, side="left") 

9865 return self.iloc[:end] 

9866 

9867 return self.loc[:end] 

9868 
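# For a Tick offset whose end date lands exactly on an index entry, the
# ``side="left"`` search above makes the endpoint exclusive, so exactly
# the requested span comes back; sketch (values assumed, skipped because
# ``first`` is deprecated):
#
#   >>> i = pd.date_range('2018-04-09', periods=4, freq='D')
#   >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
#   >>> ts.first('2D')  # doctest: +SKIP
#               A
#   2018-04-09  1
#   2018-04-10  2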

9869 @final 

9870 def last(self, offset) -> Self: 

9871 """ 

9872 Select final periods of time series data based on a date offset. 

9873 

9874 .. deprecated:: 2.1 

9875 :meth:`.last` is deprecated and will be removed in a future version. 

9876 Please create a mask and filter using `.loc` instead. 

9877 

9878 For a DataFrame with a sorted DatetimeIndex, this function 

9879 selects the last few rows based on a date offset. 

9880 

9881 Parameters 

9882 ---------- 

9883 offset : str, DateOffset, dateutil.relativedelta 

9884 The offset length of the data that will be selected. For instance, 

9885 '3D' will display all the rows having their index within the last 3 days. 

9886 

9887 Returns 

9888 ------- 

9889 Series or DataFrame 

9890 A subset of the caller. 

9891 

9892 Raises 

9893 ------ 

9894 TypeError 

9895 If the index is not a :class:`DatetimeIndex` 

9896 

9897 See Also 

9898 -------- 

9899 first : Select initial periods of time series based on a date offset. 

9900 at_time : Select values at a particular time of the day. 

9901 between_time : Select values between particular times of the day. 

9902 

9903 Notes 

9904 ----- 

9905 .. deprecated:: 2.1.0 

9906 Please create a mask and filter using `.loc` instead 

9907 

9908 Examples 

9909 -------- 

9910 >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') 

9911 >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) 

9912 >>> ts 

9913 A 

9914 2018-04-09 1 

9915 2018-04-11 2 

9916 2018-04-13 3 

9917 2018-04-15 4 

9918 

9919 Get the rows for the last 3 days: 

9920 

9921 >>> ts.last('3D') # doctest: +SKIP 

9922 A 

9923 2018-04-13 3 

9924 2018-04-15 4 

9925 

9926 Notice that the data for the last 3 calendar days were returned, not 

9927 the last 3 observed days in the dataset, and therefore data for 

9928 2018-04-11 was not returned. 

9929 """ 

9930 warnings.warn( 

9931 "last is deprecated and will be removed in a future version. " 

9932 "Please create a mask and filter using `.loc` instead", 

9933 FutureWarning, 

9934 stacklevel=find_stack_level(), 

9935 ) 

9936 

9937 if not isinstance(self.index, DatetimeIndex): 

9938 raise TypeError("'last' only supports a DatetimeIndex index") 

9939 

9940 if len(self.index) == 0: 

9941 return self.copy(deep=False) 

9942 

9943 offset = to_offset(offset) 

9944 

9945 start_date = self.index[-1] - offset 

9946 start = self.index.searchsorted(start_date, side="right") 

9947 return self.iloc[start:] 

9948 
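# Symmetrically, ``side="right"`` above excludes a start date that falls
# exactly on an index entry, so ``last('2D')`` returns exactly two days;
# sketch (values assumed, skipped because ``last`` is deprecated):
#
#   >>> i = pd.date_range('2018-04-09', periods=4, freq='D')
#   >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
#   >>> ts.last('2D')  # doctest: +SKIP
#               A
#   2018-04-11  3
#   2018-04-12  4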

9949 @final 

9950 def rank( 

9951 self, 

9952 axis: Axis = 0, 

9953 method: Literal["average", "min", "max", "first", "dense"] = "average", 

9954 numeric_only: bool_t = False, 

9955 na_option: Literal["keep", "top", "bottom"] = "keep", 

9956 ascending: bool_t = True, 

9957 pct: bool_t = False, 

9958 ) -> Self: 

9959 """ 

9960 Compute numerical data ranks (1 through n) along axis. 

9961 

9962 By default, equal values are assigned a rank that is the average of the 

9963 ranks of those values. 

9964 

9965 Parameters 

9966 ---------- 

9967 axis : {0 or 'index', 1 or 'columns'}, default 0 

9968 Index to direct ranking. 

9969 For `Series` this parameter is unused and defaults to 0. 

9970 method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' 

9971 How to rank the group of records that have the same value (i.e. ties): 

9972 

9973 * average: average rank of the group 

9974 * min: lowest rank in the group 

9975 * max: highest rank in the group 

9976 * first: ranks assigned in order they appear in the array 

9977 * dense: like 'min', but rank always increases by 1 between groups. 

9978 

9979 numeric_only : bool, default False 

9980 For DataFrame objects, rank only numeric columns if set to True. 

9981 

9982 .. versionchanged:: 2.0.0 

9983 The default value of ``numeric_only`` is now ``False``. 

9984 

9985 na_option : {'keep', 'top', 'bottom'}, default 'keep' 

9986 How to rank NaN values: 

9987 

9988 * keep: assign NaN rank to NaN values 

9989 * top: assign lowest rank to NaN values 

9990 * bottom: assign highest rank to NaN values 

9991 

9992 ascending : bool, default True 

9993 Whether or not the elements should be ranked in ascending order. 

9994 pct : bool, default False 

9995 Whether or not to display the returned rankings in percentile 

9996 form. 

9997 

9998 Returns 

9999 ------- 

10000 same type as caller 

10001 Return a Series or DataFrame with data ranks as values. 

10002 

10003 See Also 

10004 -------- 

10005 core.groupby.DataFrameGroupBy.rank : Rank of values within each group. 

10006 core.groupby.SeriesGroupBy.rank : Rank of values within each group. 

10007 

10008 Examples 

10009 -------- 

10010 >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', 

10011 ... 'spider', 'snake'], 

10012 ... 'Number_legs': [4, 2, 4, 8, np.nan]}) 

10013 >>> df 

10014 Animal Number_legs 

10015 0 cat 4.0 

10016 1 penguin 2.0 

10017 2 dog 4.0 

10018 3 spider 8.0 

10019 4 snake NaN 

10020 

10021 Ties are assigned the mean of the ranks (by default) for the group. 

10022 

10023 >>> s = pd.Series(range(5), index=list("abcde")) 

10024 >>> s["d"] = s["b"] 

10025 >>> s.rank() 

10026 a 1.0 

10027 b 2.5 

10028 c 4.0 

10029 d 2.5 

10030 e 5.0 

10031 dtype: float64 

10032 

10033 The following example shows how the method behaves with the above 

10034 parameters: 

10035 

10036 * default_rank: this is the default behaviour obtained without using 

10037 any parameter. 

10038 * max_rank: setting ``method = 'max'`` the records that have the 

10039 same values are ranked using the highest rank (e.g.: since 'cat' 

10040 and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) 

10041 * NA_bottom: choosing ``na_option = 'bottom'``, if there are records 

10042 with NaN values they are placed at the bottom of the ranking. 

10043 * pct_rank: when setting ``pct = True``, the ranking is expressed as 

10044 percentile rank. 

10045 

10046 >>> df['default_rank'] = df['Number_legs'].rank() 

10047 >>> df['max_rank'] = df['Number_legs'].rank(method='max') 

10048 >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') 

10049 >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) 

10050 >>> df 

10051 Animal Number_legs default_rank max_rank NA_bottom pct_rank 

10052 0 cat 4.0 2.5 3.0 2.5 0.625 

10053 1 penguin 2.0 1.0 1.0 1.0 0.250 

10054 2 dog 4.0 2.5 3.0 2.5 0.625 

10055 3 spider 8.0 4.0 4.0 4.0 1.000 

10056 4 snake NaN NaN NaN 5.0 NaN 

10057 """ 

10058 axis_int = self._get_axis_number(axis) 

10059 

10060 if na_option not in {"keep", "top", "bottom"}: 

10061 msg = "na_option must be one of 'keep', 'top', or 'bottom'" 

10062 raise ValueError(msg) 

10063 

10064 def ranker(data): 

10065 if data.ndim == 2: 

10066 # i.e. DataFrame, we cast to ndarray 

10067 values = data.values 

10068 else: 

10069 # i.e. Series, can dispatch to EA 

10070 values = data._values 

10071 

10072 if isinstance(values, ExtensionArray): 

10073 ranks = values._rank( 

10074 axis=axis_int, 

10075 method=method, 

10076 ascending=ascending, 

10077 na_option=na_option, 

10078 pct=pct, 

10079 ) 

10080 else: 

10081 ranks = algos.rank( 

10082 values, 

10083 axis=axis_int, 

10084 method=method, 

10085 ascending=ascending, 

10086 na_option=na_option, 

10087 pct=pct, 

10088 ) 

10089 

10090 ranks_obj = self._constructor(ranks, **data._construct_axes_dict()) 

10091 return ranks_obj.__finalize__(self, method="rank") 

10092 

10093 if numeric_only: 

10094 if self.ndim == 1 and not is_numeric_dtype(self.dtype): 

10095 # GH#47500 

10096 raise TypeError( 

10097 "Series.rank does not allow numeric_only=True with " 

10098 "non-numeric dtype." 

10099 ) 

10100 data = self._get_numeric_data() 

10101 else: 

10102 data = self 

10103 

10104 return ranker(data) 

10105 
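# ``method='dense'`` from the table above: tied values share a rank and
# the next distinct value ranks exactly one higher; sketch (values
# assumed):
#
#   >>> pd.Series([1, 2, 2, 3]).rank(method='dense')
#   0    1.0
#   1    2.0
#   2    2.0
#   3    3.0
#   dtype: float64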

10106 @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"]) 

10107 def compare( 

10108 self, 

10109 other, 

10110 align_axis: Axis = 1, 

10111 keep_shape: bool_t = False, 

10112 keep_equal: bool_t = False, 

10113 result_names: Suffixes = ("self", "other"), 

10114 ): 

10115 if type(self) is not type(other): 

10116 cls_self, cls_other = type(self).__name__, type(other).__name__ 

10117 raise TypeError( 

10118 f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'" 

10119 ) 

10120 

10121 mask = ~((self == other) | (self.isna() & other.isna())) 

10122 mask.fillna(True, inplace=True) 

10123 

10124 if not keep_equal: 

10125 self = self.where(mask) 

10126 other = other.where(mask) 

10127 

10128 if not keep_shape: 

10129 if isinstance(self, ABCDataFrame): 

10130 cmask = mask.any() 

10131 rmask = mask.any(axis=1) 

10132 self = self.loc[rmask, cmask] 

10133 other = other.loc[rmask, cmask] 

10134 else: 

10135 self = self[mask] 

10136 other = other[mask] 

10137 if not isinstance(result_names, tuple): 

10138 raise TypeError( 

10139 f"Passing 'result_names' as a {type(result_names)} is not " 

10140 "supported. Provide 'result_names' as a tuple instead." 

10141 ) 

10142 

10143 if align_axis in (1, "columns"): # This is needed for Series 

10144 axis = 1 

10145 else: 

10146 axis = self._get_axis_number(align_axis) 

10147 

10148 # error: List item 0 has incompatible type "NDFrame"; expected 

10149 # "Union[Series, DataFrame]" 

10150 diff = concat( 

10151 [self, other], # type: ignore[list-item] 

10152 axis=axis, 

10153 keys=result_names, 

10154 ) 

10155 

10156 if axis >= self.ndim: 

10157 # No need to reorganize data if stacking on new axis 

10158 # This currently applies for stacking two Series on columns 

10159 return diff 

10160 

10161 ax = diff._get_axis(axis) 

10162 ax_names = np.array(ax.names) 

10163 

10164 # set index names to positions to avoid confusion 

10165 ax.names = np.arange(len(ax_names)) 

10166 

10167 # bring self-other to inner level 

10168 order = list(range(1, ax.nlevels)) + [0] 

10169 if isinstance(diff, ABCDataFrame): 

10170 diff = diff.reorder_levels(order, axis=axis) 

10171 else: 

10172 diff = diff.reorder_levels(order) 

10173 

10174 # restore the index names in order 

10175 diff._get_axis(axis=axis).names = ax_names[order] 

10176 

10177 # interleave self/other entries so each compared pair sits adjacently 

10178 indices = ( 

10179 np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten() 

10180 ) 

10181 diff = diff.take(indices, axis=axis) 

10182 

10183 return diff 

10184 
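# With the defaults, rows and columns that are entirely equal are dropped
# by the ``rmask``/``cmask`` filtering above, while ``keep_shape=True``
# retains them (every column then appears as a self/other pair); sketch
# (values assumed):
#
#   >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
#   >>> df2 = pd.DataFrame({'a': [1, 9], 'b': [3, 4]})
#   >>> df1.compare(df2).shape
#   (1, 2)
#   >>> df1.compare(df2, keep_shape=True).shape
#   (2, 4)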

10185 @final 

10186 @doc( 

10187 klass=_shared_doc_kwargs["klass"], 

10188 axes_single_arg=_shared_doc_kwargs["axes_single_arg"], 

10189 ) 

10190 def align( 

10191 self, 

10192 other: NDFrameT, 

10193 join: AlignJoin = "outer", 

10194 axis: Axis | None = None, 

10195 level: Level | None = None, 

10196 copy: bool_t | None = None, 

10197 fill_value: Hashable | None = None, 

10198 method: FillnaOptions | None | lib.NoDefault = lib.no_default, 

10199 limit: int | None | lib.NoDefault = lib.no_default, 

10200 fill_axis: Axis | lib.NoDefault = lib.no_default, 

10201 broadcast_axis: Axis | None | lib.NoDefault = lib.no_default, 

10202 ) -> tuple[Self, NDFrameT]: 

10203 """ 

10204 Align two objects on their axes with the specified join method. 

10205 

10206 Join method is specified for each axis Index. 

10207 

10208 Parameters 

10209 ---------- 

10210 other : DataFrame or Series 

10211 join : {{'outer', 'inner', 'left', 'right'}}, default 'outer' 

10212 Type of alignment to be performed. 

10213 

10214 * left: use only keys from left frame, preserve key order. 

10215 * right: use only keys from right frame, preserve key order. 

10216 * outer: use union of keys from both frames, sort keys lexicographically. 

10217 * inner: use intersection of keys from both frames, 

10218 preserve the order of the left keys. 

10219 

10220 axis : allowed axis of the other object, default None 

10221 Align on index (0), columns (1), or both (None). 

10222 level : int or level name, default None 

10223 Broadcast across a level, matching Index values on the 

10224 passed MultiIndex level. 

10225 copy : bool, default True 

10226 Always returns new objects. If copy=False and no reindexing is 

10227 required then original objects are returned. 

10228 

10229 .. note:: 

10230 The `copy` keyword will change behavior in pandas 3.0. 

10231 `Copy-on-Write 

10232 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

10233 will be enabled by default, which means that all methods with a 

10234 `copy` keyword will use a lazy copy mechanism to defer the copy and 

10235 ignore the `copy` keyword. The `copy` keyword will be removed in a 

10236 future version of pandas. 

10237 

10238 You can already get the future behavior and improvements through 

10239 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

10240 fill_value : scalar, default np.nan 

10241 Value to use for missing values. Defaults to NaN, but can be any 

10242 "compatible" value. 

10243 method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None 

10244 Method to use for filling holes in reindexed Series: 

10245 

10246 - pad / ffill: propagate last valid observation forward to next valid. 

10247 - backfill / bfill: use NEXT valid observation to fill gap. 

10248 

10249 .. deprecated:: 2.1 

10250 

10251 limit : int, default None 

10252 If method is specified, this is the maximum number of consecutive 

10253 NaN values to forward/backward fill. In other words, if there is 

10254 a gap with more than this number of consecutive NaNs, it will only 

10255 be partially filled. If method is not specified, this is the 

10256 maximum number of entries along the entire axis where NaNs will be 

10257 filled. Must be greater than 0 if not None. 

10258 

10259 .. deprecated:: 2.1 

10260 

10261 fill_axis : {axes_single_arg}, default 0 

10262 Filling axis, method and limit. 

10263 

10264 .. deprecated:: 2.1 

10265 

10266 broadcast_axis : {axes_single_arg}, default None 

10267 Broadcast values along this axis, if aligning two objects of 

10268 different dimensions. 

10269 

10270 .. deprecated:: 2.1 

10271 

10272 Returns 

10273 ------- 

10274 tuple of ({klass}, type of other) 

10275 Aligned objects. 

10276 

10277 Examples 

10278 -------- 

10279 >>> df = pd.DataFrame( 

10280 ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2] 

10281 ... ) 

10282 >>> other = pd.DataFrame( 

10283 ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]], 

10284 ... columns=["A", "B", "C", "D"], 

10285 ... index=[2, 3, 4], 

10286 ... ) 

10287 >>> df 

10288 D B E A 

10289 1 1 2 3 4 

10290 2 6 7 8 9 

10291 >>> other 

10292 A B C D 

10293 2 10 20 30 40 

10294 3 60 70 80 90 

10295 4 600 700 800 900 

10296 

10297 Align on columns: 

10298 

10299 >>> left, right = df.align(other, join="outer", axis=1) 

10300 >>> left 

10301 A B C D E 

10302 1 4 2 NaN 1 3 

10303 2 9 7 NaN 6 8 

10304 >>> right 

10305 A B C D E 

10306 2 10 20 30 40 NaN 

10307 3 60 70 80 90 NaN 

10308 4 600 700 800 900 NaN 

10309 

10310 We can also align on the index: 

10311 

10312 >>> left, right = df.align(other, join="outer", axis=0) 

10313 >>> left 

10314 D B E A 

10315 1 1.0 2.0 3.0 4.0 

10316 2 6.0 7.0 8.0 9.0 

10317 3 NaN NaN NaN NaN 

10318 4 NaN NaN NaN NaN 

10319 >>> right 

10320 A B C D 

10321 1 NaN NaN NaN NaN 

10322 2 10.0 20.0 30.0 40.0 

10323 3 60.0 70.0 80.0 90.0 

10324 4 600.0 700.0 800.0 900.0 

10325 

10326 Finally, the default `axis=None` will align on both index and columns: 

10327 

10328 >>> left, right = df.align(other, join="outer", axis=None) 

10329 >>> left 

10330 A B C D E 

10331 1 4.0 2.0 NaN 1.0 3.0 

10332 2 9.0 7.0 NaN 6.0 8.0 

10333 3 NaN NaN NaN NaN NaN 

10334 4 NaN NaN NaN NaN NaN 

10335 >>> right 

10336 A B C D E 

10337 1 NaN NaN NaN NaN NaN 

10338 2 10.0 20.0 30.0 40.0 NaN 

10339 3 60.0 70.0 80.0 90.0 NaN 

10340 4 600.0 700.0 800.0 900.0 NaN 

10341 """ 

10342 if ( 

10343 method is not lib.no_default 

10344 or limit is not lib.no_default 

10345 or fill_axis is not lib.no_default 

10346 ): 

10347 # GH#51856 

10348 warnings.warn( 

10349 "The 'method', 'limit', and 'fill_axis' keywords in " 

10350 f"{type(self).__name__}.align are deprecated and will be removed " 

10351 "in a future version. Call fillna directly on the returned objects " 

10352 "instead.", 

10353 FutureWarning, 

10354 stacklevel=find_stack_level(), 

10355 ) 

10356 if fill_axis is lib.no_default: 

10357 fill_axis = 0 

10358 if method is lib.no_default: 

10359 method = None 

10360 if limit is lib.no_default: 

10361 limit = None 

10362 

10363 if method is not None: 

10364 method = clean_fill_method(method) 

10365 

10366 if broadcast_axis is not lib.no_default: 

10367 # GH#51856 

10368 # TODO(3.0): enforcing this deprecation will close GH#13194 

10369 msg = ( 

10370 f"The 'broadcast_axis' keyword in {type(self).__name__}.align is " 

10371 "deprecated and will be removed in a future version." 

10372 ) 

10373 if broadcast_axis is not None: 

10374 if self.ndim == 1 and other.ndim == 2: 

10375 msg += ( 

10376 " Use left = DataFrame({col: left for col in right.columns}, " 

10377 "index=right.index) before calling `left.align(right)` instead." 

10378 ) 

10379 elif self.ndim == 2 and other.ndim == 1: 

10380 msg += ( 

10381 " Use right = DataFrame({col: right for col in left.columns}, " 

10382 "index=left.index) before calling `left.align(right)` instead" 

10383 ) 

10384 warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) 

10385 else: 

10386 broadcast_axis = None 

10387 

10388 if broadcast_axis == 1 and self.ndim != other.ndim: 

10389 if isinstance(self, ABCSeries): 

10390 # this means other is a DataFrame, and we need to broadcast 

10391 # self 

10392 cons = self._constructor_expanddim 

10393 df = cons( 

10394 {c: self for c in other.columns}, **other._construct_axes_dict() 

10395 ) 

10396 # error: Incompatible return value type (got "Tuple[DataFrame, 

10397 # DataFrame]", expected "Tuple[Self, NDFrameT]") 

10398 return df._align_frame( # type: ignore[return-value] 

10399 other, # type: ignore[arg-type] 

10400 join=join, 

10401 axis=axis, 

10402 level=level, 

10403 copy=copy, 

10404 fill_value=fill_value, 

10405 method=method, 

10406 limit=limit, 

10407 fill_axis=fill_axis, 

10408 )[:2] 

10409 elif isinstance(other, ABCSeries): 

10410 # this means self is a DataFrame, and we need to broadcast 

10411 # other 

10412 cons = other._constructor_expanddim 

10413 df = cons( 

10414 {c: other for c in self.columns}, **self._construct_axes_dict() 

10415 ) 

10416 # error: Incompatible return value type (got "Tuple[NDFrameT, 

10417 # DataFrame]", expected "Tuple[Self, NDFrameT]") 

10418 return self._align_frame( # type: ignore[return-value] 

10419 df, 

10420 join=join, 

10421 axis=axis, 

10422 level=level, 

10423 copy=copy, 

10424 fill_value=fill_value, 

10425 method=method, 

10426 limit=limit, 

10427 fill_axis=fill_axis, 

10428 )[:2] 

10429 

10430 _right: DataFrame | Series 

10431 if axis is not None: 

10432 axis = self._get_axis_number(axis) 

10433 if isinstance(other, ABCDataFrame): 

10434 left, _right, join_index = self._align_frame( 

10435 other, 

10436 join=join, 

10437 axis=axis, 

10438 level=level, 

10439 copy=copy, 

10440 fill_value=fill_value, 

10441 method=method, 

10442 limit=limit, 

10443 fill_axis=fill_axis, 

10444 ) 

10445 

10446 elif isinstance(other, ABCSeries): 

10447 left, _right, join_index = self._align_series( 

10448 other, 

10449 join=join, 

10450 axis=axis, 

10451 level=level, 

10452 copy=copy, 

10453 fill_value=fill_value, 

10454 method=method, 

10455 limit=limit, 

10456 fill_axis=fill_axis, 

10457 ) 

10458 else: # pragma: no cover 

10459 raise TypeError(f"unsupported type: {type(other)}") 

10460 

10461 right = cast(NDFrameT, _right) 

10462 if self.ndim == 1 or axis == 0: 

10463 # If we are aligning timezone-aware DatetimeIndexes and the timezones 

10464 # do not match, convert both to UTC. 

10465 if isinstance(left.index.dtype, DatetimeTZDtype): 

10466 if left.index.tz != right.index.tz: 

10467 if join_index is not None: 

10468 # GH#33671 copy to ensure we don't change the index on 

10469 # our original Series 

10470 left = left.copy(deep=False) 

10471 right = right.copy(deep=False) 

10472 left.index = join_index 

10473 right.index = join_index 

10474 

10475 left = left.__finalize__(self) 

10476 right = right.__finalize__(other) 

10477 return left, right 

10478 
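# The timezone branch above: aligning two tz-aware indexes with different
# zones leaves both results on a UTC join index; sketch (values assumed):
#
#   >>> s1 = pd.Series([1], index=pd.DatetimeIndex(['2020-01-01'], tz='US/Eastern'))
#   >>> s2 = pd.Series([2], index=pd.DatetimeIndex(['2020-01-01'], tz='UTC'))
#   >>> left, right = s1.align(s2)
#   >>> str(left.index.tz)
#   'UTC'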

10479 @final 

10480 def _align_frame( 

10481 self, 

10482 other: DataFrame, 

10483 join: AlignJoin = "outer", 

10484 axis: Axis | None = None, 

10485 level=None, 

10486 copy: bool_t | None = None, 

10487 fill_value=None, 

10488 method=None, 

10489 limit: int | None = None, 

10490 fill_axis: Axis = 0, 

10491 ) -> tuple[Self, DataFrame, Index | None]: 

10492 # defaults 

10493 join_index, join_columns = None, None 

10494 ilidx, iridx = None, None 

10495 clidx, cridx = None, None 

10496 

10497 is_series = isinstance(self, ABCSeries) 

10498 

10499 if (axis is None or axis == 0) and not self.index.equals(other.index): 

10500 join_index, ilidx, iridx = self.index.join( 

10501 other.index, how=join, level=level, return_indexers=True 

10502 ) 

10503 

10504 if ( 

10505 (axis is None or axis == 1) 

10506 and not is_series 

10507 and not self.columns.equals(other.columns) 

10508 ): 

10509 join_columns, clidx, cridx = self.columns.join( 

10510 other.columns, how=join, level=level, return_indexers=True 

10511 ) 

10512 

10513 if is_series: 

10514 reindexers = {0: [join_index, ilidx]} 

10515 else: 

10516 reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} 

10517 

10518 left = self._reindex_with_indexers( 

10519 reindexers, copy=copy, fill_value=fill_value, allow_dups=True 

10520 ) 

10521 # other must always be a DataFrame 

10522 right = other._reindex_with_indexers( 

10523 {0: [join_index, iridx], 1: [join_columns, cridx]}, 

10524 copy=copy, 

10525 fill_value=fill_value, 

10526 allow_dups=True, 

10527 ) 

10528 

10529 if method is not None: 

10530 left = left._pad_or_backfill(method, axis=fill_axis, limit=limit) 

10531 right = right._pad_or_backfill(method, axis=fill_axis, limit=limit) 

10532 

10533 return left, right, join_index 

10534 
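    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # ``Index.join(..., return_indexers=True)``, used throughout ``_align_frame``,
    # returns the joined labels plus positional indexers into each original
    # index (``-1`` marks a label missing from that side):
    #
    #   >>> import pandas as pd
    #   >>> left, right = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])
    #   >>> joined, lidx, ridx = left.join(right, how="outer", return_indexers=True)
    #   >>> joined.tolist(), lidx.tolist(), ridx.tolist()
    #   ([1, 2, 3, 4], [0, 1, 2, -1], [-1, 0, 1, 2])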

10535 @final 

10536 def _align_series( 

10537 self, 

10538 other: Series, 

10539 join: AlignJoin = "outer", 

10540 axis: Axis | None = None, 

10541 level=None, 

10542 copy: bool_t | None = None, 

10543 fill_value=None, 

10544 method=None, 

10545 limit: int | None = None, 

10546 fill_axis: Axis = 0, 

10547 ) -> tuple[Self, Series, Index | None]: 

10548 is_series = isinstance(self, ABCSeries) 

10549 if copy and using_copy_on_write(): 

10550 copy = False 

10551 

10552 if (not is_series and axis is None) or axis not in [None, 0, 1]: 

10553 raise ValueError("Must specify axis=0 or 1") 

10554 

10555 if is_series and axis == 1: 

10556 raise ValueError("cannot align series to a series other than axis 0") 

10557 

10558 # series/series compat, other must always be a Series 

10559 if not axis: 

10560 # equal 

10561 if self.index.equals(other.index): 

10562 join_index, lidx, ridx = None, None, None 

10563 else: 

10564 join_index, lidx, ridx = self.index.join( 

10565 other.index, how=join, level=level, return_indexers=True 

10566 ) 

10567 

10568 if is_series: 

10569 left = self._reindex_indexer(join_index, lidx, copy) 

10570 elif lidx is None or join_index is None: 

10571 left = self.copy(deep=copy) 

10572 else: 

10573 new_mgr = self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) 

10574 left = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) 

10575 

10576 right = other._reindex_indexer(join_index, ridx, copy) 

10577 

10578 else: 

10579 # one has > 1 ndim 

10580 fdata = self._mgr 

10581 join_index = self.axes[1] 

10582 lidx, ridx = None, None 

10583 if not join_index.equals(other.index): 

10584 join_index, lidx, ridx = join_index.join( 

10585 other.index, how=join, level=level, return_indexers=True 

10586 ) 

10587 

10588 if lidx is not None: 

10589 bm_axis = self._get_block_manager_axis(1) 

10590 fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis) 

10591 

10592 if copy and fdata is self._mgr: 

10593 fdata = fdata.copy() 

10594 

10595 left = self._constructor_from_mgr(fdata, axes=fdata.axes) 

10596 

10597 if ridx is None: 

10598 right = other.copy(deep=copy) 

10599 else: 

10600 right = other.reindex(join_index, level=level) 

10601 

10602 # fill 

10603 fill_na = notna(fill_value) or (method is not None) 

10604 if fill_na: 

10605 fill_value, method = validate_fillna_kwargs(fill_value, method) 

10606 if method is not None: 

10607 left = left._pad_or_backfill(method, limit=limit, axis=fill_axis) 

10608 right = right._pad_or_backfill(method, limit=limit) 

10609 else: 

10610 left = left.fillna(fill_value, limit=limit, axis=fill_axis) 

10611 right = right.fillna(fill_value, limit=limit) 

10612 

10613 return left, right, join_index 

10614 
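    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # In the frame/series branch above, ``axis=0`` joins on the frame's index,
    # while ``axis=1`` (the ``else`` branch) joins the frame's columns against
    # the series' index:
    #
    #   >>> import pandas as pd
    #   >>> df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    #   >>> s = pd.Series({"b": 10, "c": 20})
    #   >>> aligned_df, aligned_s = df.align(s, join="outer", axis=1)
    #   >>> list(aligned_df.columns), list(aligned_s.index)
    #   (['a', 'b', 'c'], ['a', 'b', 'c'])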

10615 @final 

10616 def _where( 

10617 self, 

10618 cond, 

10619 other=lib.no_default, 

10620 inplace: bool_t = False, 

10621 axis: Axis | None = None, 

10622 level=None, 

10623 warn: bool_t = True, 

10624 ): 

10625 """ 

10626 Equivalent to public method `where`, except that `other` is not 

10627 applied as a function even if callable. Used in __setitem__. 

10628 """ 

10629 inplace = validate_bool_kwarg(inplace, "inplace") 

10630 

10631 if axis is not None: 

10632 axis = self._get_axis_number(axis) 

10633 

10634 # align the cond to same shape as myself 

10635 cond = common.apply_if_callable(cond, self) 

10636 if isinstance(cond, NDFrame): 

10637 # CoW: Make sure reference is not kept alive 

10638 if cond.ndim == 1 and self.ndim == 2: 

10639 cond = cond._constructor_expanddim( 

10640 {i: cond for i in range(len(self.columns))}, 

10641 copy=False, 

10642 ) 

10643 cond.columns = self.columns 

10644 cond = cond.align(self, join="right", copy=False)[0] 

10645 else: 

10646 if not hasattr(cond, "shape"): 

10647 cond = np.asanyarray(cond) 

10648 if cond.shape != self.shape: 

10649 raise ValueError("Array conditional must be same shape as self") 

10650 cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) 

10651 

10652 # make sure we are boolean 

10653 fill_value = bool(inplace) 

10654 with warnings.catch_warnings(): 

10655 warnings.filterwarnings( 

10656 "ignore", 

10657 "Downcasting object dtype arrays", 

10658 category=FutureWarning, 

10659 ) 

10660 cond = cond.fillna(fill_value) 

10661 cond = cond.infer_objects(copy=False) 

10662 

10663 msg = "Boolean array expected for the condition, not {dtype}" 

10664 

10665 if not cond.empty: 

10666 if not isinstance(cond, ABCDataFrame): 

10667 # This is a single-dimensional object. 

10668 if not is_bool_dtype(cond): 

10669 raise ValueError(msg.format(dtype=cond.dtype)) 

10670 else: 

10671 for _dt in cond.dtypes: 

10672 if not is_bool_dtype(_dt): 

10673 raise ValueError(msg.format(dtype=_dt)) 

10674 if cond._mgr.any_extension_types: 

10675 # GH51574: avoid object ndarray conversion later on 

10676 cond = cond._constructor( 

10677 cond.to_numpy(dtype=bool, na_value=fill_value), 

10678 **cond._construct_axes_dict(), 

10679 ) 

10680 else: 

10681 # GH#21947 we have an empty DataFrame/Series, could be object-dtype 

10682 cond = cond.astype(bool) 

10683 

10684 cond = -cond if inplace else cond 

10685 cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False) 

10686 

10687 # try to align with other 

10688 if isinstance(other, NDFrame): 

10689 # align with me 

10690 if other.ndim <= self.ndim: 

10691 # CoW: Make sure reference is not kept alive 

10692 other = self.align( 

10693 other, 

10694 join="left", 

10695 axis=axis, 

10696 level=level, 

10697 fill_value=None, 

10698 copy=False, 

10699 )[1] 

10700 

10701 # if we are NOT aligned, raise, as we cannot apply `where` across misaligned indexes 

10702 if axis is None and not other._indexed_same(self): 

10703 raise InvalidIndexError 

10704 

10705 if other.ndim < self.ndim: 

10706 # TODO(EA2D): avoid object-dtype cast in EA case GH#38729 

10707 other = other._values 

10708 if axis == 0: 

10709 other = np.reshape(other, (-1, 1)) 

10710 elif axis == 1: 

10711 other = np.reshape(other, (1, -1)) 

10712 

10713 other = np.broadcast_to(other, self.shape) 

10714 

10715 # slice me out of the other 

10716 else: 

10717 raise NotImplementedError( 

10718 "cannot align with a higher dimensional NDFrame" 

10719 ) 

10720 

10721 elif not isinstance(other, (MultiIndex, NDFrame)): 

10722 # mainly just catching Index here 

10723 other = extract_array(other, extract_numpy=True) 

10724 

10725 if isinstance(other, (np.ndarray, ExtensionArray)): 

10726 if other.shape != self.shape: 

10727 if self.ndim != 1: 

10728 # In the ndim == 1 case we may have 

10729 # other length 1, which we treat as scalar (GH#2745, GH#4192) 

10730 # or len(other) == icond.sum(), which we treat like 

10731 # __setitem__ (GH#3235) 

10732 raise ValueError( 

10733 "other must be the same shape as self when an ndarray" 

10734 ) 

10735 

10736 # we are the same shape, so create an actual object for alignment 

10737 else: 

10738 other = self._constructor( 

10739 other, **self._construct_axes_dict(), copy=False 

10740 ) 

10741 

10742 if axis is None: 

10743 axis = 0 

10744 

10745 if self.ndim == getattr(other, "ndim", 0): 

10746 align = True 

10747 else: 

10748 align = self._get_axis_number(axis) == 1 

10749 

10750 if inplace: 

10751 # we may have different type blocks come out of putmask, so 

10752 # reconstruct the block manager 

10753 

10754 new_data = self._mgr.putmask(mask=cond, new=other, align=align, warn=warn) 

10755 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

10756 return self._update_inplace(result) 

10757 

10758 else: 

10759 new_data = self._mgr.where( 

10760 other=other, 

10761 cond=cond, 

10762 align=align, 

10763 ) 

10764 result = self._constructor_from_mgr(new_data, axes=new_data.axes) 

10765 return result.__finalize__(self) 

10766 
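    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # As the docstring above says, ``_where`` never applies ``other`` as a
    # function; that happens only in the public ``where``/``mask`` wrappers:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2, 3])
    #   >>> s.where(s > 1, lambda x: x * 10)  # public API evaluates the callable
    #   0    10
    #   1     2
    #   2     3
    #   dtype: int64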

10767 @overload 

10768 def where( 

10769 self, 

10770 cond, 

10771 other=..., 

10772 *, 

10773 inplace: Literal[False] = ..., 

10774 axis: Axis | None = ..., 

10775 level: Level = ..., 

10776 ) -> Self: 

10777 ... 

10778 

10779 @overload 

10780 def where( 

10781 self, 

10782 cond, 

10783 other=..., 

10784 *, 

10785 inplace: Literal[True], 

10786 axis: Axis | None = ..., 

10787 level: Level = ..., 

10788 ) -> None: 

10789 ... 

10790 

10791 @overload 

10792 def where( 

10793 self, 

10794 cond, 

10795 other=..., 

10796 *, 

10797 inplace: bool_t = ..., 

10798 axis: Axis | None = ..., 

10799 level: Level = ..., 

10800 ) -> Self | None: 

10801 ... 

10802 

10803 @final 

10804 @doc( 

10805 klass=_shared_doc_kwargs["klass"], 

10806 cond="True", 

10807 cond_rev="False", 

10808 name="where", 

10809 name_other="mask", 

10810 ) 

10811 def where( 

10812 self, 

10813 cond, 

10814 other=np.nan, 

10815 *, 

10816 inplace: bool_t = False, 

10817 axis: Axis | None = None, 

10818 level: Level | None = None, 

10819 ) -> Self | None: 

10820 """ 

10821 Replace values where the condition is {cond_rev}. 

10822 

10823 Parameters 

10824 ---------- 

10825 cond : bool {klass}, array-like, or callable 

10826 Where `cond` is {cond}, keep the original value. Where 

10827 {cond_rev}, replace with corresponding value from `other`. 

10828 If `cond` is callable, it is computed on the {klass} and 

10829 should return boolean {klass} or array. The callable must 

10830 not change input {klass} (though pandas doesn't check it). 

10831 other : scalar, {klass}, or callable 

10832 Entries where `cond` is {cond_rev} are replaced with 

10833 corresponding value from `other`. 

10834 If other is callable, it is computed on the {klass} and 

10835 should return scalar or {klass}. The callable must not 

10836 change input {klass} (though pandas doesn't check it). 

10837 If not specified, entries will be filled with the corresponding 

10838 NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension 

10839 dtypes). 

10840 inplace : bool, default False 

10841 Whether to perform the operation in place on the data. 

10842 axis : int, default None 

10843 Alignment axis if needed. For `Series` this parameter is 

10844 unused and defaults to 0. 

10845 level : int, default None 

10846 Alignment level if needed. 

10847 

10848 Returns 

10849 ------- 

10850 Same type as caller or None if ``inplace=True``. 

10851 

10852 See Also 

10853 -------- 

10854 :func:`DataFrame.{name_other}` : Return an object of same shape as 

10855 self. 

10856 

10857 Notes 

10858 ----- 

10859 The {name} method is an application of the if-then idiom. For each 

10860 element in the calling DataFrame, if ``cond`` is ``{cond}`` the 

10861 element is used; otherwise the corresponding element from the DataFrame 

10862 ``other`` is used. If the axis of ``other`` does not align with axis of 

10863 ``cond`` {klass}, the misaligned index positions will be filled with 

10864 {cond_rev}. 

10865 

10866 The signature for :func:`DataFrame.where` differs from 

10867 :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to 

10868 ``np.where(m, df1, df2)``. 

10869 

10870 For further details and examples see the ``{name}`` documentation in 

10871 :ref:`indexing <indexing.where_mask>`. 

10872 

10873 The dtype of the object takes precedence. The fill value is cast to 

10874 the object's dtype, if this can be done losslessly. 

10875 

10876 Examples 

10877 -------- 

10878 >>> s = pd.Series(range(5)) 

10879 >>> s.where(s > 0) 

10880 0 NaN 

10881 1 1.0 

10882 2 2.0 

10883 3 3.0 

10884 4 4.0 

10885 dtype: float64 

10886 >>> s.mask(s > 0) 

10887 0 0.0 

10888 1 NaN 

10889 2 NaN 

10890 3 NaN 

10891 4 NaN 

10892 dtype: float64 

10893 

10894 >>> s = pd.Series(range(5)) 

10895 >>> t = pd.Series([True, False]) 

10896 >>> s.where(t, 99) 

10897 0 0 

10898 1 99 

10899 2 99 

10900 3 99 

10901 4 99 

10902 dtype: int64 

10903 >>> s.mask(t, 99) 

10904 0 99 

10905 1 1 

10906 2 99 

10907 3 99 

10908 4 99 

10909 dtype: int64 

10910 

10911 >>> s.where(s > 1, 10) 

10912 0 10 

10913 1 10 

10914 2 2 

10915 3 3 

10916 4 4 

10917 dtype: int64 

10918 >>> s.mask(s > 1, 10) 

10919 0 0 

10920 1 1 

10921 2 10 

10922 3 10 

10923 4 10 

10924 dtype: int64 

10925 

10926 >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) 

10927 >>> df 

10928 A B 

10929 0 0 1 

10930 1 2 3 

10931 2 4 5 

10932 3 6 7 

10933 4 8 9 

10934 >>> m = df % 3 == 0 

10935 >>> df.where(m, -df) 

10936 A B 

10937 0 0 -1 

10938 1 -2 3 

10939 2 -4 -5 

10940 3 6 -7 

10941 4 -8 9 

10942 >>> df.where(m, -df) == np.where(m, df, -df) 

10943 A B 

10944 0 True True 

10945 1 True True 

10946 2 True True 

10947 3 True True 

10948 4 True True 

10949 >>> df.where(m, -df) == df.mask(~m, -df) 

10950 A B 

10951 0 True True 

10952 1 True True 

10953 2 True True 

10954 3 True True 

10955 4 True True 

10956 """ 

10957 inplace = validate_bool_kwarg(inplace, "inplace") 

10958 if inplace: 

10959 if not PYPY and using_copy_on_write(): 

10960 if sys.getrefcount(self) <= REF_COUNT: 

10961 warnings.warn( 

10962 _chained_assignment_method_msg, 

10963 ChainedAssignmentError, 

10964 stacklevel=2, 

10965 ) 

10966 elif ( 

10967 not PYPY 

10968 and not using_copy_on_write() 

10969 and self._is_view_after_cow_rules() 

10970 ): 

10971 ctr = sys.getrefcount(self) 

10972 ref_count = REF_COUNT 

10973 if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): 

10974 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

10975 ref_count += 1 

10976 if ctr <= ref_count: 

10977 warnings.warn( 

10978 _chained_assignment_warning_method_msg, 

10979 FutureWarning, 

10980 stacklevel=2, 

10981 ) 

10982 

10983 other = common.apply_if_callable(other, self) 

10984 return self._where(cond, other, inplace, axis, level) 

10985 
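    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # ``cond`` may itself be a callable; it is evaluated against the caller
    # (via ``common.apply_if_callable`` in ``_where``) before masking:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2, 3])
    #   >>> s.where(lambda x: x % 2 == 1, -1)
    #   0     1
    #   1    -1
    #   2     3
    #   dtype: int64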

10986 @overload 

10987 def mask( 

10988 self, 

10989 cond, 

10990 other=..., 

10991 *, 

10992 inplace: Literal[False] = ..., 

10993 axis: Axis | None = ..., 

10994 level: Level = ..., 

10995 ) -> Self: 

10996 ... 

10997 

10998 @overload 

10999 def mask( 

11000 self, 

11001 cond, 

11002 other=..., 

11003 *, 

11004 inplace: Literal[True], 

11005 axis: Axis | None = ..., 

11006 level: Level = ..., 

11007 ) -> None: 

11008 ... 

11009 

11010 @overload 

11011 def mask( 

11012 self, 

11013 cond, 

11014 other=..., 

11015 *, 

11016 inplace: bool_t = ..., 

11017 axis: Axis | None = ..., 

11018 level: Level = ..., 

11019 ) -> Self | None: 

11020 ... 

11021 

11022 @final 

11023 @doc( 

11024 where, 

11025 klass=_shared_doc_kwargs["klass"], 

11026 cond="False", 

11027 cond_rev="True", 

11028 name="mask", 

11029 name_other="where", 

11030 ) 

11031 def mask( 

11032 self, 

11033 cond, 

11034 other=lib.no_default, 

11035 *, 

11036 inplace: bool_t = False, 

11037 axis: Axis | None = None, 

11038 level: Level | None = None, 

11039 ) -> Self | None: 

11040 inplace = validate_bool_kwarg(inplace, "inplace") 

11041 if inplace: 

11042 if not PYPY and using_copy_on_write(): 

11043 if sys.getrefcount(self) <= REF_COUNT: 

11044 warnings.warn( 

11045 _chained_assignment_method_msg, 

11046 ChainedAssignmentError, 

11047 stacklevel=2, 

11048 ) 

11049 elif ( 

11050 not PYPY 

11051 and not using_copy_on_write() 

11052 and self._is_view_after_cow_rules() 

11053 ): 

11054 ctr = sys.getrefcount(self) 

11055 ref_count = REF_COUNT 

11056 if isinstance(self, ABCSeries) and hasattr(self, "_cacher"): 

11057 # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 

11058 ref_count += 1 

11059 if ctr <= ref_count: 

11060 warnings.warn( 

11061 _chained_assignment_warning_method_msg, 

11062 FutureWarning, 

11063 stacklevel=2, 

11064 ) 

11065 

11066 cond = common.apply_if_callable(cond, self) 

11067 other = common.apply_if_callable(other, self) 

11068 

11069 # see gh-21891 

11070 if not hasattr(cond, "__invert__"): 

11071 cond = np.array(cond) 

11072 

11073 return self._where( 

11074 ~cond, 

11075 other=other, 

11076 inplace=inplace, 

11077 axis=axis, 

11078 level=level, 

11079 ) 

11080 
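    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # As implemented above, ``mask`` inverts ``cond`` and delegates to
    # ``_where``, so for a boolean condition it mirrors ``where``:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2, 3])
    #   >>> s.mask(s > 1, 0).equals(s.where(~(s > 1), 0))
    #   True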

11081 @doc(klass=_shared_doc_kwargs["klass"]) 

11082 def shift( 

11083 self, 

11084 periods: int | Sequence[int] = 1, 

11085 freq=None, 

11086 axis: Axis = 0, 

11087 fill_value: Hashable = lib.no_default, 

11088 suffix: str | None = None, 

11089 ) -> Self | DataFrame: 

11090 """ 

11091 Shift index by desired number of periods with an optional time `freq`. 

11092 

11093 When `freq` is not passed, shift the index without realigning the data. 

11094 If `freq` is passed (in this case, the index must be date or datetime, 

11095 or it will raise a `NotImplementedError`), the index will be 

11096 increased using the periods and the `freq`. `freq` can be inferred 

11097 when specified as "infer" as long as either freq or inferred_freq 

11098 attribute is set in the index. 

11099 

11100 Parameters 

11101 ---------- 

11102 periods : int or Sequence 

11103 Number of periods to shift. Can be positive or negative. 

11104 If an iterable of ints, the data will be shifted once by each int. 

11105 This is equivalent to shifting by one value at a time and 

11106 concatenating all resulting frames. The resulting columns will have 

11107 the shift suffixed to their column names. For multiple periods, 

11108 axis must not be 1. 

11109 freq : DateOffset, tseries.offsets, timedelta, or str, optional 

11110 Offset to use from the tseries module or time rule (e.g. 'EOM'). 

11111 If `freq` is specified then the index values are shifted but the 

11112 data is not realigned. That is, use `freq` if you would like to 

11113 extend the index when shifting and preserve the original data. 

11114 If `freq` is specified as "infer" then it will be inferred from 

11115 the freq or inferred_freq attributes of the index. If neither of 

11116 those attributes exist, a ValueError is thrown. 

11117 axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

11118 Shift direction. For `Series` this parameter is unused and defaults to 0. 

11119 fill_value : object, optional 

11120 The scalar value to use for newly introduced missing values. 

11121 The default depends on the dtype of `self`. 

11122 For numeric data, ``np.nan`` is used. 

11123 For datetime, timedelta, or period data, etc. :attr:`NaT` is used. 

11124 For extension dtypes, ``self.dtype.na_value`` is used. 

11125 suffix : str, optional 

11126 If str and periods is an iterable, this is added after the column 

11127 name and before the shift value for each shifted column name. 

11128 

11129 Returns 

11130 ------- 

11131 {klass} 

11132 Copy of input object, shifted. 

11133 

11134 See Also 

11135 -------- 

11136 Index.shift : Shift values of Index. 

11137 DatetimeIndex.shift : Shift values of DatetimeIndex. 

11138 PeriodIndex.shift : Shift values of PeriodIndex. 

11139 

11140 Examples 

11141 -------- 

11142 >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], 

11143 ... "Col2": [13, 23, 18, 33, 48], 

11144 ... "Col3": [17, 27, 22, 37, 52]}}, 

11145 ... index=pd.date_range("2020-01-01", "2020-01-05")) 

11146 >>> df 

11147 Col1 Col2 Col3 

11148 2020-01-01 10 13 17 

11149 2020-01-02 20 23 27 

11150 2020-01-03 15 18 22 

11151 2020-01-04 30 33 37 

11152 2020-01-05 45 48 52 

11153 

11154 >>> df.shift(periods=3) 

11155 Col1 Col2 Col3 

11156 2020-01-01 NaN NaN NaN 

11157 2020-01-02 NaN NaN NaN 

11158 2020-01-03 NaN NaN NaN 

11159 2020-01-04 10.0 13.0 17.0 

11160 2020-01-05 20.0 23.0 27.0 

11161 

11162 >>> df.shift(periods=1, axis="columns") 

11163 Col1 Col2 Col3 

11164 2020-01-01 NaN 10 13 

11165 2020-01-02 NaN 20 23 

11166 2020-01-03 NaN 15 18 

11167 2020-01-04 NaN 30 33 

11168 2020-01-05 NaN 45 48 

11169 

11170 >>> df.shift(periods=3, fill_value=0) 

11171 Col1 Col2 Col3 

11172 2020-01-01 0 0 0 

11173 2020-01-02 0 0 0 

11174 2020-01-03 0 0 0 

11175 2020-01-04 10 13 17 

11176 2020-01-05 20 23 27 

11177 

11178 >>> df.shift(periods=3, freq="D") 

11179 Col1 Col2 Col3 

11180 2020-01-04 10 13 17 

11181 2020-01-05 20 23 27 

11182 2020-01-06 15 18 22 

11183 2020-01-07 30 33 37 

11184 2020-01-08 45 48 52 

11185 

11186 >>> df.shift(periods=3, freq="infer") 

11187 Col1 Col2 Col3 

11188 2020-01-04 10 13 17 

11189 2020-01-05 20 23 27 

11190 2020-01-06 15 18 22 

11191 2020-01-07 30 33 37 

11192 2020-01-08 45 48 52 

11193 

11194 >>> df['Col1'].shift(periods=[0, 1, 2]) 

11195 Col1_0 Col1_1 Col1_2 

11196 2020-01-01 10 NaN NaN 

11197 2020-01-02 20 10.0 NaN 

11198 2020-01-03 15 20.0 10.0 

11199 2020-01-04 30 15.0 20.0 

11200 2020-01-05 45 30.0 15.0 

11201 """ 

11202 axis = self._get_axis_number(axis) 

11203 

11204 if freq is not None and fill_value is not lib.no_default: 

11205 # GH#53832 

11206 warnings.warn( 

11207 "Passing a 'freq' together with a 'fill_value' silently ignores " 

11208 "the fill_value and is deprecated. This will raise in a future " 

11209 "version.", 

11210 FutureWarning, 

11211 stacklevel=find_stack_level(), 

11212 ) 

11213 fill_value = lib.no_default 

11214 

11215 if periods == 0: 

11216 return self.copy(deep=None) 

11217 

11218 if is_list_like(periods) and isinstance(self, ABCSeries): 

11219 return self.to_frame().shift( 

11220 periods=periods, freq=freq, axis=axis, fill_value=fill_value 

11221 ) 

11222 periods = cast(int, periods) 

11223 

11224 if freq is None: 

11225 # when freq is None, data is shifted, index is not 

11226 axis = self._get_axis_number(axis) 

11227 assert axis == 0 # axis == 1 cases handled in DataFrame.shift 

11228 new_data = self._mgr.shift(periods=periods, fill_value=fill_value) 

11229 return self._constructor_from_mgr( 

11230 new_data, axes=new_data.axes 

11231 ).__finalize__(self, method="shift") 

11232 

11233 return self._shift_with_freq(periods, axis, freq) 

11234 
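    # Editor's note -- an illustrative sketch, not part of the pandas source,
    # of the ``suffix`` parameter documented above (only honored for list-like
    # ``periods``; note that the Series branch above forwards ``periods``,
    # ``freq``, ``axis`` and ``fill_value`` but not ``suffix``). Column naming
    # is assumed to follow the ``Col1_0``-style example in the docstring:
    #
    #   >>> import pandas as pd
    #   >>> df = pd.DataFrame({"Col1": [10, 20, 15]})
    #   >>> df.shift(periods=[0, 1], suffix="_shift").columns.tolist()
    #   ['Col1_shift_0', 'Col1_shift_1']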

11235 @final 

11236 def _shift_with_freq(self, periods: int, axis: int, freq) -> Self: 

11237 # see shift.__doc__ 

11238 # when freq is given, index is shifted, data is not 

11239 index = self._get_axis(axis) 

11240 

11241 if freq == "infer": 

11242 freq = getattr(index, "freq", None) 

11243 

11244 if freq is None: 

11245 freq = getattr(index, "inferred_freq", None) 

11246 

11247 if freq is None: 

11248 msg = "Freq was not set in the index hence cannot be inferred" 

11249 raise ValueError(msg) 

11250 

11251 elif isinstance(freq, str): 

11252 is_period = isinstance(index, PeriodIndex) 

11253 freq = to_offset(freq, is_period=is_period) 

11254 

11255 if isinstance(index, PeriodIndex): 

11256 orig_freq = to_offset(index.freq) 

11257 if freq != orig_freq: 

11258 assert orig_freq is not None # for mypy 

11259 raise ValueError( 

11260 f"Given freq {freq_to_period_freqstr(freq.n, freq.name)} " 

11261 f"does not match PeriodIndex freq " 

11262 f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}" 

11263 ) 

11264 new_ax = index.shift(periods) 

11265 else: 

11266 new_ax = index.shift(periods, freq) 

11267 

11268 result = self.set_axis(new_ax, axis=axis) 

11269 return result.__finalize__(self, method="shift") 

11270 
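    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # With ``freq`` given, the data stays put and only the index labels move,
    # i.e. the result is ``self.set_axis(index.shift(periods, freq))``:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2], index=pd.date_range("2020-01-01", periods=2))
    #   >>> s.shift(1, freq="D").index[0]
    #   Timestamp('2020-01-02 00:00:00')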

11271 @final 

11272 def truncate( 

11273 self, 

11274 before=None, 

11275 after=None, 

11276 axis: Axis | None = None, 

11277 copy: bool_t | None = None, 

11278 ) -> Self: 

11279 """ 

11280 Truncate a Series or DataFrame before and after some index value. 

11281 

11282 This is a useful shorthand for boolean indexing based on index 

11283 values above or below certain thresholds. 

11284 

11285 Parameters 

11286 ---------- 

11287 before : date, str, int 

11288 Truncate all rows before this index value. 

11289 after : date, str, int 

11290 Truncate all rows after this index value. 

11291 axis : {0 or 'index', 1 or 'columns'}, optional 

11292 Axis to truncate. Truncates the index (rows) by default. 

11293 For `Series` this parameter is unused and defaults to 0. 

11294 copy : bool, default True 

11295 Return a copy of the truncated section. 

11296 

11297 .. note:: 

11298 The `copy` keyword will change behavior in pandas 3.0. 

11299 `Copy-on-Write 

11300 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

11301 will be enabled by default, which means that all methods with a 

11302 `copy` keyword will use a lazy copy mechanism to defer the copy and 

11303 ignore the `copy` keyword. The `copy` keyword will be removed in a 

11304 future version of pandas. 

11305 

11306 You can already get the future behavior and improvements through 

11307 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

11308 

11309 Returns 

11310 ------- 

11311 type of caller 

11312 The truncated Series or DataFrame. 

11313 

11314 See Also 

11315 -------- 

11316 DataFrame.loc : Select a subset of a DataFrame by label. 

11317 DataFrame.iloc : Select a subset of a DataFrame by position. 

11318 

11319 Notes 

11320 ----- 

11321 If the index being truncated contains only datetime values, 

11322 `before` and `after` may be specified as strings instead of 

11323 Timestamps. 

11324 

11325 Examples 

11326 -------- 

11327 >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], 

11328 ... 'B': ['f', 'g', 'h', 'i', 'j'], 

11329 ... 'C': ['k', 'l', 'm', 'n', 'o']}, 

11330 ... index=[1, 2, 3, 4, 5]) 

11331 >>> df 

11332 A B C 

11333 1 a f k 

11334 2 b g l 

11335 3 c h m 

11336 4 d i n 

11337 5 e j o 

11338 

11339 >>> df.truncate(before=2, after=4) 

11340 A B C 

11341 2 b g l 

11342 3 c h m 

11343 4 d i n 

11344 

11345 The columns of a DataFrame can be truncated. 

11346 

11347 >>> df.truncate(before="A", after="B", axis="columns") 

11348 A B 

11349 1 a f 

11350 2 b g 

11351 3 c h 

11352 4 d i 

11353 5 e j 

11354 

11355 For Series, only rows can be truncated. 

11356 

11357 >>> df['A'].truncate(before=2, after=4) 

11358 2 b 

11359 3 c 

11360 4 d 

11361 Name: A, dtype: object 

11362 

11363 The index values in ``truncate`` can be datetimes or string 

11364 dates. 

11365 

11366 >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') 

11367 >>> df = pd.DataFrame(index=dates, data={'A': 1}) 

11368 >>> df.tail() 

11369 A 

11370 2016-01-31 23:59:56 1 

11371 2016-01-31 23:59:57 1 

11372 2016-01-31 23:59:58 1 

11373 2016-01-31 23:59:59 1 

11374 2016-02-01 00:00:00 1 

11375 

11376 >>> df.truncate(before=pd.Timestamp('2016-01-05'), 

11377 ... after=pd.Timestamp('2016-01-10')).tail() 

11378 A 

11379 2016-01-09 23:59:56 1 

11380 2016-01-09 23:59:57 1 

11381 2016-01-09 23:59:58 1 

11382 2016-01-09 23:59:59 1 

11383 2016-01-10 00:00:00 1 

11384 

11385 Because the index is a DatetimeIndex containing only dates, we can 

11386 specify `before` and `after` as strings. They will be coerced to 

11387 Timestamps before truncation. 

11388 

11389 >>> df.truncate('2016-01-05', '2016-01-10').tail() 

11390 A 

11391 2016-01-09 23:59:56 1 

11392 2016-01-09 23:59:57 1 

11393 2016-01-09 23:59:58 1 

11394 2016-01-09 23:59:59 1 

11395 2016-01-10 00:00:00 1 

11396 

11397 Note that ``truncate`` assumes a 0 value for any unspecified time 

11398 component (midnight). This differs from partial string slicing, which 

11399 returns any partially matching dates. 

11400 

11401 >>> df.loc['2016-01-05':'2016-01-10', :].tail() 

11402 A 

11403 2016-01-10 23:59:55 1 

11404 2016-01-10 23:59:56 1 

11405 2016-01-10 23:59:57 1 

11406 2016-01-10 23:59:58 1 

11407 2016-01-10 23:59:59 1 

11408 """ 

11409 if axis is None: 

11410 axis = 0 

11411 axis = self._get_axis_number(axis) 

11412 ax = self._get_axis(axis) 

11413 

11414 # GH 17935 

11415 # Check that index is sorted 

11416 if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: 

11417 raise ValueError("truncate requires a sorted index") 

11418 

11419 # if we have a date index, convert to dates, otherwise 

11420 # treat like a slice 

11421 if ax._is_all_dates: 

11422 from pandas.core.tools.datetimes import to_datetime 

11423 

11424 before = to_datetime(before) 

11425 after = to_datetime(after) 

11426 

11427 if before is not None and after is not None and before > after: 

11428 raise ValueError(f"Truncate: {after} must be after {before}") 

11429 

11430 if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1: 

11431 before, after = after, before 

11432 

11433 slicer = [slice(None, None)] * self._AXIS_LEN 

11434 slicer[axis] = slice(before, after) 

11435 result = self.loc[tuple(slicer)] 

11436 

11437 if isinstance(ax, MultiIndex): 

11438 setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) 

11439 

11440 result = result.copy(deep=copy and not using_copy_on_write()) 

11441 

11442 return result 

11443 
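    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # On a sorted index, ``truncate(before, after)`` reduces to the label
    # slice built above, so it agrees with ``.loc``:
    #
    #   >>> import pandas as pd
    #   >>> df = pd.DataFrame({"A": list("abcde")}, index=[1, 2, 3, 4, 5])
    #   >>> df.truncate(before=2, after=4).equals(df.loc[2:4])
    #   True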

11444 @final 

11445 @doc(klass=_shared_doc_kwargs["klass"]) 

11446 def tz_convert( 

11447 self, tz, axis: Axis = 0, level=None, copy: bool_t | None = None 

11448 ) -> Self: 

11449 """ 

11450 Convert tz-aware axis to target time zone. 

11451 

11452 Parameters 

11453 ---------- 

11454 tz : str or tzinfo object or None 

11455 Target time zone. Passing ``None`` will convert to 

11456 UTC and remove the timezone information. 

11457 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

11458 The axis to convert. 

11459 level : int, str, default None 

11460 If axis is a MultiIndex, convert a specific level. Otherwise 

11461 must be None. 

11462 copy : bool, default True 

11463 Also make a copy of the underlying data. 

11464 

11465 .. note:: 

11466 The `copy` keyword will change behavior in pandas 3.0. 

11467 `Copy-on-Write 

11468 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

11469 will be enabled by default, which means that all methods with a 

11470 `copy` keyword will use a lazy copy mechanism to defer the copy and 

11471 ignore the `copy` keyword. The `copy` keyword will be removed in a 

11472 future version of pandas. 

11473 

11474 You can already get the future behavior and improvements through 

11475 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

11476 

11477 Returns 

11478 ------- 

11479 {klass} 

11480 Object with time zone converted axis. 

11481 

11482 Raises 

11483 ------ 

11484 TypeError 

11485 If the axis is tz-naive. 

11486 

11487 Examples 

11488 -------- 

11489 Change to another time zone: 

11490 

11491 >>> s = pd.Series( 

11492 ... [1], 

11493 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']), 

11494 ... ) 

11495 >>> s.tz_convert('Asia/Shanghai') 

11496 2018-09-15 07:30:00+08:00 1 

11497 dtype: int64 

11498 

11499 Pass None to convert to UTC and get a tz-naive index: 

11500 

11501 >>> s = pd.Series([1], 

11502 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) 

11503 >>> s.tz_convert(None) 

11504 2018-09-14 23:30:00 1 

11505 dtype: int64 

11506 """ 

11507 axis = self._get_axis_number(axis) 

11508 ax = self._get_axis(axis) 

11509 

11510 def _tz_convert(ax, tz): 

11511 if not hasattr(ax, "tz_convert"): 

11512 if len(ax) > 0: 

11513 ax_name = self._get_axis_name(axis) 

11514 raise TypeError( 

11515 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

11516 ) 

11517 ax = DatetimeIndex([], tz=tz) 

11518 else: 

11519 ax = ax.tz_convert(tz) 

11520 return ax 

11521 

11522 # if a level is given it must be a MultiIndex level or 

11523 # equivalent to the axis name 

11524 if isinstance(ax, MultiIndex): 

11525 level = ax._get_level_number(level) 

11526 new_level = _tz_convert(ax.levels[level], tz) 

11527 ax = ax.set_levels(new_level, level=level) 

11528 else: 

11529 if level not in (None, 0, ax.name): 

11530 raise ValueError(f"The level {level} is not valid") 

11531 ax = _tz_convert(ax, tz) 

11532 

11533 result = self.copy(deep=copy and not using_copy_on_write()) 

11534 result = result.set_axis(ax, axis=axis, copy=False) 

11535 return result.__finalize__(self, method="tz_convert") 

11536 
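    # Editor's note -- an illustrative sketch, not part of the pandas source,
    # of the ``level`` branch above: on a MultiIndex, only the requested
    # level is converted. Assumes pandas 2.x behavior.
    #
    #   >>> import pandas as pd
    #   >>> idx = pd.MultiIndex.from_product(
    #   ...     [pd.DatetimeIndex(["2020-01-01"], tz="UTC"), ["x"]]
    #   ... )
    #   >>> s = pd.Series([1], index=idx)
    #   >>> str(s.tz_convert("US/Eastern", level=0).index.levels[0].tz)
    #   'US/Eastern'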

11537 @final 

11538 @doc(klass=_shared_doc_kwargs["klass"]) 

11539 def tz_localize( 

11540 self, 

11541 tz, 

11542 axis: Axis = 0, 

11543 level=None, 

11544 copy: bool_t | None = None, 

11545 ambiguous: TimeAmbiguous = "raise", 

11546 nonexistent: TimeNonexistent = "raise", 

11547 ) -> Self: 

11548 """ 

11549 Localize tz-naive index of a Series or DataFrame to target time zone. 

11550 

11551 This operation localizes the Index. To localize the values in a 

11552 timezone-naive Series, use :meth:`Series.dt.tz_localize`. 

11553 

11554 Parameters 

11555 ---------- 

11556 tz : str or tzinfo or None 

11557 Time zone to localize. Passing ``None`` will remove the 

11558 time zone information and preserve local time. 

11559 axis : {{0 or 'index', 1 or 'columns'}}, default 0 

11560 The axis to localize. 

11561 level : int, str, default None 

11562 If axis is a MultiIndex, localize a specific level. Otherwise 

11563 must be None. 

11564 copy : bool, default True 

11565 Also make a copy of the underlying data. 

11566 

11567 .. note:: 

11568 The `copy` keyword will change behavior in pandas 3.0. 

11569 `Copy-on-Write 

11570 <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ 

11571 will be enabled by default, which means that all methods with a 

11572 `copy` keyword will use a lazy copy mechanism to defer the copy and 

11573 ignore the `copy` keyword. The `copy` keyword will be removed in a 

11574 future version of pandas. 

11575 

11576 You can already get the future behavior and improvements through 

11577 enabling copy on write ``pd.options.mode.copy_on_write = True`` 

11578 ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' 

11579 When clocks moved backward due to DST, ambiguous times may arise. 

11580 For example in Central European Time (UTC+01), when going from 

11581 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 

11582 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the 

11583 `ambiguous` parameter dictates how ambiguous times should be 

11584 handled. 

11585 

11586 - 'infer' will attempt to infer fall dst-transition hours based on 

11587 order 

11588 - bool-ndarray where True signifies a DST time, False designates 

11589 a non-DST time (note that this flag is only applicable for 

11590 ambiguous times) 

11591 - 'NaT' will return NaT where there are ambiguous times 

11592 - 'raise' will raise an AmbiguousTimeError if there are ambiguous 

11593 times. 

11594 nonexistent : str, default 'raise' 

11595 A nonexistent time does not exist in a particular timezone 

11596 where clocks moved forward due to DST. Valid values are: 

11597 

11598 - 'shift_forward' will shift the nonexistent time forward to the 

11599 closest existing time 

11600 - 'shift_backward' will shift the nonexistent time backward to the 

11601 closest existing time 

11602 - 'NaT' will return NaT where there are nonexistent times 

11603 - timedelta objects will shift nonexistent times by the timedelta 

11604 - 'raise' will raise a NonExistentTimeError if there are 

11605 nonexistent times. 

11606 

11607 Returns 

11608 ------- 

11609 {klass} 

11610 Same type as the input. 

11611 

11612 Raises 

11613 ------ 

11614 TypeError 

11615 If the TimeSeries is tz-aware and tz is not None. 

11616 

11617 Examples 

11618 -------- 

11619 Localize local times: 

11620 

11621 >>> s = pd.Series( 

11622 ... [1], 

11623 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']), 

11624 ... ) 

11625 >>> s.tz_localize('CET') 

11626 2018-09-15 01:30:00+02:00 1 

11627 dtype: int64 

11628 

11629 Pass None to convert to tz-naive index and preserve local time: 

11630 

11631 >>> s = pd.Series([1], 

11632 ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) 

11633 >>> s.tz_localize(None) 

11634 2018-09-15 01:30:00 1 

11635 dtype: int64 

11636 

11637 Be careful with DST changes. When there is sequential data, pandas 

11638 can infer the DST time: 

11639 

11640 >>> s = pd.Series(range(7), 

11641 ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', 

11642 ... '2018-10-28 02:00:00', 

11643 ... '2018-10-28 02:30:00', 

11644 ... '2018-10-28 02:00:00', 

11645 ... '2018-10-28 02:30:00', 

11646 ... '2018-10-28 03:00:00', 

11647 ... '2018-10-28 03:30:00'])) 

11648 >>> s.tz_localize('CET', ambiguous='infer') 

11649 2018-10-28 01:30:00+02:00 0 

11650 2018-10-28 02:00:00+02:00 1 

11651 2018-10-28 02:30:00+02:00 2 

11652 2018-10-28 02:00:00+01:00 3 

11653 2018-10-28 02:30:00+01:00 4 

11654 2018-10-28 03:00:00+01:00 5 

11655 2018-10-28 03:30:00+01:00 6 

11656 dtype: int64 

11657 

11658 In some cases, inferring the DST is impossible. In such cases, you can 

11659 pass an ndarray to the ambiguous parameter to set the DST explicitly. 

11660 

11661 >>> s = pd.Series(range(3), 

11662 ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', 

11663 ... '2018-10-28 02:36:00', 

11664 ... '2018-10-28 03:46:00'])) 

11665 >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) 

11666 2018-10-28 01:20:00+02:00 0 

11667 2018-10-28 02:36:00+02:00 1 

11668 2018-10-28 03:46:00+01:00 2 

11669 dtype: int64 

11670 

11671 If the DST transition causes nonexistent times, you can shift these 

11672 dates forward or backward with a timedelta object or `'shift_forward'` 

11673 or `'shift_backward'`. 

11674 

11675 >>> s = pd.Series(range(2), 

11676 ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', 

11677 ... '2015-03-29 03:30:00'])) 

11678 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') 

11679 2015-03-29 03:00:00+02:00 0 

11680 2015-03-29 03:30:00+02:00 1 

11681 dtype: int64 

11682 >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') 

11683 2015-03-29 01:59:59.999999999+01:00 0 

11684 2015-03-29 03:30:00+02:00 1 

11685 dtype: int64 

11686 >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) 

11687 2015-03-29 03:30:00+02:00 0 

11688 2015-03-29 03:30:00+02:00 1 

11689 dtype: int64 

11690 """ 

11691 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") 

11692 if nonexistent not in nonexistent_options and not isinstance( 

11693 nonexistent, dt.timedelta 

11694 ): 

11695 raise ValueError( 

11696 "The nonexistent argument must be one of 'raise', " 

11697 "'NaT', 'shift_forward', 'shift_backward' or " 

11698 "a timedelta object" 

11699 ) 

11700 

11701 axis = self._get_axis_number(axis) 

11702 ax = self._get_axis(axis) 

11703 

11704 def _tz_localize(ax, tz, ambiguous, nonexistent): 

11705 if not hasattr(ax, "tz_localize"): 

11706 if len(ax) > 0: 

11707 ax_name = self._get_axis_name(axis) 

11708 raise TypeError( 

11709 f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" 

11710 ) 

11711 ax = DatetimeIndex([], tz=tz) 

11712 else: 

11713 ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) 

11714 return ax 

11715 

11716 # if a level is given it must be a MultiIndex level or 

11717 # equivalent to the axis name 

11718 if isinstance(ax, MultiIndex): 

11719 level = ax._get_level_number(level) 

11720 new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) 

11721 ax = ax.set_levels(new_level, level=level) 

11722 else: 

11723 if level not in (None, 0, ax.name): 

11724 raise ValueError(f"The level {level} is not valid") 

11725 ax = _tz_localize(ax, tz, ambiguous, nonexistent) 

11726 

11727 result = self.copy(deep=copy and not using_copy_on_write()) 

11728 result = result.set_axis(ax, axis=axis, copy=False) 

11729 return result.__finalize__(self, method="tz_localize") 

11730 
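    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # ``tz_localize`` attaches a zone to a naive index, ``tz_convert`` then
    # moves it, and localizing with ``None`` strips the zone again:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1], index=pd.DatetimeIndex(["2018-09-15 01:30:00"]))
    #   >>> s.tz_localize("CET").tz_convert("UTC").tz_localize(None)
    #   2018-09-14 23:30:00    1
    #   dtype: int64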

11731 # ---------------------------------------------------------------------- 

11732 # Numeric Methods 

11733 

11734 @final 

11735 def describe( 

11736 self, 

11737 percentiles=None, 

11738 include=None, 

11739 exclude=None, 

11740 ) -> Self: 

11741 """ 

11742 Generate descriptive statistics. 

11743 

11744 Descriptive statistics include those that summarize the central 

11745 tendency, dispersion and shape of a 

11746 dataset's distribution, excluding ``NaN`` values. 

11747 

11748 Analyzes both numeric and object series, as well 

11749 as ``DataFrame`` column sets of mixed data types. The output 

11750 will vary depending on what is provided. Refer to the notes 

11751 below for more detail. 

11752 

11753 Parameters 

11754 ---------- 

11755 percentiles : list-like of numbers, optional 

11756 The percentiles to include in the output. All should 

11757 fall between 0 and 1. The default is 

11758 ``[.25, .5, .75]``, which returns the 25th, 50th, and 

11759 75th percentiles. 

11760 include : 'all', list-like of dtypes or None (default), optional 

11761 A white list of data types to include in the result. Ignored 

11762 for ``Series``. Here are the options: 

11763 

11764 - 'all' : All columns of the input will be included in the output. 

11765 - A list-like of dtypes : Limits the results to the 

11766 provided data types. 

11767 To limit the result to numeric types submit 

11768 ``numpy.number``. To limit it instead to object columns submit 

11769 the ``numpy.object`` data type. Strings 

11770 can also be used in the style of 

11771 ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To 

11772 select pandas categorical columns, use ``'category'`` 

11773 - None (default) : The result will include all numeric columns. 

11774 exclude : list-like of dtypes or None (default), optional 

11775 A black list of data types to omit from the result. Ignored 

11776 for ``Series``. Here are the options: 

11777 

11778 - A list-like of dtypes : Excludes the provided data types 

11779 from the result. To exclude numeric types submit 

11780 ``numpy.number``. To exclude object columns submit the data 

11781 type ``numpy.object``. Strings can also be used in the style of 

11782 ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To 

11783 exclude pandas categorical columns, use ``'category'`` 

11784 - None (default) : The result will exclude nothing. 

11785 

11786 Returns 

11787 ------- 

11788 Series or DataFrame 

11789 Summary statistics of the Series or Dataframe provided. 

11790 

11791 See Also 

11792 -------- 

11793 DataFrame.count: Count number of non-NA/null observations. 

11794 DataFrame.max: Maximum of the values in the object. 

11795 DataFrame.min: Minimum of the values in the object. 

11796 DataFrame.mean: Mean of the values. 

11797 DataFrame.std: Standard deviation of the observations. 

11798 DataFrame.select_dtypes: Subset of a DataFrame including/excluding 

11799 columns based on their dtype. 

11800 

11801 Notes 

11802 ----- 

11803 For numeric data, the result's index will include ``count``, 

11804 ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and 

11805 upper percentiles. By default the lower percentile is ``25`` and the 

11806 upper percentile is ``75``. The ``50`` percentile is the 

11807 same as the median. 

11808 

11809 For object data (e.g. strings or timestamps), the result's index 

11810 will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` 

11811 is the most common value. The ``freq`` is the most common value's 

11812 frequency. Timestamps also include the ``first`` and ``last`` items. 

11813 

11814 If multiple object values have the highest count, then the 

11815 ``count`` and ``top`` results will be arbitrarily chosen from 

11816 among those with the highest count. 

11817 

11818 For mixed data types provided via a ``DataFrame``, the default is to 

11819 return only an analysis of numeric columns. If the dataframe consists 

11820 only of object and categorical data without any numeric columns, the 

11821 default is to return an analysis of both the object and categorical 

11822 columns. If ``include='all'`` is provided as an option, the result 

11823 will include a union of attributes of each type. 

11824 

11825 The `include` and `exclude` parameters can be used to limit 

11826 which columns in a ``DataFrame`` are analyzed for the output. 

11827 The parameters are ignored when analyzing a ``Series``. 

11828 

11829 Examples 

11830 -------- 

11831 Describing a numeric ``Series``. 

11832 

11833 >>> s = pd.Series([1, 2, 3]) 

11834 >>> s.describe() 

11835 count 3.0 

11836 mean 2.0 

11837 std 1.0 

11838 min 1.0 

11839 25% 1.5 

11840 50% 2.0 

11841 75% 2.5 

11842 max 3.0 

11843 dtype: float64 

11844 

11845 Describing a categorical ``Series``. 

11846 

11847 >>> s = pd.Series(['a', 'a', 'b', 'c']) 

11848 >>> s.describe() 

11849 count 4 

11850 unique 3 

11851 top a 

11852 freq 2 

11853 dtype: object 

11854 

11855 Describing a timestamp ``Series``. 

11856 

11857 >>> s = pd.Series([ 

11858 ... np.datetime64("2000-01-01"), 

11859 ... np.datetime64("2010-01-01"), 

11860 ... np.datetime64("2010-01-01") 

11861 ... ]) 

11862 >>> s.describe() 

11863 count 3 

11864 mean 2006-09-01 08:00:00 

11865 min 2000-01-01 00:00:00 

11866 25% 2004-12-31 12:00:00 

11867 50% 2010-01-01 00:00:00 

11868 75% 2010-01-01 00:00:00 

11869 max 2010-01-01 00:00:00 

11870 dtype: object 

11871 

11872 Describing a ``DataFrame``. By default only numeric fields 

11873 are returned. 

11874 

11875 >>> df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']), 

11876 ... 'numeric': [1, 2, 3], 

11877 ... 'object': ['a', 'b', 'c'] 

11878 ... }) 

11879 >>> df.describe() 

11880 numeric 

11881 count 3.0 

11882 mean 2.0 

11883 std 1.0 

11884 min 1.0 

11885 25% 1.5 

11886 50% 2.0 

11887 75% 2.5 

11888 max 3.0 

11889 

11890 Describing all columns of a ``DataFrame`` regardless of data type. 

11891 

11892 >>> df.describe(include='all') # doctest: +SKIP 

11893 categorical numeric object 

11894 count 3 3.0 3 

11895 unique 3 NaN 3 

11896 top f NaN a 

11897 freq 1 NaN 1 

11898 mean NaN 2.0 NaN 

11899 std NaN 1.0 NaN 

11900 min NaN 1.0 NaN 

11901 25% NaN 1.5 NaN 

11902 50% NaN 2.0 NaN 

11903 75% NaN 2.5 NaN 

11904 max NaN 3.0 NaN 

11905 

11906 Describing a column from a ``DataFrame`` by accessing it as 

11907 an attribute. 

11908 

11909 >>> df.numeric.describe() 

11910 count 3.0 

11911 mean 2.0 

11912 std 1.0 

11913 min 1.0 

11914 25% 1.5 

11915 50% 2.0 

11916 75% 2.5 

11917 max 3.0 

11918 Name: numeric, dtype: float64 

11919 

11920 Including only numeric columns in a ``DataFrame`` description. 

11921 

11922 >>> df.describe(include=[np.number]) 

11923 numeric 

11924 count 3.0 

11925 mean 2.0 

11926 std 1.0 

11927 min 1.0 

11928 25% 1.5 

11929 50% 2.0 

11930 75% 2.5 

11931 max 3.0 

11932 

11933 Including only string columns in a ``DataFrame`` description. 

11934 

11935 >>> df.describe(include=[object]) # doctest: +SKIP 

11936 object 

11937 count 3 

11938 unique 3 

11939 top a 

11940 freq 1 

11941 

11942 Including only categorical columns from a ``DataFrame`` description. 

11943 

11944 >>> df.describe(include=['category']) 

11945 categorical 

11946 count 3 

11947 unique 3 

11948 top d 

11949 freq 1 

11950 

11951 Excluding numeric columns from a ``DataFrame`` description. 

11952 

11953 >>> df.describe(exclude=[np.number]) # doctest: +SKIP 

11954 categorical object 

11955 count 3 3 

11956 unique 3 3 

11957 top f a 

11958 freq 1 1 

11959 

11960 Excluding object columns from a ``DataFrame`` description. 

11961 

11962 >>> df.describe(exclude=[object]) # doctest: +SKIP 

11963 categorical numeric 

11964 count 3 3.0 

11965 unique 3 NaN 

11966 top f NaN 

11967 freq 1 NaN 

11968 mean NaN 2.0 

11969 std NaN 1.0 

11970 min NaN 1.0 

11971 25% NaN 1.5 

11972 50% NaN 2.0 

11973 75% NaN 2.5 

11974 max NaN 3.0 

11975 """ 

11976 return describe_ndframe( 

11977 obj=self, 

11978 include=include, 

11979 exclude=exclude, 

11980 percentiles=percentiles, 

11981 ).__finalize__(self, method="describe") 

11982 
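    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # The ``percentiles`` argument replaces the default quartiles; the median
    # (0.5) is always included in the output:
    #
    #   >>> import pandas as pd
    #   >>> pd.Series([1, 2, 3, 4]).describe(percentiles=[0.1, 0.9]).index.tolist()
    #   ['count', 'mean', 'std', 'min', '10%', '50%', '90%', 'max']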

11983 @final 

11984 def pct_change( 

11985 self, 

11986 periods: int = 1, 

11987 fill_method: FillnaOptions | None | lib.NoDefault = lib.no_default, 

11988 limit: int | None | lib.NoDefault = lib.no_default, 

11989 freq=None, 

11990 **kwargs, 

11991 ) -> Self: 

11992 """ 

11993 Fractional change between the current and a prior element. 

11994 

11995 Computes the fractional change from the immediately previous row by 

11996 default. This is useful in comparing the fraction of change in a time 

11997 series of elements. 

11998 

11999 .. note:: 

12000 

12001 Despite the name of this method, it calculates fractional change 

12002 (also known as per unit change or relative change) and not 

12003 percentage change. If you need the percentage change, multiply 

12004 these values by 100. 

12005 

12006 Parameters 

12007 ---------- 

12008 periods : int, default 1 

12009 Periods to shift for forming percent change. 

12010 fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' 

12011 How to handle NAs **before** computing percent changes. 

12012 

12013 .. deprecated:: 2.1 

12014 All options of `fill_method` are deprecated except `fill_method=None`. 

12015 

12016 limit : int, default None 

12017 The number of consecutive NAs to fill before stopping. 

12018 

12019 .. deprecated:: 2.1 

12020 

12021 freq : DateOffset, timedelta, or str, optional 

12022 Increment to use from time series API (e.g. 'ME' or BDay()). 

12023 **kwargs 

12024 Additional keyword arguments are passed into 

12025 `DataFrame.shift` or `Series.shift`. 

12026 

12027 Returns 

12028 ------- 

12029 Series or DataFrame 

12030 The same type as the calling object. 

12031 

12032 See Also 

12033 -------- 

12034 Series.diff : Compute the difference of two elements in a Series. 

12035 DataFrame.diff : Compute the difference of two elements in a DataFrame. 

12036 Series.shift : Shift the index by some number of periods. 

12037 DataFrame.shift : Shift the index by some number of periods. 

12038 

12039 Examples 

12040 -------- 

12041 **Series** 

12042 

12043 >>> s = pd.Series([90, 91, 85]) 

12044 >>> s 

12045 0 90 

12046 1 91 

12047 2 85 

12048 dtype: int64 

12049 

12050 >>> s.pct_change() 

12051 0 NaN 

12052 1 0.011111 

12053 2 -0.065934 

12054 dtype: float64 

12055 

12056 >>> s.pct_change(periods=2) 

12057 0 NaN 

12058 1 NaN 

12059 2 -0.055556 

12060 dtype: float64 

12061 

12062 Percentage change in a Series where NAs are filled with the last 

12063 valid observation, carried forward to the next valid one. 

12064 

12065 >>> s = pd.Series([90, 91, None, 85]) 

12066 >>> s 

12067 0 90.0 

12068 1 91.0 

12069 2 NaN 

12070 3 85.0 

12071 dtype: float64 

12072 

12073 >>> s.ffill().pct_change() 

12074 0 NaN 

12075 1 0.011111 

12076 2 0.000000 

12077 3 -0.065934 

12078 dtype: float64 

12079 

12080 **DataFrame** 

12081 

12082 Percentage change in French franc, Deutsche Mark, and Italian lira from 

12083 1980-01-01 to 1980-03-01. 

12084 

12085 >>> df = pd.DataFrame({ 

12086 ... 'FR': [4.0405, 4.0963, 4.3149], 

12087 ... 'GR': [1.7246, 1.7482, 1.8519], 

12088 ... 'IT': [804.74, 810.01, 860.13]}, 

12089 ... index=['1980-01-01', '1980-02-01', '1980-03-01']) 

12090 >>> df 

12091 FR GR IT 

12092 1980-01-01 4.0405 1.7246 804.74 

12093 1980-02-01 4.0963 1.7482 810.01 

12094 1980-03-01 4.3149 1.8519 860.13 

12095 

12096 >>> df.pct_change() 

12097 FR GR IT 

12098 1980-01-01 NaN NaN NaN 

12099 1980-02-01 0.013810 0.013684 0.006549 

12100 1980-03-01 0.053365 0.059318 0.061876 

12101 

12102 Percentage of change in GOOG and APPL stock volume. Shows computing 

12103 the percentage change between columns. 

12104 

12105 >>> df = pd.DataFrame({ 

12106 ... '2016': [1769950, 30586265], 

12107 ... '2015': [1500923, 40912316], 

12108 ... '2014': [1371819, 41403351]}, 

12109 ... index=['GOOG', 'APPL']) 

12110 >>> df 

12111 2016 2015 2014 

12112 GOOG 1769950 1500923 1371819 

12113 APPL 30586265 40912316 41403351 

12114 

12115 >>> df.pct_change(axis='columns', periods=-1) 

12116 2016 2015 2014 

12117 GOOG 0.179241 0.094112 NaN 

12118 APPL -0.252395 -0.011860 NaN 

12119 """ 

12120 # GH#53491 

12121 if fill_method not in (lib.no_default, None) or limit is not lib.no_default: 

12122 warnings.warn( 

12123 "The 'fill_method' keyword being not None and the 'limit' keyword in " 

12124 f"{type(self).__name__}.pct_change are deprecated and will be removed " 

12125 "in a future version. Either fill in any non-leading NA values prior " 

12126 "to calling pct_change or specify 'fill_method=None' to not fill NA " 

12127 "values.", 

12128 FutureWarning, 

12129 stacklevel=find_stack_level(), 

12130 ) 

12131 if fill_method is lib.no_default: 

12132 if limit is lib.no_default: 

12133 cols = self.items() if self.ndim == 2 else [(None, self)] 

12134 for _, col in cols: 

12135 if len(col) > 0: 

12136 mask = col.isna().values 

12137 mask = mask[np.argmax(~mask) :] 

12138 if mask.any(): 

12139 warnings.warn( 

12140 "The default fill_method='pad' in " 

12141 f"{type(self).__name__}.pct_change is deprecated and " 

12142 "will be removed in a future version. Either fill in " 

12143 "any non-leading NA values prior to calling pct_change " 

12144 "or specify 'fill_method=None' to not fill NA values.", 

12145 FutureWarning, 

12146 stacklevel=find_stack_level(), 

12147 ) 

12148 break 

12149 fill_method = "pad" 

12150 if limit is lib.no_default: 

12151 limit = None 

12152 

12153 axis = self._get_axis_number(kwargs.pop("axis", "index")) 

12154 if fill_method is None: 

12155 data = self 

12156 else: 

12157 data = self._pad_or_backfill(fill_method, axis=axis, limit=limit) 

12158 

12159 shifted = data.shift(periods=periods, freq=freq, axis=axis, **kwargs) 

12160 # Unsupported left operand type for / ("Self") 

12161 rs = data / shifted - 1 # type: ignore[operator] 

12162 if freq is not None: 

12163 # Shift method is implemented differently when freq is not None 

12164 # We want to restore the original index 

12165 rs = rs.loc[~rs.index.duplicated()] 

12166 rs = rs.reindex_like(data) 

12167 return rs.__finalize__(self, method="pct_change") 

12168 
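    # Editor's note -- an illustrative sketch, not part of the pandas source.
    # With ``fill_method=None`` the computation above reduces to
    # ``data / data.shift(periods) - 1``:
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([90, 91, 85])
    #   >>> s.pct_change(fill_method=None).equals(s / s.shift(1) - 1)
    #   True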

12169 @final 

12170 def _logical_func( 

12171 self, 

12172 name: str, 

12173 func, 

12174 axis: Axis | None = 0, 

12175 bool_only: bool_t = False, 

12176 skipna: bool_t = True, 

12177 **kwargs, 

12178 ) -> Series | bool_t: 

12179 nv.validate_logical_func((), kwargs, fname=name) 

12180 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

12181 

12182 if self.ndim > 1 and axis is None: 

12183 # Reduce along one dimension then the other, to simplify DataFrame._reduce 

12184 res = self._logical_func( 

12185 name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs 

12186 ) 

12187 # error: Item "bool" of "Series | bool" has no attribute "_logical_func" 

12188 return res._logical_func( # type: ignore[union-attr] 

12189 name, func, skipna=skipna, **kwargs 

12190 ) 

12191 elif axis is None: 

12192 axis = 0 

12193 

12194 if ( 

12195 self.ndim > 1 

12196 and axis == 1 

12197 and len(self._mgr.arrays) > 1 

12198 # TODO(EA2D): special-case not needed 

12199 and all(x.ndim == 2 for x in self._mgr.arrays) 

12200 and not kwargs 

12201 ): 

12202 # Fastpath avoiding potentially expensive transpose 

12203 obj = self 

12204 if bool_only: 

12205 obj = self._get_bool_data() 

12206 return obj._reduce_axis1(name, func, skipna=skipna) 

12207 

12208 return self._reduce( 

12209 func, 

12210 name=name, 

12211 axis=axis, 

12212 skipna=skipna, 

12213 numeric_only=bool_only, 

12214 filter_type="bool", 

12215 ) 

12216 

12217 def any( 

12218 self, 

12219 axis: Axis | None = 0, 

12220 bool_only: bool_t = False, 

12221 skipna: bool_t = True, 

12222 **kwargs, 

12223 ) -> Series | bool_t: 

12224 return self._logical_func( 

12225 "any", nanops.nanany, axis, bool_only, skipna, **kwargs 

12226 ) 

12227 

12228 def all( 

12229 self, 

12230 axis: Axis = 0, 

12231 bool_only: bool_t = False, 

12232 skipna: bool_t = True, 

12233 **kwargs, 

12234 ) -> Series | bool_t: 

12235 return self._logical_func( 

12236 "all", nanops.nanall, axis, bool_only, skipna, **kwargs 

12237 ) 

12238 

12239 @final 

12240 def _accum_func( 

12241 self, 

12242 name: str, 

12243 func, 

12244 axis: Axis | None = None, 

12245 skipna: bool_t = True, 

12246 *args, 

12247 **kwargs, 

12248 ): 

12249 skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) 

12250 if axis is None: 

12251 axis = 0 

12252 else: 

12253 axis = self._get_axis_number(axis) 

12254 

12255 if axis == 1: 

12256 return self.T._accum_func( 

12257 name, func, axis=0, skipna=skipna, *args, **kwargs # noqa: B026 

12258 ).T 

12259 

12260 def block_accum_func(blk_values): 

12261 values = blk_values.T if hasattr(blk_values, "T") else blk_values 

12262 

12263 result: np.ndarray | ExtensionArray 

12264 if isinstance(values, ExtensionArray): 

12265 result = values._accumulate(name, skipna=skipna, **kwargs) 

12266 else: 

12267 result = nanops.na_accum_func(values, func, skipna=skipna) 

12268 

12269 result = result.T if hasattr(result, "T") else result 

12270 return result 

12271 

12272 result = self._mgr.apply(block_accum_func) 

12273 

12274 return self._constructor_from_mgr(result, axes=result.axes).__finalize__( 

12275 self, method=name 

12276 ) 

12277 

12278 def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

12279 return self._accum_func( 

12280 "cummax", np.maximum.accumulate, axis, skipna, *args, **kwargs 

12281 ) 

12282 

12283 def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

12284 return self._accum_func( 

12285 "cummin", np.minimum.accumulate, axis, skipna, *args, **kwargs 

12286 ) 

12287 

12288 def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

12289 return self._accum_func("cumsum", np.cumsum, axis, skipna, *args, **kwargs) 

12290 

12291 def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs): 

12292 return self._accum_func("cumprod", np.cumprod, axis, skipna, *args, **kwargs) 

12293 
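The axis == 1 branch of _accum_func delegates to the transpose; a small consistency check under that reading, on hypothetical data:

>>> import numpy as np
>>> import pandas as pd
>>> df = pd.DataFrame([[1.0, 2.0], [3.0, np.nan]])
>>> df.cumsum(axis=1).equals(df.T.cumsum().T)
True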

12294 @final 

12295 def _stat_function_ddof( 

12296 self, 

12297 name: str, 

12298 func, 

12299 axis: Axis | None | lib.NoDefault = lib.no_default, 

12300 skipna: bool_t = True, 

12301 ddof: int = 1, 

12302 numeric_only: bool_t = False, 

12303 **kwargs, 

12304 ) -> Series | float: 

12305 nv.validate_stat_ddof_func((), kwargs, fname=name) 

12306 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

12307 

12308 if axis is None: 

12309 if self.ndim > 1: 

12310 warnings.warn( 

12311 f"The behavior of {type(self).__name__}.{name} with axis=None " 

12312 "is deprecated, in a future version this will reduce over both " 

12313 "axes and return a scalar. To retain the old behavior, pass " 

12314 "axis=0 (or do not pass axis)", 

12315 FutureWarning, 

12316 stacklevel=find_stack_level(), 

12317 ) 

12318 axis = 0 

12319 elif axis is lib.no_default: 

12320 axis = 0 

12321 

12322 return self._reduce( 

12323 func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof 

12324 ) 

12325 

12326 def sem( 

12327 self, 

12328 axis: Axis | None = 0, 

12329 skipna: bool_t = True, 

12330 ddof: int = 1, 

12331 numeric_only: bool_t = False, 

12332 **kwargs, 

12333 ) -> Series | float: 

12334 return self._stat_function_ddof( 

12335 "sem", nanops.nansem, axis, skipna, ddof, numeric_only, **kwargs 

12336 ) 

12337 

12338 def var( 

12339 self, 

12340 axis: Axis | None = 0, 

12341 skipna: bool_t = True, 

12342 ddof: int = 1, 

12343 numeric_only: bool_t = False, 

12344 **kwargs, 

12345 ) -> Series | float: 

12346 return self._stat_function_ddof( 

12347 "var", nanops.nanvar, axis, skipna, ddof, numeric_only, **kwargs 

12348 ) 

12349 

12350 def std( 

12351 self, 

12352 axis: Axis | None = 0, 

12353 skipna: bool_t = True, 

12354 ddof: int = 1, 

12355 numeric_only: bool_t = False, 

12356 **kwargs, 

12357 ) -> Series | float: 

12358 return self._stat_function_ddof( 

12359 "std", nanops.nanstd, axis, skipna, ddof, numeric_only, **kwargs 

12360 ) 

12361 
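sem, var, and std all route through _stat_function_ddof with identical ddof handling; a hedged numerical sanity check of how the three relate:

>>> import numpy as np
>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0, 3.0, 4.0])
>>> bool(np.isclose(s.std() ** 2, s.var()))  # std is the square root of var
True
>>> bool(np.isclose(s.sem(), s.std() / np.sqrt(len(s))))  # standard error of the mean
True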

12362 @final 

12363 def _stat_function( 

12364 self, 

12365 name: str, 

12366 func, 

12367 axis: Axis | None = 0, 

12368 skipna: bool_t = True, 

12369 numeric_only: bool_t = False, 

12370 **kwargs, 

12371 ): 

12372 assert name in ["median", "mean", "min", "max", "kurt", "skew"], name 

12373 nv.validate_func(name, (), kwargs) 

12374 

12375 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

12376 

12377 return self._reduce( 

12378 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only 

12379 ) 

12380 

12381 def min( 

12382 self, 

12383 axis: Axis | None = 0, 

12384 skipna: bool_t = True, 

12385 numeric_only: bool_t = False, 

12386 **kwargs, 

12387 ): 

12388 return self._stat_function( 

12389 "min", 

12390 nanops.nanmin, 

12391 axis, 

12392 skipna, 

12393 numeric_only, 

12394 **kwargs, 

12395 ) 

12396 

12397 def max( 

12398 self, 

12399 axis: Axis | None = 0, 

12400 skipna: bool_t = True, 

12401 numeric_only: bool_t = False, 

12402 **kwargs, 

12403 ): 

12404 return self._stat_function( 

12405 "max", 

12406 nanops.nanmax, 

12407 axis, 

12408 skipna, 

12409 numeric_only, 

12410 **kwargs, 

12411 ) 

12412 

12413 def mean( 

12414 self, 

12415 axis: Axis | None = 0, 

12416 skipna: bool_t = True, 

12417 numeric_only: bool_t = False, 

12418 **kwargs, 

12419 ) -> Series | float: 

12420 return self._stat_function( 

12421 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs 

12422 ) 

12423 

12424 def median( 

12425 self, 

12426 axis: Axis | None = 0, 

12427 skipna: bool_t = True, 

12428 numeric_only: bool_t = False, 

12429 **kwargs, 

12430 ) -> Series | float: 

12431 return self._stat_function( 

12432 "median", nanops.nanmedian, axis, skipna, numeric_only, **kwargs 

12433 ) 

12434 

12435 def skew( 

12436 self, 

12437 axis: Axis | None = 0, 

12438 skipna: bool_t = True, 

12439 numeric_only: bool_t = False, 

12440 **kwargs, 

12441 ) -> Series | float: 

12442 return self._stat_function( 

12443 "skew", nanops.nanskew, axis, skipna, numeric_only, **kwargs 

12444 ) 

12445 

12446 def kurt( 

12447 self, 

12448 axis: Axis | None = 0, 

12449 skipna: bool_t = True, 

12450 numeric_only: bool_t = False, 

12451 **kwargs, 

12452 ) -> Series | float: 

12453 return self._stat_function( 

12454 "kurt", nanops.nankurt, axis, skipna, numeric_only, **kwargs 

12455 ) 

12456 

12457 kurtosis = kurt 

12458 
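kurtosis is a plain attribute binding to kurt, so the two names are interchangeable:

>>> import pandas as pd
>>> s = pd.Series([1, 2, 2, 3])
>>> s.kurtosis() == s.kurt()
True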

12459 @final 

12460 def _min_count_stat_function( 

12461 self, 

12462 name: str, 

12463 func, 

12464 axis: Axis | None | lib.NoDefault = lib.no_default, 

12465 skipna: bool_t = True, 

12466 numeric_only: bool_t = False, 

12467 min_count: int = 0, 

12468 **kwargs, 

12469 ): 

12470 assert name in ["sum", "prod"], name 

12471 nv.validate_func(name, (), kwargs) 

12472 

12473 validate_bool_kwarg(skipna, "skipna", none_allowed=False) 

12474 

12475 if axis is None: 

12476 if self.ndim > 1: 

12477 warnings.warn( 

12478 f"The behavior of {type(self).__name__}.{name} with axis=None " 

12479 "is deprecated, in a future version this will reduce over both " 

12480 "axes and return a scalar. To retain the old behavior, pass " 

12481 "axis=0 (or do not pass axis)", 

12482 FutureWarning, 

12483 stacklevel=find_stack_level(), 

12484 ) 

12485 axis = 0 

12486 elif axis is lib.no_default: 

12487 axis = 0 

12488 

12489 return self._reduce( 

12490 func, 

12491 name=name, 

12492 axis=axis, 

12493 skipna=skipna, 

12494 numeric_only=numeric_only, 

12495 min_count=min_count, 

12496 ) 

12497 

12498 def sum( 

12499 self, 

12500 axis: Axis | None = 0, 

12501 skipna: bool_t = True, 

12502 numeric_only: bool_t = False, 

12503 min_count: int = 0, 

12504 **kwargs, 

12505 ): 

12506 return self._min_count_stat_function( 

12507 "sum", nanops.nansum, axis, skipna, numeric_only, min_count, **kwargs 

12508 ) 

12509 

12510 def prod( 

12511 self, 

12512 axis: Axis | None = 0, 

12513 skipna: bool_t = True, 

12514 numeric_only: bool_t = False, 

12515 min_count: int = 0, 

12516 **kwargs, 

12517 ): 

12518 return self._min_count_stat_function( 

12519 "prod", 

12520 nanops.nanprod, 

12521 axis, 

12522 skipna, 

12523 numeric_only, 

12524 min_count, 

12525 **kwargs, 

12526 ) 

12527 

12528 product = prod 

12529 
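The min_count plumbing above is what makes an empty sum well defined; a brief illustration, mirroring the behavior documented in _sum_examples further down:

>>> import pandas as pd
>>> pd.Series([], dtype="float64").sum()  # min_count=0 is the default
0.0
>>> pd.Series([], dtype="float64").sum(min_count=1)
nan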

12530 @final 

12531 @doc(Rolling) 

12532 def rolling( 

12533 self, 

12534 window: int | dt.timedelta | str | BaseOffset | BaseIndexer, 

12535 min_periods: int | None = None, 

12536 center: bool_t = False, 

12537 win_type: str | None = None, 

12538 on: str | None = None, 

12539 axis: Axis | lib.NoDefault = lib.no_default, 

12540 closed: IntervalClosedType | None = None, 

12541 step: int | None = None, 

12542 method: str = "single", 

12543 ) -> Window | Rolling: 

12544 if axis is not lib.no_default: 

12545 axis = self._get_axis_number(axis) 

12546 name = "rolling" 

12547 if axis == 1: 

12548 warnings.warn( 

12549 f"Support for axis=1 in {type(self).__name__}.{name} is " 

12550 "deprecated and will be removed in a future version. " 

12551 f"Use obj.T.{name}(...) instead", 

12552 FutureWarning, 

12553 stacklevel=find_stack_level(), 

12554 ) 

12555 else: 

12556 warnings.warn( 

12557 f"The 'axis' keyword in {type(self).__name__}.{name} is " 

12558 "deprecated and will be removed in a future version. " 

12559 "Call the method without the axis keyword instead.", 

12560 FutureWarning, 

12561 stacklevel=find_stack_level(), 

12562 ) 

12563 else: 

12564 axis = 0 

12565 

12566 if win_type is not None: 

12567 return Window( 

12568 self, 

12569 window=window, 

12570 min_periods=min_periods, 

12571 center=center, 

12572 win_type=win_type, 

12573 on=on, 

12574 axis=axis, 

12575 closed=closed, 

12576 step=step, 

12577 method=method, 

12578 ) 

12579 

12580 return Rolling( 

12581 self, 

12582 window=window, 

12583 min_periods=min_periods, 

12584 center=center, 

12585 win_type=win_type, 

12586 on=on, 

12587 axis=axis, 

12588 closed=closed, 

12589 step=step, 

12590 method=method, 

12591 ) 

12592 
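The win_type check above decides between the weighted Window and the plain Rolling object; a sketch, assuming scipy is installed (the 'gaussian' window requires it):

>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0, 3.0, 4.0])
>>> type(s.rolling(2)).__name__
'Rolling'
>>> type(s.rolling(2, win_type="gaussian")).__name__
'Window'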

12593 @final 

12594 @doc(Expanding) 

12595 def expanding( 

12596 self, 

12597 min_periods: int = 1, 

12598 axis: Axis | lib.NoDefault = lib.no_default, 

12599 method: Literal["single", "table"] = "single", 

12600 ) -> Expanding: 

12601 if axis is not lib.no_default: 

12602 axis = self._get_axis_number(axis) 

12603 name = "expanding" 

12604 if axis == 1: 

12605 warnings.warn( 

12606 f"Support for axis=1 in {type(self).__name__}.{name} is " 

12607 "deprecated and will be removed in a future version. " 

12608 f"Use obj.T.{name}(...) instead", 

12609 FutureWarning, 

12610 stacklevel=find_stack_level(), 

12611 ) 

12612 else: 

12613 warnings.warn( 

12614 f"The 'axis' keyword in {type(self).__name__}.{name} is " 

12615 "deprecated and will be removed in a future version. " 

12616 "Call the method without the axis keyword instead.", 

12617 FutureWarning, 

12618 stacklevel=find_stack_level(), 

12619 ) 

12620 else: 

12621 axis = 0 

12622 return Expanding(self, min_periods=min_periods, axis=axis, method=method) 

12623 
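With the default min_periods=1, an expanding sum over NA-free data coincides with cumsum; a quick hypothetical example:

>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0, 3.0])
>>> s.expanding(min_periods=1).sum()
0    1.0
1    3.0
2    6.0
dtype: float64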

12624 @final 

12625 @doc(ExponentialMovingWindow) 

12626 def ewm( 

12627 self, 

12628 com: float | None = None, 

12629 span: float | None = None, 

12630 halflife: float | TimedeltaConvertibleTypes | None = None, 

12631 alpha: float | None = None, 

12632 min_periods: int | None = 0, 

12633 adjust: bool_t = True, 

12634 ignore_na: bool_t = False, 

12635 axis: Axis | lib.NoDefault = lib.no_default, 

12636 times: np.ndarray | DataFrame | Series | None = None, 

12637 method: Literal["single", "table"] = "single", 

12638 ) -> ExponentialMovingWindow: 

12639 if axis is not lib.no_default: 

12640 axis = self._get_axis_number(axis) 

12641 name = "ewm" 

12642 if axis == 1: 

12643 warnings.warn( 

12644 f"Support for axis=1 in {type(self).__name__}.{name} is " 

12645 "deprecated and will be removed in a future version. " 

12646 f"Use obj.T.{name}(...) instead", 

12647 FutureWarning, 

12648 stacklevel=find_stack_level(), 

12649 ) 

12650 else: 

12651 warnings.warn( 

12652 f"The 'axis' keyword in {type(self).__name__}.{name} is " 

12653 "deprecated and will be removed in a future version. " 

12654 "Call the method without the axis keyword instead.", 

12655 FutureWarning, 

12656 stacklevel=find_stack_level(), 

12657 ) 

12658 else: 

12659 axis = 0 

12660 

12661 return ExponentialMovingWindow( 

12662 self, 

12663 com=com, 

12664 span=span, 

12665 halflife=halflife, 

12666 alpha=alpha, 

12667 min_periods=min_periods, 

12668 adjust=adjust, 

12669 ignore_na=ignore_na, 

12670 axis=axis, 

12671 times=times, 

12672 method=method, 

12673 ) 

12674 
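com, span, halflife, and alpha are alternative parameterizations of the same decay; for instance alpha = 1 / (1 + com), so these two calls are equivalent:

>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0, 3.0])
>>> s.ewm(com=1).mean().equals(s.ewm(alpha=0.5).mean())
True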

12675 # ---------------------------------------------------------------------- 

12676 # Arithmetic Methods 

12677 

12678 @final 

12679 def _inplace_method(self, other, op) -> Self: 

12680 """ 

12681 Wrap arithmetic method to operate inplace. 

12682 """ 

12683 warn = True 

12684 if not PYPY and warn_copy_on_write(): 

12685 if sys.getrefcount(self) <= REF_COUNT + 2: 

12686 # we are probably in an inplace setitem context (e.g. df['a'] += 1) 

12687 warn = False 

12688 

12689 result = op(self, other) 

12690 

12691 if ( 

12692 self.ndim == 1 

12693 and result._indexed_same(self) 

12694 and result.dtype == self.dtype 

12695 and not using_copy_on_write() 

12696 and not (warn_copy_on_write() and not warn) 

12697 ): 

12698 # GH#36498 this inplace op can _actually_ be inplace. 

12699 # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, 

12700 # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace" 

12701 self._mgr.setitem_inplace( # type: ignore[union-attr] 

12702 slice(None), result._values, warn=warn 

12703 ) 

12704 return self 

12705 

12706 # Delete cacher 

12707 self._reset_cacher() 

12708 

12709 # this makes sure that we are aligned like the input 

12710 # we are updating inplace so we want to ignore is_copy 

12711 self._update_inplace( 

12712 result.reindex_like(self, copy=False), verify_is_copy=False 

12713 ) 

12714 return self 

12715 
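Both branches of _inplace_method return self, so an augmented assignment rebinds the name to the same object; a minimal sketch:

>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0])
>>> before = id(s)
>>> s += 1  # dispatches through __iadd__ / _inplace_method
>>> id(s) == before
True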

12716 @final 

12717 def __iadd__(self, other) -> Self: 

12718 # error: Unsupported left operand type for + ("Type[NDFrame]") 

12719 return self._inplace_method(other, type(self).__add__) # type: ignore[operator] 

12720 

12721 @final 

12722 def __isub__(self, other) -> Self: 

12723 # error: Unsupported left operand type for - ("Type[NDFrame]") 

12724 return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] 

12725 

12726 @final 

12727 def __imul__(self, other) -> Self: 

12728 # error: Unsupported left operand type for * ("Type[NDFrame]") 

12729 return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] 

12730 

12731 @final 

12732 def __itruediv__(self, other) -> Self: 

12733 # error: Unsupported left operand type for / ("Type[NDFrame]") 

12734 return self._inplace_method( 

12735 other, type(self).__truediv__ # type: ignore[operator] 

12736 ) 

12737 

12738 @final 

12739 def __ifloordiv__(self, other) -> Self: 

12740 # error: Unsupported left operand type for // ("Type[NDFrame]") 

12741 return self._inplace_method( 

12742 other, type(self).__floordiv__ # type: ignore[operator] 

12743 ) 

12744 

12745 @final 

12746 def __imod__(self, other) -> Self: 

12747 # error: Unsupported left operand type for % ("Type[NDFrame]") 

12748 return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] 

12749 

12750 @final 

12751 def __ipow__(self, other) -> Self: 

12752 # error: Unsupported left operand type for ** ("Type[NDFrame]") 

12753 return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] 

12754 

12755 @final 

12756 def __iand__(self, other) -> Self: 

12757 # error: Unsupported left operand type for & ("Type[NDFrame]") 

12758 return self._inplace_method(other, type(self).__and__) # type: ignore[operator] 

12759 

12760 @final 

12761 def __ior__(self, other) -> Self: 

12762 return self._inplace_method(other, type(self).__or__) 

12763 

12764 @final 

12765 def __ixor__(self, other) -> Self: 

12766 # error: Unsupported left operand type for ^ ("Type[NDFrame]") 

12767 return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] 

12768 

12769 # ---------------------------------------------------------------------- 

12770 # Misc methods 

12771 

12772 @final 

12773 def _find_valid_index(self, *, how: str) -> Hashable | None: 

12774 """ 

12775 Retrieves the index of the first or last valid value, depending on ``how``. 

12776 

12777 Parameters 

12778 ---------- 

12779 how : {'first', 'last'} 

12780 Use this parameter to change between the first or last valid index. 

12781 

12782 Returns 

12783 ------- 

12784 idx_first_valid : type of index 

12785 """ 

12786 is_valid = self.notna().values 

12787 idxpos = find_valid_index(how=how, is_valid=is_valid) 

12788 if idxpos is None: 

12789 return None 

12790 return self.index[idxpos] 

12791 

12792 @final 

12793 @doc(position="first", klass=_shared_doc_kwargs["klass"]) 

12794 def first_valid_index(self) -> Hashable | None: 

12795 """ 

12796 Return index for {position} non-NA value or None, if no non-NA value is found. 

12797 

12798 Returns 

12799 ------- 

12800 type of index 

12801 

12802 Examples 

12803 -------- 

12804 For Series: 

12805 

12806 >>> s = pd.Series([None, 3, 4]) 

12807 >>> s.first_valid_index() 

12808 1 

12809 >>> s.last_valid_index() 

12810 2 

12811 

12812 >>> s = pd.Series([None, None]) 

12813 >>> print(s.first_valid_index()) 

12814 None 

12815 >>> print(s.last_valid_index()) 

12816 None 

12817 

12818 If all elements in Series are NA/null, returns None. 

12819 

12820 >>> s = pd.Series() 

12821 >>> print(s.first_valid_index()) 

12822 None 

12823 >>> print(s.last_valid_index()) 

12824 None 

12825 

12826 If Series is empty, returns None. 

12827 

12828 For DataFrame: 

12829 

12830 >>> df = pd.DataFrame({{'A': [None, None, 2], 'B': [None, 3, 4]}}) 

12831 >>> df 

12832 A B 

12833 0 NaN NaN 

12834 1 NaN 3.0 

12835 2 2.0 4.0 

12836 >>> df.first_valid_index() 

12837 1 

12838 >>> df.last_valid_index() 

12839 2 

12840 

12841 >>> df = pd.DataFrame({{'A': [None, None, None], 'B': [None, None, None]}}) 

12842 >>> df 

12843 A B 

12844 0 None None 

12845 1 None None 

12846 2 None None 

12847 >>> print(df.first_valid_index()) 

12848 None 

12849 >>> print(df.last_valid_index()) 

12850 None 

12851 

12852 If all elements in DataFrame are NA/null, returns None. 

12853 

12854 >>> df = pd.DataFrame() 

12855 >>> df 

12856 Empty DataFrame 

12857 Columns: [] 

12858 Index: [] 

12859 >>> print(df.first_valid_index()) 

12860 None 

12861 >>> print(df.last_valid_index()) 

12862 None 

12863 

12864 If DataFrame is empty, returns None. 

12865 """ 

12866 return self._find_valid_index(how="first") 

12867 

12868 @final 

12869 @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"]) 

12870 def last_valid_index(self) -> Hashable | None: 

12871 return self._find_valid_index(how="last") 

12872 

12873 

12874_num_doc = """ 

12875{desc} 

12876 

12877Parameters 

12878---------- 

12879axis : {axis_descr} 

12880 Axis for the function to be applied on. 

12881 For `Series` this parameter is unused and defaults to 0. 

12882 

12883 For DataFrames, specifying ``axis=None`` will apply the aggregation 

12884 across both axes. 

12885 

12886 .. versionadded:: 2.0.0 

12887 

12888skipna : bool, default True 

12889 Exclude NA/null values when computing the result. 

12890numeric_only : bool, default False 

12891 Include only float, int, boolean columns. Not implemented for Series. 

12892 

12893{min_count}\ 

12894**kwargs 

12895 Additional keyword arguments to be passed to the function. 

12896 

12897Returns 

12898------- 

12899{name1} or scalar\ 

12900{see_also}\ 

12901{examples} 

12902""" 

12903 

12904_sum_prod_doc = """ 

12905{desc} 

12906 

12907Parameters 

12908---------- 

12909axis : {axis_descr} 

12910 Axis for the function to be applied on. 

12911 For `Series` this parameter is unused and defaults to 0. 

12912 

12913 .. warning:: 

12914 

12915 The behavior of DataFrame.{name} with ``axis=None`` is deprecated; 

12916 in a future version this will reduce over both axes and return a scalar. 

12917 To retain the old behavior, pass axis=0 (or do not pass axis). 

12918 

12919 .. versionadded:: 2.0.0 

12920 

12921skipna : bool, default True 

12922 Exclude NA/null values when computing the result. 

12923numeric_only : bool, default False 

12924 Include only float, int, boolean columns. Not implemented for Series. 

12925 

12926{min_count}\ 

12927**kwargs 

12928 Additional keyword arguments to be passed to the function. 

12929 

12930Returns 

12931------- 

12932{name1} or scalar\ 

12933{see_also}\ 

12934{examples} 

12935""" 

12936 

12937_num_ddof_doc = """ 

12938{desc} 

12939 

12940Parameters 

12941---------- 

12942axis : {axis_descr} 

12943 For `Series` this parameter is unused and defaults to 0. 

12944 

12945 .. warning:: 

12946 

12947 The behavior of DataFrame.{name} with ``axis=None`` is deprecated; 

12948 in a future version this will reduce over both axes and return a scalar. 

12949 To retain the old behavior, pass axis=0 (or do not pass axis). 

12950 

12951skipna : bool, default True 

12952 Exclude NA/null values. If an entire row/column is NA, the result 

12953 will be NA. 

12954ddof : int, default 1 

12955 Delta Degrees of Freedom. The divisor used in calculations is N - ddof, 

12956 where N represents the number of elements. 

12957numeric_only : bool, default False 

12958 Include only float, int, boolean columns. Not implemented for Series. 

12959 

12960Returns 

12961------- 

12962{name1} or {name2} (if level specified) \ 

12963{notes}\ 

12964{examples} 

12965""" 

12966 

12967_std_notes = """ 

12968 

12969Notes 

12970----- 

12971To have the same behavior as `numpy.std`, use `ddof=0` (instead of the 

12972default `ddof=1`). 

12973 

12974_std_examples = """ 

12975 

12976Examples 

12977-------- 

12978>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

12979... 'age': [21, 25, 62, 43], 

12980... 'height': [1.61, 1.87, 1.49, 2.01]} 

12981... ).set_index('person_id') 

12982>>> df 

12983 age height 

12984person_id 

129850 21 1.61 

129861 25 1.87 

129872 62 1.49 

129883 43 2.01 

12989 

12990The standard deviation of the columns can be found as follows: 

12991 

12992>>> df.std() 

12993age 18.786076 

12994height 0.237417 

12995dtype: float64 

12996 

12997Alternatively, `ddof=0` can be set to normalize by N instead of N-1: 

12998 

12999>>> df.std(ddof=0) 

13000age 16.269219 

13001height 0.205609 

13002dtype: float64""" 

13003 

13004_var_examples = """ 

13005 

13006Examples 

13007-------- 

13008>>> df = pd.DataFrame({'person_id': [0, 1, 2, 3], 

13009... 'age': [21, 25, 62, 43], 

13010... 'height': [1.61, 1.87, 1.49, 2.01]} 

13011... ).set_index('person_id') 

13012>>> df 

13013 age height 

13014person_id 

130150 21 1.61 

130161 25 1.87 

130172 62 1.49 

130183 43 2.01 

13019 

13020>>> df.var() 

13021age 352.916667 

13022height 0.056367 

13023dtype: float64 

13024 

13025Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1: 

13026 

13027>>> df.var(ddof=0) 

13028age 264.687500 

13029height 0.042275 

13030dtype: float64""" 

13031 

13032_bool_doc = """ 

13033{desc} 

13034 

13035Parameters 

13036---------- 

13037axis : {{0 or 'index', 1 or 'columns', None}}, default 0 

13038 Indicate which axis or axes should be reduced. For `Series` this parameter 

13039 is unused and defaults to 0. 

13040 

13041 * 0 / 'index' : reduce the index, return a Series whose index is the 

13042 original column labels. 

13043 * 1 / 'columns' : reduce the columns, return a Series whose index is the 

13044 original index. 

13045 * None : reduce all axes, return a scalar. 

13046 

13047bool_only : bool, default False 

13048 Include only boolean columns. Not implemented for Series. 

13049skipna : bool, default True 

13050 Exclude NA/null values. If the entire row/column is NA and skipna is 

13051 True, then the result will be {empty_value}, as for an empty row/column. 

13052 If skipna is False, then NA values are treated as True, because they are not 

13053 equal to zero. 

13054**kwargs : any, default None 

13055 Additional keywords have no effect but might be accepted for 

13056 compatibility with NumPy. 

13057 

13058Returns 

13059------- 

13060{name1} or {name2} 

13061 If level is specified, then {name2} is returned; otherwise, {name1} 

13062 is returned. 

13063 

13064{see_also} 

13065{examples}""" 

13066 

13067_all_desc = """\ 

13068Return whether all elements are True, potentially over an axis. 

13069 

13070Returns True unless there is at least one element within a series or 

13071along a DataFrame axis that is False or equivalent (e.g. zero or 

13072empty).""" 

13073 

13074_all_examples = """\ 

13075Examples 

13076-------- 

13077**Series** 

13078 

13079>>> pd.Series([True, True]).all() 

13080True 

13081>>> pd.Series([True, False]).all() 

13082False 

13083>>> pd.Series([], dtype="float64").all() 

13084True 

13085>>> pd.Series([np.nan]).all() 

13086True 

13087>>> pd.Series([np.nan]).all(skipna=False) 

13088True 

13089 

13090**DataFrames** 

13091 

13092Create a dataframe from a dictionary. 

13093 

13094>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) 

13095>>> df 

13096 col1 col2 

130970 True True 

130981 True False 

13099 

13100Default behaviour checks if values in each column all return True. 

13101 

13102>>> df.all() 

13103col1 True 

13104col2 False 

13105dtype: bool 

13106 

13107Specify ``axis='columns'`` to check if values in each row all return True. 

13108 

13109>>> df.all(axis='columns') 

131100 True 

131111 False 

13112dtype: bool 

13113 

13114Or ``axis=None`` for whether every value is True. 

13115 

13116>>> df.all(axis=None) 

13117False 

13118""" 

13119 

13120_all_see_also = """\ 

13121See Also 

13122-------- 

13123Series.all : Return True if all elements are True. 

13124DataFrame.any : Return True if one (or more) elements are True. 

13125""" 

13126 

13127_cnum_doc = """ 

13128Return cumulative {desc} over a DataFrame or Series axis. 

13129 

13130Returns a DataFrame or Series of the same size containing the cumulative 

13131{desc}. 

13132 

13133Parameters 

13134---------- 

13135axis : {{0 or 'index', 1 or 'columns'}}, default 0 

13136 The index or the name of the axis. 0 is equivalent to None or 'index'. 

13137 For `Series` this parameter is unused and defaults to 0. 

13138skipna : bool, default True 

13139 Exclude NA/null values. If an entire row/column is NA, the result 

13140 will be NA. 

13141*args, **kwargs 

13142 Additional keywords have no effect but might be accepted for 

13143 compatibility with NumPy. 

13144 

13145Returns 

13146------- 

13147{name1} or {name2} 

13148 Return cumulative {desc} of {name1} or {name2}. 

13149 

13150See Also 

13151-------- 

13152core.window.expanding.Expanding.{accum_func_name} : Similar functionality 

13153 but ignores ``NaN`` values. 

13154{name2}.{accum_func_name} : Return the {desc} over 

13155 {name2} axis. 

13156{name2}.cummax : Return cumulative maximum over {name2} axis. 

13157{name2}.cummin : Return cumulative minimum over {name2} axis. 

13158{name2}.cumsum : Return cumulative sum over {name2} axis. 

13159{name2}.cumprod : Return cumulative product over {name2} axis. 

13160 

13161{examples}""" 

13162 

13163_cummin_examples = """\ 

13164Examples 

13165-------- 

13166**Series** 

13167 

13168>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

13169>>> s 

131700 2.0 

131711 NaN 

131722 5.0 

131733 -1.0 

131744 0.0 

13175dtype: float64 

13176 

13177By default, NA values are ignored. 

13178 

13179>>> s.cummin() 

131800 2.0 

131811 NaN 

131822 2.0 

131833 -1.0 

131844 -1.0 

13185dtype: float64 

13186 

13187To include NA values in the operation, use ``skipna=False`` 

13188 

13189>>> s.cummin(skipna=False) 

131900 2.0 

131911 NaN 

131922 NaN 

131933 NaN 

131944 NaN 

13195dtype: float64 

13196 

13197**DataFrame** 

13198 

13199>>> df = pd.DataFrame([[2.0, 1.0], 

13200... [3.0, np.nan], 

13201... [1.0, 0.0]], 

13202... columns=list('AB')) 

13203>>> df 

13204 A B 

132050 2.0 1.0 

132061 3.0 NaN 

132072 1.0 0.0 

13208 

13209By default, iterates over rows and finds the minimum 

13210in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

13211 

13212>>> df.cummin() 

13213 A B 

132140 2.0 1.0 

132151 2.0 NaN 

132162 1.0 0.0 

13217 

13218To iterate over columns and find the minimum in each row, 

13219use ``axis=1`` 

13220 

13221>>> df.cummin(axis=1) 

13222 A B 

132230 2.0 1.0 

132241 3.0 NaN 

132252 1.0 0.0 

13226""" 

13227 

13228_cumsum_examples = """\ 

13229Examples 

13230-------- 

13231**Series** 

13232 

13233>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

13234>>> s 

132350 2.0 

132361 NaN 

132372 5.0 

132383 -1.0 

132394 0.0 

13240dtype: float64 

13241 

13242By default, NA values are ignored. 

13243 

13244>>> s.cumsum() 

132450 2.0 

132461 NaN 

132472 7.0 

132483 6.0 

132494 6.0 

13250dtype: float64 

13251 

13252To include NA values in the operation, use ``skipna=False`` 

13253 

13254>>> s.cumsum(skipna=False) 

132550 2.0 

132561 NaN 

132572 NaN 

132583 NaN 

132594 NaN 

13260dtype: float64 

13261 

13262**DataFrame** 

13263 

13264>>> df = pd.DataFrame([[2.0, 1.0], 

13265... [3.0, np.nan], 

13266... [1.0, 0.0]], 

13267... columns=list('AB')) 

13268>>> df 

13269 A B 

132700 2.0 1.0 

132711 3.0 NaN 

132722 1.0 0.0 

13273 

13274By default, iterates over rows and finds the sum 

13275in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

13276 

13277>>> df.cumsum() 

13278 A B 

132790 2.0 1.0 

132801 5.0 NaN 

132812 6.0 1.0 

13282 

13283To iterate over columns and find the sum in each row, 

13284use ``axis=1`` 

13285 

13286>>> df.cumsum(axis=1) 

13287 A B 

132880 2.0 3.0 

132891 3.0 NaN 

132902 1.0 1.0 

13291""" 

13292 

13293_cumprod_examples = """\ 

13294Examples 

13295-------- 

13296**Series** 

13297 

13298>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

13299>>> s 

133000 2.0 

133011 NaN 

133022 5.0 

133033 -1.0 

133044 0.0 

13305dtype: float64 

13306 

13307By default, NA values are ignored. 

13308 

13309>>> s.cumprod() 

133100 2.0 

133111 NaN 

133122 10.0 

133133 -10.0 

133144 -0.0 

13315dtype: float64 

13316 

13317To include NA values in the operation, use ``skipna=False`` 

13318 

13319>>> s.cumprod(skipna=False) 

133200 2.0 

133211 NaN 

133222 NaN 

133233 NaN 

133244 NaN 

13325dtype: float64 

13326 

13327**DataFrame** 

13328 

13329>>> df = pd.DataFrame([[2.0, 1.0], 

13330... [3.0, np.nan], 

13331... [1.0, 0.0]], 

13332... columns=list('AB')) 

13333>>> df 

13334 A B 

133350 2.0 1.0 

133361 3.0 NaN 

133372 1.0 0.0 

13338 

13339By default, iterates over rows and finds the product 

13340in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

13341 

13342>>> df.cumprod() 

13343 A B 

133440 2.0 1.0 

133451 6.0 NaN 

133462 6.0 0.0 

13347 

13348To iterate over columns and find the product in each row, 

13349use ``axis=1`` 

13350 

13351>>> df.cumprod(axis=1) 

13352 A B 

133530 2.0 2.0 

133541 3.0 NaN 

133552 1.0 0.0 

13356""" 

13357 

13358_cummax_examples = """\ 

13359Examples 

13360-------- 

13361**Series** 

13362 

13363>>> s = pd.Series([2, np.nan, 5, -1, 0]) 

13364>>> s 

133650 2.0 

133661 NaN 

133672 5.0 

133683 -1.0 

133694 0.0 

13370dtype: float64 

13371 

13372By default, NA values are ignored. 

13373 

13374>>> s.cummax() 

133750 2.0 

133761 NaN 

133772 5.0 

133783 5.0 

133794 5.0 

13380dtype: float64 

13381 

13382To include NA values in the operation, use ``skipna=False`` 

13383 

13384>>> s.cummax(skipna=False) 

133850 2.0 

133861 NaN 

133872 NaN 

133883 NaN 

133894 NaN 

13390dtype: float64 

13391 

13392**DataFrame** 

13393 

13394>>> df = pd.DataFrame([[2.0, 1.0], 

13395... [3.0, np.nan], 

13396... [1.0, 0.0]], 

13397... columns=list('AB')) 

13398>>> df 

13399 A B 

134000 2.0 1.0 

134011 3.0 NaN 

134022 1.0 0.0 

13403 

13404By default, iterates over rows and finds the maximum 

13405in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 

13406 

13407>>> df.cummax() 

13408 A B 

134090 2.0 1.0 

134101 3.0 NaN 

134112 3.0 1.0 

13412 

13413To iterate over columns and find the maximum in each row, 

13414use ``axis=1`` 

13415 

13416>>> df.cummax(axis=1) 

13417 A B 

134180 2.0 2.0 

134191 3.0 NaN 

134202 1.0 1.0 

13421""" 

13422 

13423_any_see_also = """\ 

13424See Also 

13425-------- 

13426numpy.any : NumPy version of this method. 

13427Series.any : Return whether any element is True. 

13428Series.all : Return whether all elements are True. 

13429DataFrame.any : Return whether any element is True over requested axis. 

13430DataFrame.all : Return whether all elements are True over requested axis. 

13431""" 

13432 

13433_any_desc = """\ 

13434Return whether any element is True, potentially over an axis. 

13435 

13436Returns False unless there is at least one element within a series or 

13437along a DataFrame axis that is True or equivalent (e.g. non-zero or 

13438non-empty).""" 

13439 

13440_any_examples = """\ 

13441Examples 

13442-------- 

13443**Series** 

13444 

13445For Series input, the output is a scalar indicating whether any element 

13446is True. 

13447 

13448>>> pd.Series([False, False]).any() 

13449False 

13450>>> pd.Series([True, False]).any() 

13451True 

13452>>> pd.Series([], dtype="float64").any() 

13453False 

13454>>> pd.Series([np.nan]).any() 

13455False 

13456>>> pd.Series([np.nan]).any(skipna=False) 

13457True 

13458 

13459**DataFrame** 

13460 

13461Whether each column contains at least one True element (the default). 

13462 

13463>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) 

13464>>> df 

13465 A B C 

134660 1 0 0 

134671 2 2 0 

13468 

13469>>> df.any() 

13470A True 

13471B True 

13472C False 

13473dtype: bool 

13474 

13475Aggregating over the columns. 

13476 

13477>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) 

13478>>> df 

13479 A B 

134800 True 1 

134811 False 2 

13482 

13483>>> df.any(axis='columns') 

134840 True 

134851 True 

13486dtype: bool 

13487 

13488>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) 

13489>>> df 

13490 A B 

134910 True 1 

134921 False 0 

13493 

13494>>> df.any(axis='columns') 

134950 True 

134961 False 

13497dtype: bool 

13498 

13499Aggregating over the entire DataFrame with ``axis=None``. 

13500 

13501>>> df.any(axis=None) 

13502True 

13503 

13504`any` for an empty DataFrame is an empty Series. 

13505 

13506>>> pd.DataFrame([]).any() 

13507Series([], dtype: bool) 

13508""" 

13509 

13510_shared_docs[ 

13511 "stat_func_example" 

13512] = """ 

13513 

13514Examples 

13515-------- 

13516>>> idx = pd.MultiIndex.from_arrays([ 

13517... ['warm', 'warm', 'cold', 'cold'], 

13518... ['dog', 'falcon', 'fish', 'spider']], 

13519... names=['blooded', 'animal']) 

13520>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) 

13521>>> s 

13522blooded animal 

13523warm dog 4 

13524 falcon 2 

13525cold fish 0 

13526 spider 8 

13527Name: legs, dtype: int64 

13528 

13529>>> s.{stat_func}() 

13530{default_output}""" 

13531 

13532_sum_examples = _shared_docs["stat_func_example"].format( 

13533 stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 

13534) 

13535 

13536_sum_examples += """ 

13537 

13538By default, the sum of an empty or all-NA Series is ``0``. 

13539 

13540>>> pd.Series([], dtype="float64").sum() # min_count=0 is the default 

135410.0 

13542 

13543This can be controlled with the ``min_count`` parameter. For example, if 

13544you'd like the sum of an empty series to be NaN, pass ``min_count=1``. 

13545 

13546>>> pd.Series([], dtype="float64").sum(min_count=1) 

13547nan 

13548 

13549Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

13550empty series identically. 

13551 

13552>>> pd.Series([np.nan]).sum() 

135530.0 

13554 

13555>>> pd.Series([np.nan]).sum(min_count=1) 

13556nan""" 

13557 

13558_max_examples: str = _shared_docs["stat_func_example"].format( 

13559 stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 

13560) 

13561 

13562_min_examples: str = _shared_docs["stat_func_example"].format( 

13563 stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 

13564) 

13565 

13566_stat_func_see_also = """ 

13567 

13568See Also 

13569-------- 

13570Series.sum : Return the sum. 

13571Series.min : Return the minimum. 

13572Series.max : Return the maximum. 

13573Series.idxmin : Return the index of the minimum. 

13574Series.idxmax : Return the index of the maximum. 

13575DataFrame.sum : Return the sum over the requested axis. 

13576DataFrame.min : Return the minimum over the requested axis. 

13577DataFrame.max : Return the maximum over the requested axis. 

13578DataFrame.idxmin : Return the index of the minimum over the requested axis. 

13579DataFrame.idxmax : Return the index of the maximum over the requested axis.""" 

13580 

13581_prod_examples = """ 

13582 

13583Examples 

13584-------- 

13585By default, the product of an empty or all-NA Series is ``1`` 

13586 

13587>>> pd.Series([], dtype="float64").prod() 

135881.0 

13589 

13590This can be controlled with the ``min_count`` parameter 

13591 

13592>>> pd.Series([], dtype="float64").prod(min_count=1) 

13593nan 

13594 

13595Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and 

13596empty series identically. 

13597 

13598>>> pd.Series([np.nan]).prod() 

135991.0 

13600 

13601>>> pd.Series([np.nan]).prod(min_count=1) 

13602nan""" 

13603 

13604_min_count_stub = """\ 

13605min_count : int, default 0 

13606 The required number of valid values to perform the operation. If fewer than 

13607 ``min_count`` non-NA values are present the result will be NA. 

13608""" 

13609 

13610 

13611def make_doc(name: str, ndim: int) -> str: 

13612 """ 

13613 Generate the docstring for a Series/DataFrame reduction. 

13614 """ 

13615 if ndim == 1: 

13616 name1 = "scalar" 

13617 name2 = "Series" 

13618 axis_descr = "{index (0)}" 

13619 else: 

13620 name1 = "Series" 

13621 name2 = "DataFrame" 

13622 axis_descr = "{index (0), columns (1)}" 

13623 

13624 if name == "any": 

13625 base_doc = _bool_doc 

13626 desc = _any_desc 

13627 see_also = _any_see_also 

13628 examples = _any_examples 

13629 kwargs = {"empty_value": "False"} 

13630 elif name == "all": 

13631 base_doc = _bool_doc 

13632 desc = _all_desc 

13633 see_also = _all_see_also 

13634 examples = _all_examples 

13635 kwargs = {"empty_value": "True"} 

13636 elif name == "min": 

13637 base_doc = _num_doc 

13638 desc = ( 

13639 "Return the minimum of the values over the requested axis.\n\n" 

13640 "If you want the *index* of the minimum, use ``idxmin``. This is " 

13641 "the equivalent of the ``numpy.ndarray`` method ``argmin``." 

13642 ) 

13643 see_also = _stat_func_see_also 

13644 examples = _min_examples 

13645 kwargs = {"min_count": ""} 

13646 elif name == "max": 

13647 base_doc = _num_doc 

13648 desc = ( 

13649 "Return the maximum of the values over the requested axis.\n\n" 

13650 "If you want the *index* of the maximum, use ``idxmax``. This is " 

13651 "the equivalent of the ``numpy.ndarray`` method ``argmax``." 

13652 ) 

13653 see_also = _stat_func_see_also 

13654 examples = _max_examples 

13655 kwargs = {"min_count": ""} 

13656 

13657 elif name == "sum": 

13658 base_doc = _sum_prod_doc 

13659 desc = ( 

13660 "Return the sum of the values over the requested axis.\n\n" 

13661 "This is equivalent to the method ``numpy.sum``." 

13662 ) 

13663 see_also = _stat_func_see_also 

13664 examples = _sum_examples 

13665 kwargs = {"min_count": _min_count_stub} 

13666 

13667 elif name == "prod": 

13668 base_doc = _sum_prod_doc 

13669 desc = "Return the product of the values over the requested axis." 

13670 see_also = _stat_func_see_also 

13671 examples = _prod_examples 

13672 kwargs = {"min_count": _min_count_stub} 

13673 

13674 elif name == "median": 

13675 base_doc = _num_doc 

13676 desc = "Return the median of the values over the requested axis." 

13677 see_also = "" 

13678 examples = """ 

13679 

13680 Examples 

13681 -------- 

13682 >>> s = pd.Series([1, 2, 3]) 

13683 >>> s.median() 

13684 2.0 

13685 

13686 With a DataFrame 

13687 

13688 >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) 

13689 >>> df 

13690 a b 

13691 tiger 1 2 

13692 zebra 2 3 

13693 >>> df.median() 

13694 a 1.5 

13695 b 2.5 

13696 dtype: float64 

13697 

13698 Using axis=1 

13699 

13700 >>> df.median(axis=1) 

13701 tiger 1.5 

13702 zebra 2.5 

13703 dtype: float64 

13704 

13705 In this case, `numeric_only` should be set to `True` 

13706 to avoid getting an error. 

13707 

13708 >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, 

13709 ... index=['tiger', 'zebra']) 

13710 >>> df.median(numeric_only=True) 

13711 a 1.5 

13712 dtype: float64""" 

13713 kwargs = {"min_count": ""} 

13714 

13715 elif name == "mean": 

13716 base_doc = _num_doc 

13717 desc = "Return the mean of the values over the requested axis." 

13718 see_also = "" 

13719 examples = """ 

13720 

13721 Examples 

13722 -------- 

13723 >>> s = pd.Series([1, 2, 3]) 

13724 >>> s.mean() 

13725 2.0 

13726 

13727 With a DataFrame 

13728 

13729 >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) 

13730 >>> df 

13731 a b 

13732 tiger 1 2 

13733 zebra 2 3 

13734 >>> df.mean() 

13735 a 1.5 

13736 b 2.5 

13737 dtype: float64 

13738 

13739 Using axis=1 

13740 

13741 >>> df.mean(axis=1) 

13742 tiger 1.5 

13743 zebra 2.5 

13744 dtype: float64 

13745 

13746 In this case, `numeric_only` should be set to `True` to avoid 

13747 getting an error. 

13748 

13749 >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, 

13750 ... index=['tiger', 'zebra']) 

13751 >>> df.mean(numeric_only=True) 

13752 a 1.5 

13753 dtype: float64""" 

13754 kwargs = {"min_count": ""} 

13755 

13756 elif name == "var": 

13757 base_doc = _num_ddof_doc 

13758 desc = ( 

13759 "Return unbiased variance over requested axis.\n\nNormalized by " 

13760 "N-1 by default. This can be changed using the ddof argument." 

13761 ) 

13762 examples = _var_examples 

13763 see_also = "" 

13764 kwargs = {"notes": ""} 

13765 

13766 elif name == "std": 

13767 base_doc = _num_ddof_doc 

13768 desc = ( 

13769 "Return sample standard deviation over requested axis." 

13770 "\n\nNormalized by N-1 by default. This can be changed using the " 

13771 "ddof argument." 

13772 ) 

13773 examples = _std_examples 

13774 see_also = "" 

13775 kwargs = {"notes": _std_notes} 

13776 

13777 elif name == "sem": 

13778 base_doc = _num_ddof_doc 

13779 desc = ( 

13780 "Return unbiased standard error of the mean over requested " 

13781 "axis.\n\nNormalized by N-1 by default. This can be changed " 

13782 "using the ddof argument" 

13783 ) 

13784 examples = """ 

13785 

13786 Examples 

13787 -------- 

13788 >>> s = pd.Series([1, 2, 3]) 

13789 >>> s.sem().round(6) 

13790 0.57735 

13791 

13792 With a DataFrame 

13793 

13794 >>> df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra']) 

13795 >>> df 

13796 a b 

13797 tiger 1 2 

13798 zebra 2 3 

13799 >>> df.sem() 

13800 a 0.5 

13801 b 0.5 

13802 dtype: float64 

13803 

13804 Using axis=1 

13805 

13806 >>> df.sem(axis=1) 

13807 tiger 0.5 

13808 zebra 0.5 

13809 dtype: float64 

13810 

13811 In this case, `numeric_only` should be set to `True` 

13812 to avoid getting an error. 

13813 

13814 >>> df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']}, 

13815 ... index=['tiger', 'zebra']) 

13816 >>> df.sem(numeric_only=True) 

13817 a 0.5 

13818 dtype: float64""" 

13819 see_also = "" 

13820 kwargs = {"notes": ""} 

13821 

13822 elif name == "skew": 

13823 base_doc = _num_doc 

13824 desc = "Return unbiased skew over requested axis.\n\nNormalized by N-1." 

13825 see_also = "" 

13826 examples = """ 

13827 

13828 Examples 

13829 -------- 

13830 >>> s = pd.Series([1, 2, 3]) 

13831 >>> s.skew() 

13832 0.0 

13833 

13834 With a DataFrame 

13835 

13836 >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4], 'c': [1, 3, 5]}, 

13837 ... index=['tiger', 'zebra', 'cow']) 

13838 >>> df 

13839 a b c 

13840 tiger 1 2 1 

13841 zebra 2 3 3 

13842 cow 3 4 5 

13843 >>> df.skew() 

13844 a 0.0 

13845 b 0.0 

13846 c 0.0 

13847 dtype: float64 

13848 

13849 Using axis=1 

13850 

13851 >>> df.skew(axis=1) 

13852 tiger 1.732051 

13853 zebra -1.732051 

13854 cow 0.000000 

13855 dtype: float64 

13856 

13857 In this case, `numeric_only` should be set to `True` to avoid 

13858 getting an error. 

13859 

13860 >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': ['T', 'Z', 'X']}, 

13861 ... index=['tiger', 'zebra', 'cow']) 

13862 >>> df.skew(numeric_only=True) 

13863 a 0.0 

13864 dtype: float64""" 

13865 kwargs = {"min_count": ""} 

13866 elif name == "kurt": 

13867 base_doc = _num_doc 

13868 desc = ( 

13869 "Return unbiased kurtosis over requested axis.\n\n" 

13870 "Kurtosis obtained using Fisher's definition of\n" 

13871 "kurtosis (kurtosis of normal == 0.0). Normalized " 

13872 "by N-1." 

13873 ) 

13874 see_also = "" 

13875 examples = """ 

13876 

13877 Examples 

13878 -------- 

13879 >>> s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) 

13880 >>> s 

13881 cat 1 

13882 dog 2 

13883 dog 2 

13884 mouse 3 

13885 dtype: int64 

13886 >>> s.kurt() 

13887 1.5 

13888 

13889 With a DataFrame 

13890 

13891 >>> df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]}, 

13892 ... index=['cat', 'dog', 'dog', 'mouse']) 

13893 >>> df 

13894 a b 

13895 cat 1 3 

13896 dog 2 4 

13897 dog 2 4 

13898 mouse 3 4 

13899 >>> df.kurt() 

13900 a 1.5 

13901 b 4.0 

13902 dtype: float64 

13903 

13904 With axis=None 

13905 

13906 >>> df.kurt(axis=None).round(6) 

13907 -0.988693 

13908 

13909 Using axis=1 

13910 

13911 >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]}, 

13912 ... index=['cat', 'dog']) 

13913 >>> df.kurt(axis=1) 

13914 cat -6.0 

13915 dog -6.0 

13916 dtype: float64""" 

13917 kwargs = {"min_count": ""} 

13918 

13919 elif name == "cumsum": 

13920 base_doc = _cnum_doc 

13921 desc = "sum" 

13922 see_also = "" 

13923 examples = _cumsum_examples 

13924 kwargs = {"accum_func_name": "sum"} 

13925 

13926 elif name == "cumprod": 

13927 base_doc = _cnum_doc 

13928 desc = "product" 

13929 see_also = "" 

13930 examples = _cumprod_examples 

13931 kwargs = {"accum_func_name": "prod"} 

13932 

13933 elif name == "cummin": 

13934 base_doc = _cnum_doc 

13935 desc = "minimum" 

13936 see_also = "" 

13937 examples = _cummin_examples 

13938 kwargs = {"accum_func_name": "min"} 

13939 

13940 elif name == "cummax": 

13941 base_doc = _cnum_doc 

13942 desc = "maximum" 

13943 see_also = "" 

13944 examples = _cummax_examples 

13945 kwargs = {"accum_func_name": "max"} 

13946 

13947 else: 

13948 raise NotImplementedError 

13949 

13950 docstr = base_doc.format( 

13951 desc=desc, 

13952 name=name, 

13953 name1=name1, 

13954 name2=name2, 

13955 axis_descr=axis_descr, 

13956 see_also=see_also, 

13957 examples=examples, 

13958 **kwargs, 

13959 ) 

13960 return docstr
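make_doc is a private helper, but it can be exercised directly to see a template being filled; a hedged illustration showing the first non-empty line of a generated docstring:

>>> from pandas.core.generic import make_doc  # private API, shown for illustration only
>>> make_doc("sum", ndim=2).splitlines()[1]
'Return the sum of the values over the requested axis.'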