Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/base.py: 39%

322 statements  

1""" 

2Base and utility classes for pandas objects. 

3""" 

4 

5from __future__ import annotations 

6 

7import textwrap 

8from typing import ( 

9 TYPE_CHECKING, 

10 Any, 

11 Generic, 

12 Hashable, 

13 Iterator, 

14 Literal, 

15 TypeVar, 

16 cast, 

17 final, 

18 overload, 

19) 

20 

21import numpy as np 

22 

23from pandas._config import using_copy_on_write 

24 

25from pandas._libs import lib 

26from pandas._typing import ( 

27 Axis, 

28 AxisInt, 

29 DtypeObj, 

30 IndexLabel, 

31 NDFrameT, 

32 Shape, 

33 npt, 

34) 

35from pandas.compat import PYPY 

36from pandas.compat.numpy import function as nv 

37from pandas.errors import AbstractMethodError 

38from pandas.util._decorators import ( 

39 cache_readonly, 

40 doc, 

41) 

42 

43from pandas.core.dtypes.cast import can_hold_element 

44from pandas.core.dtypes.common import ( 

45 is_categorical_dtype, 

46 is_dict_like, 

47 is_extension_array_dtype, 

48 is_object_dtype, 

49 is_scalar, 

50) 

51from pandas.core.dtypes.generic import ( 

52 ABCDataFrame, 

53 ABCIndex, 

54 ABCSeries, 

55) 

56from pandas.core.dtypes.missing import ( 

57 isna, 

58 remove_na_arraylike, 

59) 

60 

61from pandas.core import ( 

62 algorithms, 

63 nanops, 

64 ops, 

65) 

66from pandas.core.accessor import DirNamesMixin 

67from pandas.core.arraylike import OpsMixin 

68from pandas.core.arrays import ExtensionArray 

69from pandas.core.construction import ( 

70 ensure_wrapped_if_datetimelike, 

71 extract_array, 

72) 

73 

74if TYPE_CHECKING: 

75 from pandas._typing import ( 

76 DropKeep, 

77 NumpySorter, 

78 NumpyValueArrayLike, 

79 ScalarLike_co, 

80 ) 

81 

82 from pandas import ( 

83 Categorical, 

84 Index, 

85 Series, 

86 ) 

87 

88 

89_shared_docs: dict[str, str] = {} 

90_indexops_doc_kwargs = { 

91 "klass": "IndexOpsMixin", 

92 "inplace": "", 

93 "unique": "IndexOpsMixin", 

94 "duplicated": "IndexOpsMixin", 

95} 

96 

97_T = TypeVar("_T", bound="IndexOpsMixin") 

98 

99 

class PandasObject(DirNamesMixin):
    """
    Baseclass for various pandas objects.
    """

    # results from calls to methods decorated with cache_readonly get added to _cache
    _cache: dict[str, Any]

    @property
    def _constructor(self):
        """
        Class constructor (for this class it's just ``__class__``).
        """
        return type(self)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular object.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key: str | None = None) -> None:
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if not hasattr(self, "_cache"):
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self) -> int:
        """
        Generates the total memory usage for an object that returns
        either a value or a Series of values.
        """
        memory_usage = getattr(self, "memory_usage", None)
        if memory_usage:
            mem = memory_usage(deep=True)  # pylint: disable=not-callable
            return int(mem if is_scalar(mem) else mem.sum())

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super().__sizeof__()
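
# Illustrative sketch (not part of the original source): subclasses combine
# ``cache_readonly`` with ``_reset_cache``. A hypothetical subclass might look
# like this, with ``expensive`` computed once, stored in ``self._cache``, and
# recomputed only after the cache entry is cleared:
#
#     class MyObj(PandasObject):
#         @cache_readonly
#         def expensive(self):
#             return compute()          # ``compute`` is a placeholder
#
#     obj = MyObj()
#     obj.expensive                     # computed and cached
#     obj._reset_cache("expensive")     # drop just this cached entry
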

class NoNewAttributesMixin:
    """
    Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self._freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self) -> None:
        """
        Prevents setting additional attributes.
        """
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key: str, value) -> None:
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if getattr(self, "__frozen", False) and not (
            key == "_cache"
            or key in type(self).__dict__
            or getattr(self, key, None) is not None
        ):
            raise AttributeError(f"You cannot add any new attribute '{key}'")
        object.__setattr__(self, key, value)
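
# Illustrative sketch (not part of the original source): once an instance is
# frozen, assigning an attribute it does not already have raises, e.g.
#
#     class Frozen(NoNewAttributesMixin):
#         def __init__(self) -> None:
#             self.allowed = 1      # set before freezing
#             self._freeze()
#
#     f = Frozen()
#     f.allowed = 2                 # fine: attribute already exists
#     f.typo = 3                    # AttributeError: cannot add new attribute
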

class SelectionMixin(Generic[NDFrameT]):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions.
    """

    obj: NDFrameT
    _selection: IndexLabel | None = None
    exclusions: frozenset[Hashable]
    _internal_names = ["_cache", "__setstate__"]
    _internal_names_set = set(_internal_names)

    @final
    @property
    def _selection_list(self):
        if not isinstance(
            self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray)
        ):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    @cache_readonly
    def ndim(self) -> int:
        return self._selected_obj.ndim

    @final
    @cache_readonly
    def _obj_with_exclusions(self):
        if isinstance(self.obj, ABCSeries):
            return self.obj

        if self._selection is not None:
            return self.obj._getitem_nocopy(self._selection_list)

        if len(self.exclusions) > 0:
            # equivalent to `self.obj.drop(self.exclusions, axis=1)`
            # but this avoids consolidating and making a copy
            # TODO: following GH#45287 can we now use .drop directly without
            #  making a copy?
            return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError(f"Column(s) {self._selection} already selected")

        if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(set(key)):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
            return self._gotitem(list(key), ndim=2)

        else:
            if key not in self.obj:
                raise KeyError(f"Column not found: {key}")
            ndim = self.obj[key].ndim
            return self._gotitem(key, ndim=ndim)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define; return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate
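
# Illustrative sketch (not part of the original source): group-like objects
# such as a DataFrameGroupBy use this mixin, so column selection on them is
# routed through ``SelectionMixin.__getitem__``, e.g.
#
#     >>> df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})
#     >>> gb = df.groupby("a")
#     >>> gb["b"]           # handled by SelectionMixin.__getitem__ / _gotitem
#     >>> gb["missing"]     # KeyError: Column not found: missing
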

class IndexOpsMixin(OpsMixin):
    """
    Common ops mixin to support a unified interface / docs for Series / Index
    """

    # ndarray compatibility
    __array_priority__ = 1000
    _hidden_attrs: frozenset[str] = frozenset(
        ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
    )

    @property
    def dtype(self) -> DtypeObj:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @final
    def transpose(self: _T, *args, **kwargs) -> _T:
        """
        Return the transpose, which is by definition self.

        Returns
        -------
        %(klass)s
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(
        transpose,
        doc="""
        Return the transpose, which is by definition self.
        """,
    )

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.shape
        (3,)
        """
        return self._values.shape

    def __len__(self) -> int:
        # We need this defined here for mypy
        raise AbstractMethodError(self)

    @property
    def ndim(self) -> Literal[1]:
        """
        Number of dimensions of the underlying data, by definition 1.
        """
        return 1

    @final
    def item(self):
        """
        Return the first element of the underlying data as a Python scalar.

        Returns
        -------
        scalar
            The first element of %(klass)s.

        Raises
        ------
        ValueError
            If the data is not length-1.
        """
        if len(self) == 1:
            return next(iter(self))
        raise ValueError("can only convert an array of size 1 to a Python scalar")
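
    # Illustrative usage (not part of the original source): ``item`` only
    # succeeds on length-1 objects, e.g.
    #
    #     >>> pd.Series([7]).item()
    #     7
    #     >>> pd.Index([1, 2]).item()
    #     ValueError: can only convert an array of size 1 to a Python scalar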

    @property
    def nbytes(self) -> int:
        """
        Return the number of bytes in the underlying data.
        """
        return self._values.nbytes

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.
        """
        return len(self._values)

    @property
    def array(self) -> ExtensionArray:
        """
        The ExtensionArray of the data backing this Series or Index.

        Returns
        -------
        ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.

            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        string             StringArray
        boolean            BooleanArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int and float, a PandasArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <PandasArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        ['a', 'b', 'a']
        Categories (2, object): ['a', 'b']
        """
        raise AbstractMethodError(self)

    @final
    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
        **kwargs,
    ) -> np.ndarray:
        """
        A NumPy ndarray representing the values in this Series or Index.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.
        **kwargs
            Additional keywords passed through to the ``to_numpy`` method
            of the underlying array (for extension arrays).

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the result
        in place will modify the data stored in the Series or Index (not that
        we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is represented.
        Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp`
        objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if is_extension_array_dtype(self.dtype):
            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
        elif kwargs:
            bad_keys = list(kwargs.keys())[0]
            raise TypeError(
                f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
            )

        if na_value is not lib.no_default:
            values = self._values
            if not can_hold_element(values, na_value):
                # if we can't hold the na_value asarray either makes a copy or we
                # error before modifying values. The asarray later on thus won't make
                # another copy
                values = np.asarray(values, dtype=dtype)
            else:
                values = values.copy()

            values[np.asanyarray(self.isna())] = na_value
        else:
            values = self._values

        result = np.asarray(values, dtype=dtype)

        if (copy and na_value is lib.no_default) or (
            not copy and using_copy_on_write()
        ):
            if np.shares_memory(self._values[:2], result[:2]):
                # Take slices to improve performance of check
                if using_copy_on_write() and not copy:
                    result = result.view()
                    result.flags.writeable = False
                else:
                    result = result.copy()

        return result
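
    # Illustrative usage (not part of the original source): ``na_value``
    # replaces missing values in the returned ndarray, e.g.
    #
    #     >>> pd.Series([1.0, None]).to_numpy(na_value=0.0)
    #     array([1., 0.])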

    @final
    @property
    def empty(self) -> bool:
        return not self.size

    def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """
        nv.validate_minmax_axis(axis)
        nv.validate_max(args, kwargs)
        return nanops.nanmax(self._values, skipna=skipna)

    @doc(op="max", oppose="min", value="largest")
    def argmax(
        self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
    ) -> int:
        """
        Return int position of the {value} value in the Series.

        If the {op}imum is achieved in multiple locations,
        the first row position is returned.

        Parameters
        ----------
        axis : {{None}}
            Unused. Parameter needed for compatibility with DataFrame.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        int
            Row position of the {op}imum value.

        See Also
        --------
        Series.arg{op} : Return position of the {op}imum value.
        Series.arg{oppose} : Return position of the {oppose}imum value.
        numpy.ndarray.arg{op} : Equivalent method for numpy arrays.
        Series.idxmax : Return index label of the maximum values.
        Series.idxmin : Return index label of the minimum values.

        Examples
        --------
        Consider a dataset containing cereal calories.

        >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0,
        ...                'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}})
        >>> s
        Corn Flakes              100.0
        Almond Delight           110.0
        Cinnamon Toast Crunch    120.0
        Cocoa Puff               110.0
        dtype: float64

        >>> s.argmax()
        2
        >>> s.argmin()
        0

        The maximum cereal calories is the third element and
        the minimum cereal calories is the first element,
        since the Series is zero-indexed.
        """
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                return -1
            else:
                return delegate.argmax()
        else:
            # error: Incompatible return value type (got "Union[int, ndarray]", expected
            # "int")
            return nanops.nanargmax(  # type: ignore[return-value]
                delegate, skipna=skipna
            )

    def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
        """
        Return the minimum value of the Index.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_minmax_axis(axis)
        nv.validate_min(args, kwargs)
        return nanops.nanmin(self._values, skipna=skipna)

    @doc(argmax, op="min", oppose="max", value="smallest")
    def argmin(
        self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
    ) -> int:
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                return -1
            else:
                return delegate.argmin()
        else:
            # error: Incompatible return value type (got "Union[int, ndarray]", expected
            # "int")
            return nanops.nanargmin(  # type: ignore[return-value]
                delegate, skipna=skipna
            )
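
    # Illustrative usage (not part of the original source): with an
    # extension-array (nullable) dtype and ``skipna=False``, a missing value
    # makes argmax/argmin return -1 rather than a real position, per the
    # branches above, e.g.
    #
    #     >>> s = pd.Series([1, None, 3], dtype="Int64")
    #     >>> s.argmax()
    #     2
    #     >>> s.argmax(skipna=False)
    #     -1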

    def tolist(self):
        """
        Return a list of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).

        Returns
        -------
        list

        See Also
        --------
        numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
            nested list of Python scalars.
        """
        return self._values.tolist()

    to_list = tolist

    def __iter__(self) -> Iterator:
        """
        Return an iterator of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).

        Returns
        -------
        iterator
        """
        # We are explicitly making element iterators.
        if not isinstance(self._values, np.ndarray):
            # Check type instead of dtype to catch DTA/TDA
            return iter(self._values)
        else:
            return map(self._values.item, range(self._values.size))

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs.

        Enables various performance speedups.

        Returns
        -------
        bool
        """
        # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]"
        # has no attribute "any"
        return bool(isna(self).any())  # type: ignore[union-attr]

    def isna(self) -> npt.NDArray[np.bool_]:
        return isna(self._values)

    def _reduce(
        self,
        op,
        name: str,
        *,
        axis: Axis = 0,
        skipna: bool = True,
        numeric_only=None,
        filter_type=None,
        **kwds,
    ):
        """
        Perform the reduction type operation if we can.
        """
        func = getattr(self, name, None)
        if func is None:
            raise TypeError(
                f"{type(self).__name__} cannot perform the operation {name}"
            )
        return func(skipna=skipna, **kwds)

    @final
    def _map_values(self, mapper, na_action=None):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        # we can fastpath dict/Series to an efficient map
        # as we know that we are not going to have to yield
        # python types
        if is_dict_like(mapper):
            if isinstance(mapper, dict) and hasattr(mapper, "__missing__"):
                # If a dictionary subclass defines a default value method,
                # convert mapper to a lookup function (GH #15999).
                dict_with_default = mapper
                mapper = lambda x: dict_with_default[
                    np.nan if isinstance(x, float) and np.isnan(x) else x
                ]
            else:
                # Dictionary does not have a default. Thus it's safe to
                # convert to a Series for efficiency.
                # we specify the keys here to handle the
                # possibility that they are tuples

                # The return value of mapping with an empty mapper is
                # expected to be pd.Series(np.nan, ...). As np.nan is
                # of dtype float64 the return value of this method should
                # be float64 as well
                from pandas import Series

                if len(mapper) == 0:
                    mapper = Series(mapper, dtype=np.float64)
                else:
                    mapper = Series(mapper)

        if isinstance(mapper, ABCSeries):
            if na_action not in (None, "ignore"):
                msg = (
                    "na_action must either be 'ignore' or None, "
                    f"{na_action} was passed"
                )
                raise ValueError(msg)

            if na_action == "ignore":
                mapper = mapper[mapper.index.notna()]

            # Since values were input this means we came from either
            # a dict or a series and mapper should be an index
            if is_categorical_dtype(self.dtype):
                # use the built in categorical series mapper which saves
                # time by mapping the categories instead of all values

                cat = cast("Categorical", self._values)
                return cat.map(mapper)

            values = self._values

            indexer = mapper.index.get_indexer(values)
            new_values = algorithms.take_nd(mapper._values, indexer)

            return new_values

        # we must convert to python types
        if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
            # GH#23179 some EAs do not have `map`
            values = self._values
            if na_action is not None:
                raise NotImplementedError
            map_f = lambda values, f: values.map(f)
        else:
            values = self._values.astype(object)
            if na_action == "ignore":
                map_f = lambda values, f: lib.map_infer_mask(
                    values, f, isna(values).view(np.uint8)
                )
            elif na_action is None:
                map_f = lib.map_infer
            else:
                msg = (
                    "na_action must either be 'ignore' or None, "
                    f"{na_action} was passed"
                )
                raise ValueError(msg)

        # mapper is a function
        new_values = map_f(values, mapper)

        return new_values
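
    # Illustrative sketch (not part of the original source): the public
    # ``Series.map`` is routed through ``_map_values``. A dict subclass with
    # ``__missing__`` (e.g. ``collections.defaultdict``) is turned into a
    # lookup function, so unmatched values get the default instead of NaN:
    #
    #     >>> from collections import defaultdict
    #     >>> s = pd.Series(["cat", "dog", "ferret"])
    #     >>> s.map(defaultdict(lambda: "unknown", {"cat": "kitten"}))
    #     0     kitten
    #     1    unknown
    #     2    unknown
    #     dtype: object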

    @final
    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ) -> Series:
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : bool, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : bool, default True
            Sort by frequencies.
        ascending : bool, default False
            Sort in ascending order.
        bins : int, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : bool, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.
        DataFrame.value_counts: Equivalent method on DataFrames.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        Name: count, dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        1.0    0.2
        2.0    0.2
        4.0    0.2
        Name: proportion, dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        occurrences of values, divide the index into the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (0.996, 2.0]    2
        (2.0, 3.0]      2
        (3.0, 4.0]      1
        Name: count, dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        NaN    1
        Name: count, dtype: int64
        """
        return algorithms.value_counts(
            self,
            sort=sort,
            ascending=ascending,
            normalize=normalize,
            bins=bins,
            dropna=dropna,
        )

    def unique(self):
        values = self._values
        if not isinstance(values, np.ndarray):
            # i.e. ExtensionArray
            result = values.unique()
        else:
            result = algorithms.unique1d(values)
        return result

    @final
    def nunique(self, dropna: bool = True) -> int:
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the count.

        Returns
        -------
        int

        See Also
        --------
        DataFrame.nunique: Method nunique for DataFrame.
        Series.count: Count non-NA/null observations in the Series.

        Examples
        --------
        >>> s = pd.Series([1, 3, 5, 7, 7])
        >>> s
        0    1
        1    3
        2    5
        3    7
        4    7
        dtype: int64

        >>> s.nunique()
        4
        """
        uniqs = self.unique()
        if dropna:
            uniqs = remove_na_arraylike(uniqs)
        return len(uniqs)

    @property
    def is_unique(self) -> bool:
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        bool
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        Returns
        -------
        bool
        """
        from pandas import Index

        return Index(self).is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically decreasing.

        Returns
        -------
        bool
        """
        from pandas import Index

        return Index(self).is_monotonic_decreasing

    @final
    def _memory_usage(self, deep: bool = False) -> int:
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool, default False
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes : Total bytes consumed by the elements of the
            array.

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy.
        """
        if hasattr(self.array, "memory_usage"):
            return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]
                deep=deep,
            )

        v = self.array.nbytes
        if deep and is_object_dtype(self) and not PYPY:
            values = cast(np.ndarray, self._values)
            v += lib.memory_usage_of_objects(values)
        return v
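
    # Illustrative usage (not part of the original source): for object dtype,
    # ``deep=True`` also counts the Python objects referenced by the array,
    # so the deep figure is larger than the shallow one, e.g.
    #
    #     >>> s = pd.Series(["a", "bb", "ccc"])
    #     >>> s.memory_usage(deep=True) > s.memory_usage()
    #     True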

    @doc(
        algorithms.factorize,
        values="",
        order="",
        size_hint="",
        sort=textwrap.dedent(
            """\
            sort : bool, default False
                Sort `uniques` and shuffle `codes` to maintain the
                relationship.
            """
        ),
    )
    def factorize(
        self,
        sort: bool = False,
        use_na_sentinel: bool = True,
    ) -> tuple[npt.NDArray[np.intp], Index]:
        codes, uniques = algorithms.factorize(
            self._values, sort=sort, use_na_sentinel=use_na_sentinel
        )
        if uniques.dtype == np.float16:
            uniques = uniques.astype(np.float32)

        if isinstance(self, ABCIndex):
            # preserve e.g. MultiIndex
            uniques = self._constructor(uniques)
        else:
            from pandas import Index

            uniques = Index(uniques)
        return codes, uniques
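
    # Illustrative usage (not part of the original source): ``factorize``
    # returns integer codes plus the unique values wrapped in an Index, e.g.
    #
    #     >>> codes, uniques = pd.Series(["b", "a", "b"]).factorize()
    #     >>> codes
    #     array([0, 1, 0])
    #     >>> uniques
    #     Index(['b', 'a'], dtype='object')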

    _shared_docs[
        "searchsorted"
    ] = """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted {klass} `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        .. note::

            The {klass} *must* be monotonically sorted, otherwise
            wrong locations will likely be returned. Pandas does *not*
            check this for you.

        Parameters
        ----------
        value : array-like or scalar
            Values to insert into `self`.
        side : {{'left', 'right'}}, optional
            If 'left', the index of the first suitable location found is given.
            If 'right', return the last such index. If there is no suitable
            index, return either 0 or N (where N is the length of `self`).
        sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

        See Also
        --------
        sort_values : Sort by the values along either axis.
        numpy.searchsorted : Similar method from NumPy.

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> ser
        0    1
        1    2
        2    3
        dtype: int64

        >>> ser.searchsorted(4)
        3

        >>> ser.searchsorted([0, 4])
        array([0, 3])

        >>> ser.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> ser.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
        >>> ser
        0   2000-03-11
        1   2000-03-12
        2   2000-03-13
        dtype: datetime64[ns]

        >>> ser.searchsorted('3/14/2000')
        3

        >>> ser = pd.Categorical(
        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
        ... )
        >>> ser
        ['apple', 'bread', 'bread', 'cheese', 'milk']
        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']

        >>> ser.searchsorted('bread')
        1

        >>> ser.searchsorted(['bread'], side='right')
        array([3])

        If the values are not monotonically sorted, wrong locations
        may be returned:

        >>> ser = pd.Series([2, 1, 3])
        >>> ser
        0    2
        1    1
        2    3
        dtype: int64

        >>> ser.searchsorted(1)  # doctest: +SKIP
        0  # wrong result, correct would be 1
        """

    # This overload is needed so that the call to searchsorted in
    # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result

    @overload
    # The following ignore is also present in numpy/__init__.pyi
    # Possibly a mypy bug??
    # error: Overloaded function signatures 1 and 2 overlap with incompatible
    # return types [misc]
    def searchsorted(  # type: ignore[misc]
        self,
        value: ScalarLike_co,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> np.intp:
        ...

    @overload
    def searchsorted(
        self,
        value: npt.ArrayLike | ExtensionArray,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> npt.NDArray[np.intp]:
        ...

    @doc(_shared_docs["searchsorted"], klass="Index")
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if isinstance(value, ABCDataFrame):
            msg = (
                "Value must be 1-D array-like or scalar, "
                f"{type(value).__name__} is not supported"
            )
            raise ValueError(msg)

        values = self._values
        if not isinstance(values, np.ndarray):
            # Going through EA.searchsorted directly improves performance GH#38083
            return values.searchsorted(value, side=side, sorter=sorter)

        return algorithms.searchsorted(
            values,
            value,
            side=side,
            sorter=sorter,
        )

    def drop_duplicates(self, *, keep: DropKeep = "first"):
        duplicated = self._duplicated(keep=keep)
        # error: Value of type "IndexOpsMixin" is not indexable
        return self[~duplicated]  # type: ignore[index]

    @final
    def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        return algorithms.duplicated(self._values, keep=keep)
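
    # Illustrative usage (not part of the original source): ``keep`` controls
    # which of the duplicates survives, e.g.
    #
    #     >>> idx = pd.Index([1, 1, 2])
    #     >>> idx.drop_duplicates(keep="first")
    #     Index([1, 2], dtype='int64')
    #     >>> idx.drop_duplicates(keep=False)
    #     Index([2], dtype='int64')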

    def _arith_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)
        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
        rvalues = ensure_wrapped_if_datetimelike(rvalues)

        with np.errstate(all="ignore"):
            result = ops.arithmetic_op(lvalues, rvalues, op)

        return self._construct_result(result, name=res_name)

    def _construct_result(self, result, name):
        """
        Construct an appropriately-wrapped result from the ArrayLike result
        of an arithmetic-like operation.
        """
        raise AbstractMethodError(self)