Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/numpy_.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

205 statements  

1from __future__ import annotations 

2 

3from typing import ( 

4 TYPE_CHECKING, 

5 Literal, 

6) 

7 

8import numpy as np 

9 

10from pandas._libs import lib 

11from pandas._libs.tslibs import is_supported_dtype 

12from pandas.compat.numpy import function as nv 

13 

14from pandas.core.dtypes.astype import astype_array 

15from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

16from pandas.core.dtypes.common import pandas_dtype 

17from pandas.core.dtypes.dtypes import NumpyEADtype 

18from pandas.core.dtypes.missing import isna 

19 

20from pandas.core import ( 

21 arraylike, 

22 missing, 

23 nanops, 

24 ops, 

25) 

26from pandas.core.arraylike import OpsMixin 

27from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

28from pandas.core.construction import ensure_wrapped_if_datetimelike 

29from pandas.core.strings.object_array import ObjectStringArrayMixin 

30 

31if TYPE_CHECKING: 

32 from pandas._typing import ( 

33 AxisInt, 

34 Dtype, 

35 FillnaOptions, 

36 InterpolateOptions, 

37 NpDtype, 

38 Scalar, 

39 Self, 

40 npt, 

41 ) 

42 

43 from pandas import Index 

44 

45 

# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class NumpyExtensionArray(  # type: ignore[misc]
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.arrays.NumpyExtensionArray(np.array([0, 1, 2, 3]))
    <NumpyExtensionArray>
    [0, 1, 2, 3]
    Length: 4, dtype: int64
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # High priority so NumPy binary ops defer to our __array_ufunc__ instead
    # of treating this object as a plain sequence.
    __array_priority__ = 1000
    # Backing data: a plain 1-D (technically also 2-D) ndarray.
    _ndarray: np.ndarray
    # The NumpyEADtype wrapping _ndarray.dtype.
    _dtype: NumpyEADtype
    # Sentinel written into the backing array for missing entries.
    _internal_fill_value = np.nan

91 

92 # ------------------------------------------------------------------------ 

93 # Constructors 

94 

95 def __init__( 

96 self, values: np.ndarray | NumpyExtensionArray, copy: bool = False 

97 ) -> None: 

98 if isinstance(values, type(self)): 

99 values = values._ndarray 

100 if not isinstance(values, np.ndarray): 

101 raise ValueError( 

102 f"'values' must be a NumPy array, not {type(values).__name__}" 

103 ) 

104 

105 if values.ndim == 0: 

106 # Technically we support 2, but do not advertise that fact. 

107 raise ValueError("NumpyExtensionArray must be 1-dimensional.") 

108 

109 if copy: 

110 values = values.copy() 

111 

112 dtype = NumpyEADtype(values.dtype) 

113 super().__init__(values, dtype) 

114 

115 @classmethod 

116 def _from_sequence( 

117 cls, scalars, *, dtype: Dtype | None = None, copy: bool = False 

118 ) -> NumpyExtensionArray: 

119 if isinstance(dtype, NumpyEADtype): 

120 dtype = dtype._dtype 

121 

122 # error: Argument "dtype" to "asarray" has incompatible type 

123 # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object], 

124 # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, 

125 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], 

126 # _DTypeDict, Tuple[Any, Any]]]" 

127 result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type] 

128 if ( 

129 result.ndim > 1 

130 and not hasattr(scalars, "dtype") 

131 and (dtype is None or dtype == object) 

132 ): 

133 # e.g. list-of-tuples 

134 result = construct_1d_object_array_from_listlike(scalars) 

135 

136 if copy and result is scalars: 

137 result = result.copy() 

138 return cls(result) 

139 

140 def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray: 

141 return type(self)(arr) 

142 

143 # ------------------------------------------------------------------------ 

144 # Data 

145 

146 @property 

147 def dtype(self) -> NumpyEADtype: 

148 return self._dtype 

149 

150 # ------------------------------------------------------------------------ 

151 # NumPy Array Interface 

152 

153 def __array__( 

154 self, dtype: NpDtype | None = None, copy: bool | None = None 

155 ) -> np.ndarray: 

156 return np.asarray(self._ndarray, dtype=dtype) 

157 

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        NumPy ufunc protocol hook: dispatch to pandas ops when possible,
        otherwise apply the ufunc to the unwrapped ndarrays and re-box
        array results.
        """
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())

        # First give pandas' dunder ops (e.g. __add__) a chance to handle it.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(
            x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs
        )
        if out:
            # Unwrap any NumpyExtensionArray out= targets as well.
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)

211 

212 # ------------------------------------------------------------------------ 

213 # Pandas ExtensionArray Interface 

214 

215 def astype(self, dtype, copy: bool = True): 

216 dtype = pandas_dtype(dtype) 

217 

218 if dtype == self.dtype: 

219 if copy: 

220 return self.copy() 

221 return self 

222 

223 result = astype_array(self._ndarray, dtype=dtype, copy=copy) 

224 return result 

225 

226 def isna(self) -> np.ndarray: 

227 return isna(self._ndarray) 

228 

229 def _validate_scalar(self, fill_value): 

230 if fill_value is None: 

231 # Primarily for subclasses 

232 fill_value = self.dtype.na_value 

233 return fill_value 

234 

235 def _values_for_factorize(self) -> tuple[np.ndarray, float | None]: 

236 if self.dtype.kind in "iub": 

237 fv = None 

238 else: 

239 fv = np.nan 

240 return self._ndarray, fv 

241 

242 # Base EA class (and all other EA classes) don't have limit_area keyword 

243 # This can be removed here as well when the interpolate ffill/bfill method 

244 # deprecation is enforced 

245 def _pad_or_backfill( 

246 self, 

247 *, 

248 method: FillnaOptions, 

249 limit: int | None = None, 

250 limit_area: Literal["inside", "outside"] | None = None, 

251 copy: bool = True, 

252 ) -> Self: 

253 """ 

254 ffill or bfill along axis=0. 

255 """ 

256 if copy: 

257 out_data = self._ndarray.copy() 

258 else: 

259 out_data = self._ndarray 

260 

261 meth = missing.clean_fill_method(method) 

262 missing.pad_or_backfill_inplace( 

263 out_data.T, 

264 method=meth, 

265 axis=0, 

266 limit=limit, 

267 limit_area=limit_area, 

268 ) 

269 

270 if not copy: 

271 return self 

272 return type(self)._simple_new(out_data, dtype=self.dtype) 

273 

274 def interpolate( 

275 self, 

276 *, 

277 method: InterpolateOptions, 

278 axis: int, 

279 index: Index, 

280 limit, 

281 limit_direction, 

282 limit_area, 

283 copy: bool, 

284 **kwargs, 

285 ) -> Self: 

286 """ 

287 See NDFrame.interpolate.__doc__. 

288 """ 

289 # NB: we return type(self) even if copy=False 

290 if not copy: 

291 out_data = self._ndarray 

292 else: 

293 out_data = self._ndarray.copy() 

294 

295 # TODO: assert we have floating dtype? 

296 missing.interpolate_2d_inplace( 

297 out_data, 

298 method=method, 

299 axis=axis, 

300 index=index, 

301 limit=limit, 

302 limit_direction=limit_direction, 

303 limit_area=limit_area, 

304 **kwargs, 

305 ) 

306 if not copy: 

307 return self 

308 return type(self)._simple_new(out_data, dtype=self.dtype) 

309 

310 # ------------------------------------------------------------------------ 

311 # Reductions 

312 

313 def any( 

314 self, 

315 *, 

316 axis: AxisInt | None = None, 

317 out=None, 

318 keepdims: bool = False, 

319 skipna: bool = True, 

320 ): 

321 nv.validate_any((), {"out": out, "keepdims": keepdims}) 

322 result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) 

323 return self._wrap_reduction_result(axis, result) 

324 

325 def all( 

326 self, 

327 *, 

328 axis: AxisInt | None = None, 

329 out=None, 

330 keepdims: bool = False, 

331 skipna: bool = True, 

332 ): 

333 nv.validate_all((), {"out": out, "keepdims": keepdims}) 

334 result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) 

335 return self._wrap_reduction_result(axis, result) 

336 

337 def min( 

338 self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs 

339 ) -> Scalar: 

340 nv.validate_min((), kwargs) 

341 result = nanops.nanmin( 

342 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna 

343 ) 

344 return self._wrap_reduction_result(axis, result) 

345 

346 def max( 

347 self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs 

348 ) -> Scalar: 

349 nv.validate_max((), kwargs) 

350 result = nanops.nanmax( 

351 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna 

352 ) 

353 return self._wrap_reduction_result(axis, result) 

354 

355 def sum( 

356 self, 

357 *, 

358 axis: AxisInt | None = None, 

359 skipna: bool = True, 

360 min_count: int = 0, 

361 **kwargs, 

362 ) -> Scalar: 

363 nv.validate_sum((), kwargs) 

364 result = nanops.nansum( 

365 self._ndarray, axis=axis, skipna=skipna, min_count=min_count 

366 ) 

367 return self._wrap_reduction_result(axis, result) 

368 

369 def prod( 

370 self, 

371 *, 

372 axis: AxisInt | None = None, 

373 skipna: bool = True, 

374 min_count: int = 0, 

375 **kwargs, 

376 ) -> Scalar: 

377 nv.validate_prod((), kwargs) 

378 result = nanops.nanprod( 

379 self._ndarray, axis=axis, skipna=skipna, min_count=min_count 

380 ) 

381 return self._wrap_reduction_result(axis, result) 

382 

383 def mean( 

384 self, 

385 *, 

386 axis: AxisInt | None = None, 

387 dtype: NpDtype | None = None, 

388 out=None, 

389 keepdims: bool = False, 

390 skipna: bool = True, 

391 ): 

392 nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) 

393 result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) 

394 return self._wrap_reduction_result(axis, result) 

395 

396 def median( 

397 self, 

398 *, 

399 axis: AxisInt | None = None, 

400 out=None, 

401 overwrite_input: bool = False, 

402 keepdims: bool = False, 

403 skipna: bool = True, 

404 ): 

405 nv.validate_median( 

406 (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} 

407 ) 

408 result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) 

409 return self._wrap_reduction_result(axis, result) 

410 

411 def std( 

412 self, 

413 *, 

414 axis: AxisInt | None = None, 

415 dtype: NpDtype | None = None, 

416 out=None, 

417 ddof: int = 1, 

418 keepdims: bool = False, 

419 skipna: bool = True, 

420 ): 

421 nv.validate_stat_ddof_func( 

422 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" 

423 ) 

424 result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

425 return self._wrap_reduction_result(axis, result) 

426 

427 def var( 

428 self, 

429 *, 

430 axis: AxisInt | None = None, 

431 dtype: NpDtype | None = None, 

432 out=None, 

433 ddof: int = 1, 

434 keepdims: bool = False, 

435 skipna: bool = True, 

436 ): 

437 nv.validate_stat_ddof_func( 

438 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" 

439 ) 

440 result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

441 return self._wrap_reduction_result(axis, result) 

442 

443 def sem( 

444 self, 

445 *, 

446 axis: AxisInt | None = None, 

447 dtype: NpDtype | None = None, 

448 out=None, 

449 ddof: int = 1, 

450 keepdims: bool = False, 

451 skipna: bool = True, 

452 ): 

453 nv.validate_stat_ddof_func( 

454 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" 

455 ) 

456 result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) 

457 return self._wrap_reduction_result(axis, result) 

458 

459 def kurt( 

460 self, 

461 *, 

462 axis: AxisInt | None = None, 

463 dtype: NpDtype | None = None, 

464 out=None, 

465 keepdims: bool = False, 

466 skipna: bool = True, 

467 ): 

468 nv.validate_stat_ddof_func( 

469 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" 

470 ) 

471 result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) 

472 return self._wrap_reduction_result(axis, result) 

473 

474 def skew( 

475 self, 

476 *, 

477 axis: AxisInt | None = None, 

478 dtype: NpDtype | None = None, 

479 out=None, 

480 keepdims: bool = False, 

481 skipna: bool = True, 

482 ): 

483 nv.validate_stat_ddof_func( 

484 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" 

485 ) 

486 result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) 

487 return self._wrap_reduction_result(axis, result) 

488 

489 # ------------------------------------------------------------------------ 

490 # Additional Methods 

491 

492 def to_numpy( 

493 self, 

494 dtype: npt.DTypeLike | None = None, 

495 copy: bool = False, 

496 na_value: object = lib.no_default, 

497 ) -> np.ndarray: 

498 mask = self.isna() 

499 if na_value is not lib.no_default and mask.any(): 

500 result = self._ndarray.copy() 

501 result[mask] = na_value 

502 else: 

503 result = self._ndarray 

504 

505 result = np.asarray(result, dtype=dtype) 

506 

507 if copy and result is self._ndarray: 

508 result = result.copy() 

509 

510 return result 

511 

512 # ------------------------------------------------------------------------ 

513 # Ops 

514 

515 def __invert__(self) -> NumpyExtensionArray: 

516 return type(self)(~self._ndarray) 

517 

518 def __neg__(self) -> NumpyExtensionArray: 

519 return type(self)(-self._ndarray) 

520 

521 def __pos__(self) -> NumpyExtensionArray: 

522 return type(self)(+self._ndarray) 

523 

524 def __abs__(self) -> NumpyExtensionArray: 

525 return type(self)(abs(self._ndarray)) 

526 

527 def _cmp_method(self, other, op): 

528 if isinstance(other, NumpyExtensionArray): 

529 other = other._ndarray 

530 

531 other = ops.maybe_prepare_scalar_for_op(other, (len(self),)) 

532 pd_op = ops.get_array_op(op) 

533 other = ensure_wrapped_if_datetimelike(other) 

534 result = pd_op(self._ndarray, other) 

535 

536 if op is divmod or op is ops.rdivmod: 

537 a, b = result 

538 if isinstance(a, np.ndarray): 

539 # for e.g. op vs TimedeltaArray, we may already 

540 # have an ExtensionArray, in which case we do not wrap 

541 return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b) 

542 return a, b 

543 

544 if isinstance(result, np.ndarray): 

545 # for e.g. multiplication vs TimedeltaArray, we may already 

546 # have an ExtensionArray, in which case we do not wrap 

547 return self._wrap_ndarray_result(result) 

548 return result 

549 

550 _arith_method = _cmp_method 

551 

552 def _wrap_ndarray_result(self, result: np.ndarray): 

553 # If we have timedelta64[ns] result, return a TimedeltaArray instead 

554 # of a NumpyExtensionArray 

555 if result.dtype.kind == "m" and is_supported_dtype(result.dtype): 

556 from pandas.core.arrays import TimedeltaArray 

557 

558 return TimedeltaArray._simple_new(result, dtype=result.dtype) 

559 return type(self)(result) 

560 

    # ------------------------------------------------------------------------
    # String methods interface
    # NA sentinel consumed by ObjectStringArrayMixin's string methods.
    _str_na_value = np.nan