Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/numpy_.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

186 statements  

1from __future__ import annotations 

2 

3import numpy as np 

4 

5from pandas._libs import lib 

6from pandas._libs.tslibs import ( 

7 get_unit_from_dtype, 

8 is_supported_unit, 

9) 

10from pandas._typing import ( 

11 AxisInt, 

12 Dtype, 

13 NpDtype, 

14 Scalar, 

15 npt, 

16) 

17from pandas.compat.numpy import function as nv 

18 

19from pandas.core.dtypes.astype import astype_array 

20from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

21from pandas.core.dtypes.common import ( 

22 is_dtype_equal, 

23 pandas_dtype, 

24) 

25from pandas.core.dtypes.dtypes import PandasDtype 

26from pandas.core.dtypes.missing import isna 

27 

28from pandas.core import ( 

29 arraylike, 

30 nanops, 

31 ops, 

32) 

33from pandas.core.arraylike import OpsMixin 

34from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

35from pandas.core.construction import ensure_wrapped_if_datetimelike 

36from pandas.core.strings.object_array import ObjectStringArrayMixin 

37 

38 

class PandasArray(
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    __array_priority__ = 1000
    _ndarray: np.ndarray
    _dtype: PandasDtype
    _internal_fill_value = np.nan

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
        """
        Wrap ``values`` (an ndarray, or another instance whose backing
        ndarray is reused) in a PandasArray.

        Raises
        ------
        ValueError
            If ``values`` is not a NumPy array after unwrapping, or if it is
            0-dimensional.
        """
        if isinstance(values, type(self)):
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"'values' must be a NumPy array, not {type(values).__name__}"
            )

        if values.ndim == 0:
            # Technically we support 2, but do not advertise that fact.
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        dtype = PandasDtype(values.dtype)
        super().__init__(values, dtype)

    @classmethod
    def _from_sequence(
        cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
    ) -> PandasArray:
        """
        Build a PandasArray from ``scalars`` via ``np.asarray``.

        A ``PandasDtype`` is unwrapped to its NumPy dtype first. ``copy`` only
        triggers a copy when ``np.asarray`` handed back ``scalars`` itself.
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        # error: Argument "dtype" to "asarray" has incompatible type
        # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
        # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
        # _DTypeDict, Tuple[Any, Any]]]"
        result = np.asarray(scalars, dtype=dtype)  # type: ignore[arg-type]
        if (
            result.ndim > 1
            and not hasattr(scalars, "dtype")
            and (dtype is None or dtype == object)
        ):
            # e.g. list-of-tuples
            result = construct_1d_object_array_from_listlike(scalars)

        if copy and result is scalars:
            result = result.copy()
        return cls(result)

    def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
        """Wrap ``arr`` in a new instance of this array's type."""
        return type(self)(arr)

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self) -> PandasDtype:
        """The ``PandasDtype`` stored on this array."""
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """Return the backing ndarray, converted to ``dtype`` if given."""
        return np.asarray(self._ndarray, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        Apply a NumPy ufunc, re-boxing array results in this array's type.

        Dispatch order: dunder-op dispatch, then the ``out=`` handler, then
        the reduction dispatcher (for ``method == "reduce"``), and finally the
        ufunc itself on the unwrapped ndarrays.
        """
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        # Every call carrying "out" has already returned above, so only the
        # positional inputs can still hold PandasArray instances here.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def astype(self, dtype, copy: bool = True):
        """
        Cast to ``dtype``.

        When ``dtype`` equals the current dtype, return ``self`` (or a copy of
        it when ``copy`` is True); otherwise delegate to ``astype_array`` on
        the backing ndarray.
        """
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        result = astype_array(self._ndarray, dtype=dtype, copy=copy)
        return result

    def isna(self) -> np.ndarray:
        """Return the result of ``isna`` applied to the backing ndarray."""
        return isna(self._ndarray)

    def _validate_scalar(self, fill_value):
        """Return ``fill_value``, substituting the dtype's ``na_value`` for None."""
        if fill_value is None:
            # Primarily for subclasses
            fill_value = self.dtype.na_value
        return fill_value

    def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
        """
        Return the backing ndarray and the NA sentinel used for factorizing.

        The sentinel is None for integer, unsigned and boolean kinds, and
        ``np.nan`` for everything else.
        """
        if self.dtype.kind in ["i", "u", "b"]:
            fv = None
        else:
            fv = np.nan
        return self._ndarray, fv

    # ------------------------------------------------------------------------
    # Reductions
    #
    # Each reduction forwards its NumPy-compatibility keywords to the matching
    # ``nv.validate_*`` helper, computes the result with the corresponding
    # ``nanops`` function, and passes it through ``_wrap_reduction_result``.

    def any(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanany``."""
        nv.validate_any((), {"out": out, "keepdims": keepdims})
        result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def all(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanall``."""
        nv.validate_all((), {"out": out, "keepdims": keepdims})
        result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def min(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Reduce with ``nanops.nanmin``, passing ``self.isna()`` as the mask."""
        nv.validate_min((), kwargs)
        result = nanops.nanmin(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def max(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Reduce with ``nanops.nanmax``, passing ``self.isna()`` as the mask."""
        nv.validate_max((), kwargs)
        result = nanops.nanmax(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def sum(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Reduce with ``nanops.nansum``."""
        nv.validate_sum((), kwargs)
        result = nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def prod(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Reduce with ``nanops.nanprod``."""
        nv.validate_prod((), kwargs)
        result = nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def mean(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanmean``."""
        nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
        result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def median(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        overwrite_input: bool = False,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanmedian``."""
        nv.validate_median(
            (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
        )
        result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def std(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanstd``."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
        )
        result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def var(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanvar``."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
        )
        result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def sem(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nansem``."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
        )
        result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def kurt(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nankurt``."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
        )
        result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def skew(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Reduce with ``nanops.nanskew``."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
        )
        result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    # ------------------------------------------------------------------------
    # Additional Methods

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Return the data as an ndarray.

        Parameters
        ----------
        dtype : dtype-like, optional
            Passed to ``np.asarray`` for the final conversion.
        copy : bool, default False
            When True, copy if the result would otherwise be the backing
            ndarray itself.
        na_value : object, optional
            When supplied and any element is NA, the NA positions of a copy
            of the data are overwritten with this value.
        """
        result = self._ndarray
        if na_value is not lib.no_default:
            # The NA mask is only needed when a replacement value was given,
            # so skip computing it otherwise.
            mask = self.isna()
            if mask.any():
                result = result.copy()
                result[mask] = na_value

        result = np.asarray(result, dtype=dtype)

        if copy and result is self._ndarray:
            result = result.copy()

        return result

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self) -> PandasArray:
        """Apply ``~`` to the backing ndarray and re-wrap."""
        return type(self)(~self._ndarray)

    def __neg__(self) -> PandasArray:
        """Apply unary ``-`` to the backing ndarray and re-wrap."""
        return type(self)(-self._ndarray)

    def __pos__(self) -> PandasArray:
        """Apply unary ``+`` to the backing ndarray and re-wrap."""
        return type(self)(+self._ndarray)

    def __abs__(self) -> PandasArray:
        """Apply ``abs`` to the backing ndarray and re-wrap."""
        return type(self)(abs(self._ndarray))

    def _cmp_method(self, other, op):
        """
        Evaluate ``op(self, other)`` on the backing ndarray.

        ndarray results are re-wrapped through ``_wrap_ndarray_result``; any
        other result type is returned unchanged. Also bound as
        ``_arith_method`` below.
        """
        if isinstance(other, PandasArray):
            other = other._ndarray

        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)
        with np.errstate(all="ignore"):
            result = pd_op(self._ndarray, other)

        if op is divmod or op is ops.rdivmod:
            a, b = result
            if isinstance(a, np.ndarray):
                # for e.g. op vs TimedeltaArray, we may already
                #  have an ExtensionArray, in which case we do not wrap
                return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
            return a, b

        if isinstance(result, np.ndarray):
            # for e.g. multiplication vs TimedeltaArray, we may already
            #  have an ExtensionArray, in which case we do not wrap
            return self._wrap_ndarray_result(result)
        return result

    _arith_method = _cmp_method

    def _wrap_ndarray_result(self, result: np.ndarray):
        """
        Box an ndarray result, using TimedeltaArray for timedelta64 data whose
        unit passes ``is_supported_unit`` and this array's type otherwise.
        """
        # If we have timedelta64[ns] result, return a TimedeltaArray instead
        #  of a PandasArray
        if result.dtype.kind == "m" and is_supported_unit(
            get_unit_from_dtype(result.dtype)
        ):
            # Imported here rather than at module level, as in the original.
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._simple_new(result, dtype=result.dtype)
        return type(self)(result)

    # ------------------------------------------------------------------------
    # String methods interface
    _str_na_value = np.nan

470 

471 return TimedeltaArray._simple_new(result, dtype=result.dtype) 

472 return type(self)(result) 

473 

474 # ------------------------------------------------------------------------ 

475 # String methods interface 

476 _str_na_value = np.nan