1from __future__ import annotations
2
3import numpy as np
4
5from pandas._libs import lib
6from pandas._libs.tslibs import (
7 get_unit_from_dtype,
8 is_supported_unit,
9)
10from pandas._typing import (
11 AxisInt,
12 Dtype,
13 NpDtype,
14 Scalar,
15 npt,
16)
17from pandas.compat.numpy import function as nv
18
19from pandas.core.dtypes.astype import astype_array
20from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
21from pandas.core.dtypes.common import (
22 is_dtype_equal,
23 pandas_dtype,
24)
25from pandas.core.dtypes.dtypes import PandasDtype
26from pandas.core.dtypes.missing import isna
27
28from pandas.core import (
29 arraylike,
30 nanops,
31 ops,
32)
33from pandas.core.arraylike import OpsMixin
34from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
35from pandas.core.construction import ensure_wrapped_if_datetimelike
36from pandas.core.strings.object_array import ObjectStringArrayMixin
37
38
class PandasArray(
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # High priority so NumPy defers to our reflected dunder ops in mixed
    # ndarray <op> PandasArray expressions.
    __array_priority__ = 1000
    _ndarray: np.ndarray
    _dtype: PandasDtype
    _internal_fill_value = np.nan

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
        """
        Wrap a NumPy ndarray (or unwrap another PandasArray).

        Raises
        ------
        ValueError
            If `values` is not an ndarray, or is 0-dimensional.
        """
        if isinstance(values, type(self)):
            # Unwrap so we always store a bare ndarray in _ndarray.
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"'values' must be a NumPy array, not {type(values).__name__}"
            )

        if values.ndim == 0:
            # Technically we support 2, but do not advertise that fact.
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        dtype = PandasDtype(values.dtype)
        super().__init__(values, dtype)

    @classmethod
    def _from_sequence(
        cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
    ) -> PandasArray:
        """
        Construct a new PandasArray from a sequence of scalars.

        `dtype` may be a PandasDtype (unwrapped to its underlying numpy
        dtype) or anything `np.asarray` accepts.
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        # error: Argument "dtype" to "asarray" has incompatible type
        # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
        # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
        # _DTypeDict, Tuple[Any, Any]]]"
        result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
        if (
            result.ndim > 1
            and not hasattr(scalars, "dtype")
            and (dtype is None or dtype == object)
        ):
            # e.g. list-of-tuples: np.asarray would build a 2-D array, but we
            # want a 1-D object array holding the tuples themselves.
            result = construct_1d_object_array_from_listlike(scalars)

        if copy and result is scalars:
            # np.asarray returned the input unchanged; honor the copy request.
            result = result.copy()
        return cls(result)

    def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
        """Wrap a backing ndarray in a new array of the same (sub)class."""
        return type(self)(arr)

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self) -> PandasDtype:
        """The PandasDtype wrapping this array's numpy dtype."""
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """Return the underlying ndarray (converted to `dtype` if given)."""
        return np.asarray(self._ndarray, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())

        # First give our own dunder ops (via OpsMixin) a chance to handle
        # the operation, so e.g. masked/EA-aware arithmetic applies.
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # Try pandas' own reductions (which respect NA semantics) before
            # falling back to the raw ufunc reduction below.
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def astype(self, dtype, copy: bool = True):
        """
        Cast to `dtype`, returning `self` unchanged (or a copy) when the
        dtype already matches.
        """
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        result = astype_array(self._ndarray, dtype=dtype, copy=copy)
        return result

    def isna(self) -> np.ndarray:
        """Boolean ndarray mask of missing values."""
        return isna(self._ndarray)

    def _validate_scalar(self, fill_value):
        """Normalize a fill value, mapping None to this dtype's NA value."""
        if fill_value is None:
            # Primarily for subclasses
            fill_value = self.dtype.na_value
        return fill_value

    def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
        """
        Return (values, na_sentinel) for factorization; integer/unsigned/bool
        dtypes cannot hold NaN so they get no sentinel.
        """
        if self.dtype.kind in ["i", "u", "b"]:
            fv = None
        else:
            fv = np.nan
        return self._ndarray, fv

    # ------------------------------------------------------------------------
    # Reductions
    #
    # These accept the NumPy reduction signature (out/keepdims/dtype/...) so
    # that np.func(arr) dispatches here, but nv.validate_* rejects any
    # non-default values for the NumPy-only arguments.  The actual work is
    # delegated to the NA-aware implementations in pandas.core.nanops.

    def any(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return whether any element is truthy (NA-aware via nanops)."""
        nv.validate_any((), {"out": out, "keepdims": keepdims})
        result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def all(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return whether all elements are truthy (NA-aware via nanops)."""
        nv.validate_all((), {"out": out, "keepdims": keepdims})
        result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def min(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Return the minimum, skipping NA values when `skipna`."""
        nv.validate_min((), kwargs)
        result = nanops.nanmin(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def max(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Return the maximum, skipping NA values when `skipna`."""
        nv.validate_max((), kwargs)
        result = nanops.nanmax(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def sum(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Return the sum; NA if fewer than `min_count` valid values."""
        nv.validate_sum((), kwargs)
        result = nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def prod(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Return the product; NA if fewer than `min_count` valid values."""
        nv.validate_prod((), kwargs)
        result = nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def mean(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the mean, skipping NA values when `skipna`."""
        nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
        result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def median(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        overwrite_input: bool = False,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the median, skipping NA values when `skipna`."""
        nv.validate_median(
            (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
        )
        result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def std(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the standard deviation with `ddof` delta degrees of freedom."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
        )
        result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def var(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the variance with `ddof` delta degrees of freedom."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
        )
        result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def sem(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the standard error of the mean."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
        )
        result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def kurt(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the kurtosis, skipping NA values when `skipna`."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
        )
        result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def skew(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the skewness, skipping NA values when `skipna`."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
        )
        result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    # ------------------------------------------------------------------------
    # Additional Methods

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert to an ndarray, optionally substituting `na_value` for
        missing entries.  A copy is made only when needed (NA substitution,
        dtype conversion) or when `copy=True`.
        """
        mask = self.isna()
        if na_value is not lib.no_default and mask.any():
            # Copy before writing so we never mutate our own backing array.
            result = self._ndarray.copy()
            result[mask] = na_value
        else:
            result = self._ndarray

        result = np.asarray(result, dtype=dtype)

        if copy and result is self._ndarray:
            # np.asarray did not copy; honor the caller's copy request.
            result = result.copy()

        return result

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self) -> PandasArray:
        return type(self)(~self._ndarray)

    def __neg__(self) -> PandasArray:
        return type(self)(-self._ndarray)

    def __pos__(self) -> PandasArray:
        return type(self)(+self._ndarray)

    def __abs__(self) -> PandasArray:
        return type(self)(abs(self._ndarray))

    def _cmp_method(self, other, op):
        """
        Shared implementation for comparison *and* arithmetic dunder ops
        (see `_arith_method = _cmp_method` below): unwrap/prepare `other`,
        apply the array op on the ndarray, and re-box ndarray results.
        """
        if isinstance(other, PandasArray):
            other = other._ndarray

        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)
        # Suppress numpy floating-point warnings (div-by-zero etc.); pandas
        # ops define their own semantics for those cases.
        with np.errstate(all="ignore"):
            result = pd_op(self._ndarray, other)

        if op is divmod or op is ops.rdivmod:
            a, b = result
            if isinstance(a, np.ndarray):
                # for e.g. op vs TimedeltaArray, we may already
                # have an ExtensionArray, in which case we do not wrap
                return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
            return a, b

        if isinstance(result, np.ndarray):
            # for e.g. multiplication vs TimedeltaArray, we may already
            # have an ExtensionArray, in which case we do not wrap
            return self._wrap_ndarray_result(result)
        return result

    _arith_method = _cmp_method

    def _wrap_ndarray_result(self, result: np.ndarray):
        # If we have timedelta64[ns] result, return a TimedeltaArray instead
        # of a PandasArray
        if result.dtype.kind == "m" and is_supported_unit(
            get_unit_from_dtype(result.dtype)
        ):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._simple_new(result, dtype=result.dtype)
        return type(self)(result)

    # ------------------------------------------------------------------------
    # String methods interface
    # Sentinel used by ObjectStringArrayMixin for missing string values.
    _str_na_value = np.nan