1"""
2Experimental manager based on storing a collection of 1D arrays
3"""
4from __future__ import annotations
5
6import itertools
7from typing import (
8 TYPE_CHECKING,
9 Callable,
10 Literal,
11)
12
13import numpy as np
14
15from pandas._libs import (
16 NaT,
17 lib,
18)
19
20from pandas.core.dtypes.astype import (
21 astype_array,
22 astype_array_safe,
23)
24from pandas.core.dtypes.cast import (
25 ensure_dtype_can_hold_na,
26 find_common_type,
27 infer_dtype_from_scalar,
28 np_find_common_type,
29)
30from pandas.core.dtypes.common import (
31 ensure_platform_int,
32 is_datetime64_ns_dtype,
33 is_integer,
34 is_numeric_dtype,
35 is_object_dtype,
36 is_timedelta64_ns_dtype,
37)
38from pandas.core.dtypes.dtypes import ExtensionDtype
39from pandas.core.dtypes.generic import (
40 ABCDataFrame,
41 ABCSeries,
42)
43from pandas.core.dtypes.missing import (
44 array_equals,
45 isna,
46 na_value_for_dtype,
47)
48
49import pandas.core.algorithms as algos
50from pandas.core.array_algos.quantile import quantile_compat
51from pandas.core.array_algos.take import take_1d
52from pandas.core.arrays import (
53 DatetimeArray,
54 ExtensionArray,
55 NumpyExtensionArray,
56 TimedeltaArray,
57)
58from pandas.core.construction import (
59 ensure_wrapped_if_datetimelike,
60 extract_array,
61 sanitize_array,
62)
63from pandas.core.indexers import (
64 maybe_convert_indices,
65 validate_indices,
66)
67from pandas.core.indexes.api import (
68 Index,
69 ensure_index,
70)
71from pandas.core.indexes.base import get_values_for_csv
72from pandas.core.internals.base import (
73 DataManager,
74 SingleDataManager,
75 ensure_np_dtype,
76 interleaved_dtype,
77)
78from pandas.core.internals.blocks import (
79 BlockPlacement,
80 ensure_block_shape,
81 external_values,
82 extract_pandas_array,
83 maybe_coerce_values,
84 new_block,
85)
86from pandas.core.internals.managers import make_na_array
87
88if TYPE_CHECKING:
89 from collections.abc import Hashable
90
91 from pandas._typing import (
92 ArrayLike,
93 AxisInt,
94 DtypeObj,
95 QuantileInterpolation,
96 Self,
97 npt,
98 )
99
100
101class BaseArrayManager(DataManager):
102 """
103 Core internal data structure to implement DataFrame and Series.
104
105 Alternative to the BlockManager, storing a list of 1D arrays instead of
106 Blocks.
107
108 This is *not* a public API class
109
110 Parameters
111 ----------
112 arrays : Sequence of arrays
113 axes : Sequence of Index
114 verify_integrity : bool, default True
115
116 """
117
118 __slots__ = [
119 "_axes", # private attribute, because 'axes' has different order, see below
120 "arrays",
121 ]
122
123 arrays: list[np.ndarray | ExtensionArray]
124 _axes: list[Index]
125
    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Abstract: construction is implemented by the concrete subclasses
        # (ArrayManager, SingleArrayManager); the base class is never
        # instantiated directly.
        raise NotImplementedError
133
134 def make_empty(self, axes=None) -> Self:
135 """Return an empty ArrayManager with the items axis of len 0 (no columns)"""
136 if axes is None:
137 axes = [self.axes[1:], Index([])]
138
139 arrays: list[np.ndarray | ExtensionArray] = []
140 return type(self)(arrays, axes)
141
142 @property
143 def items(self) -> Index:
144 return self._axes[-1]
145
146 @property
147 # error: Signature of "axes" incompatible with supertype "DataManager"
148 def axes(self) -> list[Index]: # type: ignore[override]
149 # mypy doesn't work to override attribute with property
150 # see https://github.com/python/mypy/issues/4125
151 """Axes is BlockManager-compatible order (columns, rows)"""
152 return [self._axes[1], self._axes[0]]
153
154 @property
155 def shape_proper(self) -> tuple[int, ...]:
156 # this returns (n_rows, n_columns)
157 return tuple(len(ax) for ax in self._axes)
158
159 @staticmethod
160 def _normalize_axis(axis: AxisInt) -> int:
161 # switch axis
162 axis = 1 if axis == 0 else 0
163 return axis
164
165 def set_axis(self, axis: AxisInt, new_labels: Index) -> None:
166 # Caller is responsible for ensuring we have an Index object.
167 self._validate_set_axis(axis, new_labels)
168 axis = self._normalize_axis(axis)
169 self._axes[axis] = new_labels
170
171 def get_dtypes(self) -> npt.NDArray[np.object_]:
172 return np.array([arr.dtype for arr in self.arrays], dtype="object")
173
174 def add_references(self, mgr: BaseArrayManager) -> None:
175 """
176 Only implemented on the BlockManager level
177 """
178 return
179
180 def __getstate__(self):
181 return self.arrays, self._axes
182
183 def __setstate__(self, state) -> None:
184 self.arrays = state[0]
185 self._axes = state[1]
186
187 def __repr__(self) -> str:
188 output = type(self).__name__
189 output += f"\nIndex: {self._axes[0]}"
190 if self.ndim == 2:
191 output += f"\nColumns: {self._axes[1]}"
192 output += f"\n{len(self.arrays)} arrays:"
193 for arr in self.arrays:
194 output += f"\n{arr.dtype}"
195 return output
196
    def apply(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        """
        Iterate over the arrays, collect and create a new ArrayManager.

        Parameters
        ----------
        f : str or callable
            Name of the Array method to apply.
        align_keys: List[str] or None, default None
            Names of kwargs whose values (Series/DataFrame/array-like) must be
            re-sliced per column before calling ``f``.
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        ArrayManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_arrays: list[ArrayLike] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        if f == "apply":
            # "apply" is dispatched with the actual callable under "func"
            f = kwargs.pop("func")

        for i, arr in enumerate(self.arrays):
            if aligned_args:
                # replace each aligned kwarg with its slice for column i
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[i]
                        else:
                            kwargs[k] = obj.iloc[:, i]._values
                    else:
                        # otherwise we have an array-like
                        kwargs[k] = obj[i]

            if callable(f):
                applied = f(arr, **kwargs)
            else:
                # f is the name of a method on the array
                applied = getattr(arr, f)(**kwargs)

            result_arrays.append(applied)

        new_axes = self._axes
        return type(self)(result_arrays, new_axes)
252
    def apply_with_block(self, f, align_keys=None, **kwargs) -> Self:
        """
        Apply the Block method named ``f`` to each array.

        Each 1D array is temporarily wrapped in a Block so the existing Block
        implementations can be reused, then the result is unwrapped back to a
        1D array.
        """
        # switch axis to follow BlockManager logic
        swap_axis = True
        if f == "interpolate":
            swap_axis = False
        if swap_axis and "axis" in kwargs and self.ndim == 2:
            kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):
            if aligned_args:
                # replace each aligned kwarg with its slice for column i
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray):
                # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to
                # convert for the Block constructors.
                arr = np.asarray(arr)

            arr = maybe_coerce_values(arr)
            if self.ndim == 2:
                # wrap the single column as a 2D block of shape (1, n_rows)
                arr = ensure_block_shape(arr, 2)
                bp = BlockPlacement(slice(0, 1, 1))
                block = new_block(arr, placement=bp, ndim=2)
            else:
                bp = BlockPlacement(slice(0, len(self), 1))
                block = new_block(arr, placement=bp, ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                # some Block methods return a list of Blocks; only the first
                # is kept here (presumably always length-1 for a
                # single-column block — TODO confirm)
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2 and arr.ndim == 2:
                # 2D for np.ndarray or DatetimeArray/TimedeltaArray
                assert len(arr) == 1
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                arr = arr[0, :]  # type: ignore[call-overload]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)
311
312 def setitem(self, indexer, value, warn: bool = True) -> Self:
313 return self.apply_with_block("setitem", indexer=indexer, value=value)
314
315 def diff(self, n: int) -> Self:
316 assert self.ndim == 2 # caller ensures
317 return self.apply(algos.diff, n=n)
318
319 def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self:
320 if copy is None:
321 copy = True
322
323 return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)
324
325 def convert(self, copy: bool | None) -> Self:
326 if copy is None:
327 copy = True
328
329 def _convert(arr):
330 if is_object_dtype(arr.dtype):
331 # extract NumpyExtensionArray for tests that patch
332 # NumpyExtensionArray._typ
333 arr = np.asarray(arr)
334 result = lib.maybe_convert_objects(
335 arr,
336 convert_non_numeric=True,
337 )
338 if result is arr and copy:
339 return arr.copy()
340 return result
341 else:
342 return arr.copy() if copy else arr
343
344 return self.apply(_convert)
345
346 def get_values_for_csv(
347 self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
348 ) -> Self:
349 return self.apply(
350 get_values_for_csv,
351 na_rep=na_rep,
352 quoting=quoting,
353 float_format=float_format,
354 date_format=date_format,
355 decimal=decimal,
356 )
357
358 @property
359 def any_extension_types(self) -> bool:
360 """Whether any of the blocks in this manager are extension blocks"""
361 return False # any(block.is_extension for block in self.blocks)
362
363 @property
364 def is_view(self) -> bool:
365 """return a boolean if we are a single block and are a view"""
366 # TODO what is this used for?
367 return False
368
369 @property
370 def is_single_block(self) -> bool:
371 return len(self.arrays) == 1
372
373 def _get_data_subset(self, predicate: Callable) -> Self:
374 indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
375 arrays = [self.arrays[i] for i in indices]
376 # TODO copy?
377 # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
378 # see test_describe_datetime_columns
379 taker = np.array(indices, dtype="intp")
380 new_cols = self._axes[1].take(taker)
381 new_axes = [self._axes[0], new_cols]
382 return type(self)(arrays, new_axes, verify_integrity=False)
383
384 def get_bool_data(self, copy: bool = False) -> Self:
385 """
386 Select columns that are bool-dtype and object-dtype columns that are all-bool.
387
388 Parameters
389 ----------
390 copy : bool, default False
391 Whether to copy the blocks
392 """
393 return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))
394
395 def get_numeric_data(self, copy: bool = False) -> Self:
396 """
397 Select columns that have a numeric dtype.
398
399 Parameters
400 ----------
401 copy : bool, default False
402 Whether to copy the blocks
403 """
404 return self._get_data_subset(
405 lambda arr: is_numeric_dtype(arr.dtype)
406 or getattr(arr.dtype, "_is_numeric", False)
407 )
408
409 def copy(self, deep: bool | Literal["all"] | None = True) -> Self:
410 """
411 Make deep or shallow copy of ArrayManager
412
413 Parameters
414 ----------
415 deep : bool or string, default True
416 If False, return shallow copy (do not copy data)
417 If 'all', copy data and a deep copy of the index
418
419 Returns
420 -------
421 BlockManager
422 """
423 if deep is None:
424 # ArrayManager does not yet support CoW, so deep=None always means
425 # deep=True for now
426 deep = True
427
428 # this preserves the notion of view copying of axes
429 if deep:
430 # hit in e.g. tests.io.json.test_pandas
431
432 def copy_func(ax):
433 return ax.copy(deep=True) if deep == "all" else ax.view()
434
435 new_axes = [copy_func(ax) for ax in self._axes]
436 else:
437 new_axes = list(self._axes)
438
439 if deep:
440 new_arrays = [arr.copy() for arr in self.arrays]
441 else:
442 new_arrays = list(self.arrays)
443 return type(self)(new_arrays, new_axes, verify_integrity=False)
444
445 def reindex_indexer(
446 self,
447 new_axis,
448 indexer,
449 axis: AxisInt,
450 fill_value=None,
451 allow_dups: bool = False,
452 copy: bool | None = True,
453 # ignored keywords
454 only_slice: bool = False,
455 # ArrayManager specific keywords
456 use_na_proxy: bool = False,
457 ) -> Self:
458 axis = self._normalize_axis(axis)
459 return self._reindex_indexer(
460 new_axis,
461 indexer,
462 axis,
463 fill_value,
464 allow_dups,
465 copy,
466 use_na_proxy,
467 )
468
    def _reindex_indexer(
        self,
        new_axis,
        indexer: npt.NDArray[np.intp] | None,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool | None = True,
        use_na_proxy: bool = False,
    ) -> Self:
        """
        Reindex one axis using a precomputed positional indexer.

        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
            Already normalized to internal numbering (0=rows, 1=columns).
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool, default True
        use_na_proxy : bool, default False
            If True, use a NullArrayProxy placeholder for missing columns
            instead of materializing an all-NA array.

        pandas-indexer with -1's only.
        """
        if copy is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            copy = True

        if indexer is None:
            # no reordering needed; at most replace the axis labels
            if new_axis is self._axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result._axes = list(self._axes)
            result._axes[axis] = new_axis
            return result

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self._axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 1:
            # reindexing columns: pick arrays by position; -1 -> all-NA column
            new_arrays = []
            for i in indexer:
                if i == -1:
                    arr = self._make_na_array(
                        fill_value=fill_value, use_na_proxy=use_na_proxy
                    )
                else:
                    arr = self.arrays[i]
                    if copy:
                        arr = arr.copy()
                new_arrays.append(arr)

        else:
            # reindexing rows: take from every array; -1 positions get filled
            validate_indices(indexer, len(self._axes[0]))
            indexer = ensure_platform_int(indexer)
            mask = indexer == -1
            needs_masking = mask.any()
            new_arrays = [
                take_1d(
                    arr,
                    indexer,
                    allow_fill=needs_masking,
                    fill_value=fill_value,
                    mask=mask,
                    # if fill_value is not None else blk.fill_value
                )
                for arr in self.arrays
            ]

        new_axes = list(self._axes)
        new_axes[axis] = new_axis

        return type(self)(new_arrays, new_axes, verify_integrity=False)
547
548 def take(
549 self,
550 indexer: npt.NDArray[np.intp],
551 axis: AxisInt = 1,
552 verify: bool = True,
553 ) -> Self:
554 """
555 Take items along any axis.
556 """
557 assert isinstance(indexer, np.ndarray), type(indexer)
558 assert indexer.dtype == np.intp, indexer.dtype
559
560 axis = self._normalize_axis(axis)
561
562 if not indexer.ndim == 1:
563 raise ValueError("indexer should be 1-dimensional")
564
565 n = self.shape_proper[axis]
566 indexer = maybe_convert_indices(indexer, n, verify=verify)
567
568 new_labels = self._axes[axis].take(indexer)
569 return self._reindex_indexer(
570 new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
571 )
572
573 def _make_na_array(self, fill_value=None, use_na_proxy: bool = False):
574 if use_na_proxy:
575 assert fill_value is None
576 return NullArrayProxy(self.shape_proper[0])
577
578 if fill_value is None:
579 fill_value = np.nan
580
581 dtype, fill_value = infer_dtype_from_scalar(fill_value)
582 array_values = make_na_array(dtype, self.shape_proper[:1], fill_value)
583 return array_values
584
585 def _equal_values(self, other) -> bool:
586 """
587 Used in .equals defined in base class. Only check the column values
588 assuming shape and indexes have already been checked.
589 """
590 for left, right in zip(self.arrays, other.arrays):
591 if not array_equals(left, right):
592 return False
593 return True
594
595 # TODO
596 # to_dict
597
598
599class ArrayManager(BaseArrayManager):
    @property
    def ndim(self) -> Literal[2]:
        # ArrayManager always backs a 2D DataFrame.
        return 2

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Note: we are storing the axes in "_axes" in the (row, columns) order
        # which contrasts the order how it is stored in BlockManager
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            # coerce axes to Index and arrays to properly-typed 1D values
            # before validating lengths/types
            self._axes = [ensure_index(ax) for ax in axes]
            arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays]
            self.arrays = [maybe_coerce_values(arr) for arr in arrays]
            self._verify_integrity()
620
621 def _verify_integrity(self) -> None:
622 n_rows, n_columns = self.shape_proper
623 if not len(self.arrays) == n_columns:
624 raise ValueError(
625 "Number of passed arrays must equal the size of the column Index: "
626 f"{len(self.arrays)} arrays vs {n_columns} columns."
627 )
628 for arr in self.arrays:
629 if not len(arr) == n_rows:
630 raise ValueError(
631 "Passed arrays should have the same length as the rows Index: "
632 f"{len(arr)} vs {n_rows} rows"
633 )
634 if not isinstance(arr, (np.ndarray, ExtensionArray)):
635 raise ValueError(
636 "Passed arrays should be np.ndarray or ExtensionArray instances, "
637 f"got {type(arr)} instead"
638 )
639 if not arr.ndim == 1:
640 raise ValueError(
641 "Passed arrays should be 1-dimensional, got array with "
642 f"{arr.ndim} dimensions instead."
643 )
644
645 # --------------------------------------------------------------------
646 # Indexing
647
    def fast_xs(self, loc: int) -> SingleArrayManager:
        """
        Return the array corresponding to `frame.iloc[loc]`.

        Parameters
        ----------
        loc : int

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # find a single dtype that can hold all the column values for this row
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        values = [arr[loc] for arr in self.arrays]
        if isinstance(dtype, ExtensionDtype):
            result = dtype.construct_array_type()._from_sequence(values, dtype=dtype)
        # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
        elif is_datetime64_ns_dtype(dtype):
            result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray
        elif is_timedelta64_ns_dtype(dtype):
            result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray
        else:
            result = np.array(values, dtype=dtype)
        # the row becomes a 1D manager indexed by the column labels
        return SingleArrayManager([result], [self._axes[1]])
673
674 def get_slice(self, slobj: slice, axis: AxisInt = 0) -> ArrayManager:
675 axis = self._normalize_axis(axis)
676
677 if axis == 0:
678 arrays = [arr[slobj] for arr in self.arrays]
679 elif axis == 1:
680 arrays = self.arrays[slobj]
681
682 new_axes = list(self._axes)
683 new_axes[axis] = new_axes[axis]._getitem_slice(slobj)
684
685 return type(self)(arrays, new_axes, verify_integrity=False)
686
687 def iget(self, i: int) -> SingleArrayManager:
688 """
689 Return the data as a SingleArrayManager.
690 """
691 values = self.arrays[i]
692 return SingleArrayManager([values], [self._axes[0]])
693
694 def iget_values(self, i: int) -> ArrayLike:
695 """
696 Return the data for column i as the values (ndarray or ExtensionArray).
697 """
698 return self.arrays[i]
699
700 @property
701 def column_arrays(self) -> list[ArrayLike]:
702 """
703 Used in the JSON C code to access column arrays.
704 """
705
706 return [np.asarray(arr) for arr in self.arrays]
707
    def iset(
        self,
        loc: int | slice | np.ndarray,
        value: ArrayLike,
        inplace: bool = False,
        refs=None,
    ) -> None:
        """
        Set new column(s).

        This changes the ArrayManager in-place, but replaces (an) existing
        column(s), not changing column values in-place).

        Parameters
        ----------
        loc : integer, slice or boolean mask
            Positional location (already bounds checked)
        value : np.ndarray or ExtensionArray
        inplace : bool, default False
            Whether overwrite existing array as opposed to replacing it.
        """
        # NOTE(review): ``inplace`` and ``refs`` are accepted for API
        # compatibility but are not used in this implementation.
        # single column -> single integer index
        if lib.is_integer(loc):
            # TODO can we avoid needing to unpack this here? That means converting
            # DataFrame into 1D array when loc is an integer
            if isinstance(value, np.ndarray) and value.ndim == 2:
                assert value.shape[1] == 1
                value = value[:, 0]

            # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
            # but we should avoid that and pass directly the proper array
            value = maybe_coerce_values(value)

            assert isinstance(value, (np.ndarray, ExtensionArray))
            assert value.ndim == 1
            assert len(value) == len(self._axes[0])
            self.arrays[loc] = value
            return

        # multiple columns -> convert slice or array to integer indices
        elif isinstance(loc, slice):
            # expand the slice into the explicit range of column positions
            indices: range | np.ndarray = range(
                loc.start if loc.start is not None else 0,
                loc.stop if loc.stop is not None else self.shape_proper[1],
                loc.step if loc.step is not None else 1,
            )
        else:
            assert isinstance(loc, np.ndarray)
            assert loc.dtype == "bool"
            indices = np.nonzero(loc)[0]

        assert value.ndim == 2
        assert value.shape[0] == len(self._axes[0])

        # assign column value_idx of the 2D ``value`` to position mgr_idx
        for value_idx, mgr_idx in enumerate(indices):
            # error: No overload variant of "__getitem__" of "ExtensionArray" matches
            # argument type "Tuple[slice, int]"
            value_arr = value[:, value_idx]  # type: ignore[call-overload]
            self.arrays[mgr_idx] = value_arr
        return
768
769 def column_setitem(
770 self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
771 ) -> None:
772 """
773 Set values ("setitem") into a single column (not setting the full column).
774
775 This is a method on the ArrayManager level, to avoid creating an
776 intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
777 """
778 if not is_integer(loc):
779 raise TypeError("The column index should be an integer")
780 arr = self.arrays[loc]
781 mgr = SingleArrayManager([arr], [self._axes[0]])
782 if inplace_only:
783 mgr.setitem_inplace(idx, value)
784 else:
785 new_mgr = mgr.setitem((idx,), value)
786 # update existing ArrayManager in-place
787 self.arrays[loc] = new_mgr.arrays[0]
788
789 def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
790 """
791 Insert item at selected position.
792
793 Parameters
794 ----------
795 loc : int
796 item : hashable
797 value : np.ndarray or ExtensionArray
798 """
799 # insert to the axis; this could possibly raise a TypeError
800 new_axis = self.items.insert(loc, item)
801
802 value = extract_array(value, extract_numpy=True)
803 if value.ndim == 2:
804 if value.shape[0] == 1:
805 # error: No overload variant of "__getitem__" of "ExtensionArray"
806 # matches argument type "Tuple[int, slice]"
807 value = value[0, :] # type: ignore[call-overload]
808 else:
809 raise ValueError(
810 f"Expected a 1D array, got an array with shape {value.shape}"
811 )
812 value = maybe_coerce_values(value)
813
814 # TODO self.arrays can be empty
815 # assert len(value) == len(self.arrays[0])
816
817 # TODO is this copy needed?
818 arrays = self.arrays.copy()
819 arrays.insert(loc, value)
820
821 self.arrays = arrays
822 self._axes[1] = new_axis
823
824 def idelete(self, indexer) -> ArrayManager:
825 """
826 Delete selected locations in-place (new block and array, same BlockManager)
827 """
828 to_keep = np.ones(self.shape[0], dtype=np.bool_)
829 to_keep[indexer] = False
830
831 self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
832 self._axes = [self._axes[0], self._axes[1][to_keep]]
833 return self
834
835 # --------------------------------------------------------------------
836 # Array-wise Operation
837
838 def grouped_reduce(self, func: Callable) -> Self:
839 """
840 Apply grouped reduction function columnwise, returning a new ArrayManager.
841
842 Parameters
843 ----------
844 func : grouped reduction function
845
846 Returns
847 -------
848 ArrayManager
849 """
850 result_arrays: list[np.ndarray] = []
851 result_indices: list[int] = []
852
853 for i, arr in enumerate(self.arrays):
854 # grouped_reduce functions all expect 2D arrays
855 arr = ensure_block_shape(arr, ndim=2)
856 res = func(arr)
857 if res.ndim == 2:
858 # reverse of ensure_block_shape
859 assert res.shape[0] == 1
860 res = res[0]
861
862 result_arrays.append(res)
863 result_indices.append(i)
864
865 if len(result_arrays) == 0:
866 nrows = 0
867 else:
868 nrows = result_arrays[0].shape[0]
869 index = Index(range(nrows))
870
871 columns = self.items
872
873 # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
874 # expected "List[Union[ndarray, ExtensionArray]]"
875 return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type]
876
    def reduce(self, func: Callable) -> Self:
        """
        Apply reduction function column-wise, returning a single-row ArrayManager.

        Parameters
        ----------
        func : reduction function

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        for i, arr in enumerate(self.arrays):
            res = func(arr, axis=0)

            # TODO NaT doesn't preserve dtype, so we need to ensure to create
            # a timedelta result array if original was timedelta
            # what if datetime results in timedelta? (eg std)
            dtype = arr.dtype if res is NaT else None
            # wrap the scalar reduction result in a length-1 array
            result_arrays.append(
                sanitize_array([res], None, dtype=dtype)  # type: ignore[arg-type]
            )

        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
        columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        new_mgr = type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]
        return new_mgr
908
909 def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
910 """
911 Apply array_op blockwise with another (aligned) BlockManager.
912 """
913 # TODO what if `other` is BlockManager ?
914 left_arrays = self.arrays
915 right_arrays = other.arrays
916 result_arrays = [
917 array_op(left, right) for left, right in zip(left_arrays, right_arrays)
918 ]
919 return type(self)(result_arrays, self._axes)
920
921 def quantile(
922 self,
923 *,
924 qs: Index, # with dtype float64
925 transposed: bool = False,
926 interpolation: QuantileInterpolation = "linear",
927 ) -> ArrayManager:
928 arrs = [ensure_block_shape(x, 2) for x in self.arrays]
929 new_arrs = [
930 quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs
931 ]
932 for i, arr in enumerate(new_arrs):
933 if arr.ndim == 2:
934 assert arr.shape[0] == 1, arr.shape
935 new_arrs[i] = arr[0]
936
937 axes = [qs, self._axes[1]]
938 return type(self)(new_arrs, axes)
939
940 # ----------------------------------------------------------------
941
    def unstack(self, unstacker, fill_value) -> ArrayManager:
        """
        Return a BlockManager with all blocks unstacked.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        indexer, _ = unstacker._indexer_and_to_sort
        if unstacker.mask.all():
            # no missing entries will be introduced -> plain take, no filling
            new_indexer = indexer
            allow_fill = False
            new_mask2D = None
            needs_masking = None
        else:
            # -1 marks positions that need to be filled with fill_value
            new_indexer = np.full(unstacker.mask.shape, -1)
            new_indexer[unstacker.mask] = indexer
            allow_fill = True
            # calculating the full mask once and passing it to take_1d is faster
            # than letting take_1d calculate it in each repeated call
            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
            needs_masking = new_mask2D.any(axis=0)
        new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
        new_indexer2D = ensure_platform_int(new_indexer2D)

        new_arrays = []
        for arr in self.arrays:
            # each source column fans out into full_shape[1] result columns
            for i in range(unstacker.full_shape[1]):
                if allow_fill:
                    # error: Value of type "Optional[Any]" is not indexable [index]
                    new_arr = take_1d(
                        arr,
                        new_indexer2D[:, i],
                        allow_fill=needs_masking[i],  # type: ignore[index]
                        fill_value=fill_value,
                        mask=new_mask2D[:, i],  # type: ignore[index]
                    )
                else:
                    new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False)
                new_arrays.append(new_arr)

        new_index = unstacker.new_index
        new_columns = unstacker.get_new_columns(self._axes[1])
        new_axes = [new_index, new_columns]

        return type(self)(new_arrays, new_axes, verify_integrity=False)
994
    def as_array(
        self,
        dtype=None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        dtype : object, default None
            Data type of the return array.
        copy : bool, default False
            If True then guarantee that a copy is returned. A value of
            False does not guarantee that the underlying data is not
            copied.
        na_value : object, default lib.no_default
            Value to be used as the missing value sentinel.

        Returns
        -------
        arr : ndarray
        """
        if len(self.arrays) == 0:
            # empty manager: return an empty float array of the proper
            # (transposed) shape
            empty_arr = np.empty(self.shape, dtype=float)
            return empty_arr.transpose()

        # We want to copy when na_value is provided to avoid
        # mutating the original object
        copy = copy or na_value is not lib.no_default

        if not dtype:
            # find a single dtype that all columns can be cast to
            dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        dtype = ensure_np_dtype(dtype)

        result = np.empty(self.shape_proper, dtype=dtype)

        # fill the result column by column; astype honors the copy request
        for i, arr in enumerate(self.arrays):
            arr = arr.astype(dtype, copy=copy)
            result[:, i] = arr

        if na_value is not lib.no_default:
            result[isna(result)] = na_value

        return result
1042
1043 @classmethod
1044 def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self:
1045 """
1046 Concatenate uniformly-indexed ArrayManagers horizontally.
1047 """
1048 # concatting along the columns -> combine reindexed arrays in a single manager
1049 arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))
1050 new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False)
1051 return new_mgr
1052
1053 @classmethod
1054 def concat_vertical(cls, mgrs: list[Self], axes: list[Index]) -> Self:
1055 """
1056 Concatenate uniformly-indexed ArrayManagers vertically.
1057 """
1058 # concatting along the rows -> concat the reindexed arrays
1059 # TODO(ArrayManager) doesn't yet preserve the correct dtype
1060 arrays = [
1061 concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
1062 for j in range(len(mgrs[0].arrays))
1063 ]
1064 new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False)
1065 return new_mgr
1066
1067
1068class SingleArrayManager(BaseArrayManager, SingleDataManager):
1069 __slots__ = [
1070 "_axes", # private attribute, because 'axes' has different order, see below
1071 "arrays",
1072 ]
1073
1074 arrays: list[np.ndarray | ExtensionArray]
1075 _axes: list[Index]
1076
    @property
    def ndim(self) -> Literal[1]:
        # SingleArrayManager always backs a 1D Series.
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            # exactly one axis and one array back a Series
            assert len(axes) == 1
            assert len(arrays) == 1
            self._axes = [ensure_index(ax) for ax in self._axes]
            arr = arrays[0]
            arr = maybe_coerce_values(arr)
            arr = extract_pandas_array(arr, None, 1)[0]
            self.arrays = [arr]
            self._verify_integrity()
1099
1100 def _verify_integrity(self) -> None:
1101 (n_rows,) = self.shape
1102 assert len(self.arrays) == 1
1103 arr = self.arrays[0]
1104 assert len(arr) == n_rows
1105 if not arr.ndim == 1:
1106 raise ValueError(
1107 "Passed array should be 1-dimensional, got array with "
1108 f"{arr.ndim} dimensions instead."
1109 )
1110
1111 @staticmethod
1112 def _normalize_axis(axis):
1113 return axis
1114
1115 def make_empty(self, axes=None) -> Self:
1116 """Return an empty ArrayManager with index/array of length 0"""
1117 if axes is None:
1118 axes = [Index([], dtype=object)]
1119 array: np.ndarray = np.array([], dtype=self.dtype)
1120 return type(self)([array], axes)
1121
1122 @classmethod
1123 def from_array(cls, array, index) -> SingleArrayManager:
1124 return cls([array], [index])
1125
1126 # error: Cannot override writeable attribute with read-only property
1127 @property
1128 def axes(self) -> list[Index]: # type: ignore[override]
1129 return self._axes
1130
1131 @property
1132 def index(self) -> Index:
1133 return self._axes[0]
1134
1135 @property
1136 def dtype(self):
1137 return self.array.dtype
1138
1139 def external_values(self):
1140 """The array that Series.values returns"""
1141 return external_values(self.array)
1142
1143 def internal_values(self):
1144 """The array that Series._values returns"""
1145 return self.array
1146
1147 def array_values(self):
1148 """The array that Series.array returns"""
1149 arr = self.array
1150 if isinstance(arr, np.ndarray):
1151 arr = NumpyExtensionArray(arr)
1152 return arr
1153
1154 @property
1155 def _can_hold_na(self) -> bool:
1156 if isinstance(self.array, np.ndarray):
1157 return self.array.dtype.kind not in "iub"
1158 else:
1159 # ExtensionArray
1160 return self.array._can_hold_na
1161
1162 @property
1163 def is_single_block(self) -> bool:
1164 return True
1165
1166 def fast_xs(self, loc: int) -> SingleArrayManager:
1167 raise NotImplementedError("Use series._values[loc] instead")
1168
1169 def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
1170 if axis >= self.ndim:
1171 raise IndexError("Requested axis not found in manager")
1172
1173 new_array = self.array[slobj]
1174 new_index = self.index._getitem_slice(slobj)
1175 return type(self)([new_array], [new_index], verify_integrity=False)
1176
1177 def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager:
1178 new_array = self.array[indexer]
1179 new_index = self.index[indexer]
1180 return type(self)([new_array], [new_index])
1181
1182 # error: Signature of "apply" incompatible with supertype "BaseArrayManager"
1183 def apply(self, func, **kwargs) -> Self: # type: ignore[override]
1184 if callable(func):
1185 new_array = func(self.array, **kwargs)
1186 else:
1187 new_array = getattr(self.array, func)(**kwargs)
1188 return type(self)([new_array], self._axes)
1189
1190 def setitem(self, indexer, value, warn: bool = True) -> SingleArrayManager:
1191 """
1192 Set values with indexer.
1193
1194 For SingleArrayManager, this backs s[indexer] = value
1195
1196 See `setitem_inplace` for a version that works inplace and doesn't
1197 return a new Manager.
1198 """
1199 if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
1200 raise ValueError(f"Cannot set values with ndim > {self.ndim}")
1201 return self.apply_with_block("setitem", indexer=indexer, value=value)
1202
1203 def idelete(self, indexer) -> SingleArrayManager:
1204 """
1205 Delete selected locations in-place (new array, same ArrayManager)
1206 """
1207 to_keep = np.ones(self.shape[0], dtype=np.bool_)
1208 to_keep[indexer] = False
1209
1210 self.arrays = [self.arrays[0][to_keep]]
1211 self._axes = [self._axes[0][to_keep]]
1212 return self
1213
1214 def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
1215 # used in get_numeric_data / get_bool_data
1216 if predicate(self.array):
1217 return type(self)(self.arrays, self._axes, verify_integrity=False)
1218 else:
1219 return self.make_empty()
1220
1221 def set_values(self, values: ArrayLike) -> None:
1222 """
1223 Set (replace) the values of the SingleArrayManager in place.
1224
1225 Use at your own risk! This does not check if the passed values are
1226 valid for the current SingleArrayManager (length, dtype, etc).
1227 """
1228 self.arrays[0] = values
1229
1230 def to_2d_mgr(self, columns: Index) -> ArrayManager:
1231 """
1232 Manager analogue of Series.to_frame
1233 """
1234 arrays = [self.arrays[0]]
1235 axes = [self.axes[0], columns]
1236
1237 return ArrayManager(arrays, axes, verify_integrity=False)
1238
1239
class NullArrayProxy:
    """
    Placeholder for an all-NA array of known length but unknown dtype.

    The dtype is only decided at concatenation time (after the common dtype
    has been determined, a computation in which this proxy is ignored).
    Using this object spares internals/concat.py from working out the
    proper dtype and array type up front.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        # number of rows the eventual all-NA array will have
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Materialize the all-NA array this proxy stands for.

        Parameters
        ----------
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if isinstance(dtype, ExtensionDtype):
            # build a length-0 EA, then "take" with -1 positions so every
            # slot is filled with the array type's NA value
            array_cls = dtype.construct_array_type()
            empty = array_cls._from_sequence([], dtype=dtype)
            fill_indexer = np.full(self.n, -1, dtype=np.intp)
            return empty.take(fill_indexer, allow_fill=True)

        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        fill_value = na_value_for_dtype(dtype)
        result = np.empty(self.n, dtype=dtype)
        result.fill(fill_value)
        # datetime-like numpy arrays get wrapped in their pandas array types
        return ensure_wrapped_if_datetimelike(result)
1285
1286
def concat_arrays(to_concat: list) -> ArrayLike:
    """
    Alternative for concat_compat but specialized for use in the ArrayManager.

    Differences: only deals with 1D arrays (no axis keyword), assumes
    ensure_wrapped_if_datetimelike and does not skip empty arrays to determine
    the dtype.
    In addition ensures that all NullArrayProxies get replaced with actual
    arrays.

    Parameters
    ----------
    to_concat : list of arrays

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # ignore the all-NA proxies to determine the resulting dtype
    real_arrays = [arr for arr in to_concat if not isinstance(arr, NullArrayProxy)]

    unique_dtypes = {arr.dtype for arr in real_arrays}
    if len(unique_dtypes) == 1:
        # common fast path: everything already shares one dtype
        target_dtype = real_arrays[0].dtype
    elif all(lib.is_np_dtype(dtype, "iub") for dtype in unique_dtypes):
        # GH#42092: all numpy int/uint/bool -> numpy promotion rules
        target_dtype = np_find_common_type(*unique_dtypes)
    else:
        target_dtype = find_common_type([arr.dtype for arr in real_arrays])

    # materialize proxies and cast everything to the common dtype
    converted = []
    for arr in to_concat:
        if isinstance(arr, NullArrayProxy):
            converted.append(arr.to_array(target_dtype))
        else:
            converted.append(astype_array(arr, target_dtype, copy=False))

    first = converted[0]
    if isinstance(first, ExtensionArray):
        return type(first)._concat_same_type(converted)

    result = np.concatenate(converted)

    # TODO decide on exact behaviour (we shouldn't do this only for empty result)
    # see https://github.com/pandas-dev/pandas/issues/39817
    if len(result) == 0:
        # all empties -> check for bool to not coerce to float
        kinds = {obj.dtype.kind for obj in real_arrays}
        if len(kinds) != 1 and "b" in kinds:
            result = result.astype(object)
    return result