Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/internals/array_manager.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

571 statements  

1""" 

2Experimental manager based on storing a collection of 1D arrays 

3""" 

4from __future__ import annotations 

5 

6import itertools 

7from typing import ( 

8 TYPE_CHECKING, 

9 Callable, 

10 Literal, 

11) 

12 

13import numpy as np 

14 

15from pandas._libs import ( 

16 NaT, 

17 lib, 

18) 

19 

20from pandas.core.dtypes.astype import ( 

21 astype_array, 

22 astype_array_safe, 

23) 

24from pandas.core.dtypes.cast import ( 

25 ensure_dtype_can_hold_na, 

26 find_common_type, 

27 infer_dtype_from_scalar, 

28 np_find_common_type, 

29) 

30from pandas.core.dtypes.common import ( 

31 ensure_platform_int, 

32 is_datetime64_ns_dtype, 

33 is_integer, 

34 is_numeric_dtype, 

35 is_object_dtype, 

36 is_timedelta64_ns_dtype, 

37) 

38from pandas.core.dtypes.dtypes import ExtensionDtype 

39from pandas.core.dtypes.generic import ( 

40 ABCDataFrame, 

41 ABCSeries, 

42) 

43from pandas.core.dtypes.missing import ( 

44 array_equals, 

45 isna, 

46 na_value_for_dtype, 

47) 

48 

49import pandas.core.algorithms as algos 

50from pandas.core.array_algos.quantile import quantile_compat 

51from pandas.core.array_algos.take import take_1d 

52from pandas.core.arrays import ( 

53 DatetimeArray, 

54 ExtensionArray, 

55 NumpyExtensionArray, 

56 TimedeltaArray, 

57) 

58from pandas.core.construction import ( 

59 ensure_wrapped_if_datetimelike, 

60 extract_array, 

61 sanitize_array, 

62) 

63from pandas.core.indexers import ( 

64 maybe_convert_indices, 

65 validate_indices, 

66) 

67from pandas.core.indexes.api import ( 

68 Index, 

69 ensure_index, 

70) 

71from pandas.core.indexes.base import get_values_for_csv 

72from pandas.core.internals.base import ( 

73 DataManager, 

74 SingleDataManager, 

75 ensure_np_dtype, 

76 interleaved_dtype, 

77) 

78from pandas.core.internals.blocks import ( 

79 BlockPlacement, 

80 ensure_block_shape, 

81 external_values, 

82 extract_pandas_array, 

83 maybe_coerce_values, 

84 new_block, 

85) 

86from pandas.core.internals.managers import make_na_array 

87 

88if TYPE_CHECKING: 

89 from collections.abc import Hashable 

90 

91 from pandas._typing import ( 

92 ArrayLike, 

93 AxisInt, 

94 DtypeObj, 

95 QuantileInterpolation, 

96 Self, 

97 npt, 

98 ) 

99 

100 

class BaseArrayManager(DataManager):
    """
    Core internal data structure to implement DataFrame and Series.

    Alternative to the BlockManager, storing a list of 1D arrays instead of
    Blocks.

    This is *not* a public API class

    Parameters
    ----------
    arrays : Sequence of arrays
    axes : Sequence of Index
    verify_integrity : bool, default True
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Abstract base; concrete construction lives in the subclasses
        # (ArrayManager / SingleArrayManager below).
        raise NotImplementedError

    def make_empty(self, axes=None) -> Self:
        """Return an empty ArrayManager with the items axis of len 0 (no columns)"""
        if axes is None:
            # NOTE(review): this builds [list-of-Index, Index] rather than a flat
            # [Index, Index] pair -- looks suspicious; confirm against callers
            # before relying on the axes=None path.
            axes = [self.axes[1:], Index([])]

        arrays: list[np.ndarray | ExtensionArray] = []
        return type(self)(arrays, axes)

    @property
    def items(self) -> Index:
        # the column Index (last element of _axes in both 1D and 2D cases)
        return self._axes[-1]

    @property
    # error: Signature of "axes" incompatible with supertype "DataManager"
    def axes(self) -> list[Index]:  # type: ignore[override]
        # mypy doesn't work to override attribute with property
        # see https://github.com/python/mypy/issues/4125
        """Axes is BlockManager-compatible order (columns, rows)"""
        return [self._axes[1], self._axes[0]]

    @property
    def shape_proper(self) -> tuple[int, ...]:
        # this returns (n_rows, n_columns)
        return tuple(len(ax) for ax in self._axes)

    @staticmethod
    def _normalize_axis(axis: AxisInt) -> int:
        # switch axis: translate BlockManager axis numbering (columns first)
        # to the internal (rows, columns) order used by _axes
        axis = 1 if axis == 0 else 0
        return axis

    def set_axis(self, axis: AxisInt, new_labels: Index) -> None:
        # Caller is responsible for ensuring we have an Index object.
        self._validate_set_axis(axis, new_labels)
        axis = self._normalize_axis(axis)
        self._axes[axis] = new_labels

    def get_dtypes(self) -> npt.NDArray[np.object_]:
        """Return an object ndarray with one dtype per stored column array."""
        return np.array([arr.dtype for arr in self.arrays], dtype="object")

    def add_references(self, mgr: BaseArrayManager) -> None:
        """
        Only implemented on the BlockManager level
        """
        return

    def __getstate__(self):
        # pickle support: state is just the arrays plus the axes
        return self.arrays, self._axes

    def __setstate__(self, state) -> None:
        self.arrays = state[0]
        self._axes = state[1]

    def __repr__(self) -> str:
        output = type(self).__name__
        output += f"\nIndex: {self._axes[0]}"
        if self.ndim == 2:
            output += f"\nColumns: {self._axes[1]}"
        output += f"\n{len(self.arrays)} arrays:"
        for arr in self.arrays:
            output += f"\n{arr.dtype}"
        return output

    def apply(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        """
        Iterate over the arrays, collect and create a new ArrayManager.

        Parameters
        ----------
        f : str or callable
            Name of the Array method to apply.
        align_keys: List[str] or None, default None
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        ArrayManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_arrays: list[ArrayLike] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        if f == "apply":
            f = kwargs.pop("func")

        for i, arr in enumerate(self.arrays):
            if aligned_args:
                # slice each aligned argument down to the piece that lines up
                # with column i before calling f (kwargs is mutated per column)
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[i]
                        else:
                            kwargs[k] = obj.iloc[:, i]._values
                    else:
                        # otherwise we have an array-like
                        kwargs[k] = obj[i]

            if callable(f):
                applied = f(arr, **kwargs)
            else:
                applied = getattr(arr, f)(**kwargs)

            result_arrays.append(applied)

        new_axes = self._axes
        return type(self)(result_arrays, new_axes)

    def apply_with_block(self, f, align_keys=None, **kwargs) -> Self:
        """
        Apply Block method `f` column-by-column by wrapping each 1D array in a
        temporary Block, then unwrap the results back into 1D arrays.
        """
        # switch axis to follow BlockManager logic
        swap_axis = True
        if f == "interpolate":
            swap_axis = False
        if swap_axis and "axis" in kwargs and self.ndim == 2:
            kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0

        align_keys = align_keys or []
        aligned_args = {k: kwargs[k] for k in align_keys}

        result_arrays = []

        for i, arr in enumerate(self.arrays):
            if aligned_args:
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            if self.ndim == 2:
                                kwargs[k] = obj.iloc[slice(i, i + 1)]._values
                            else:
                                kwargs[k] = obj.iloc[:]._values
                        else:
                            kwargs[k] = obj.iloc[:, [i]]._values
                    else:
                        # otherwise we have an ndarray
                        if obj.ndim == 2:
                            kwargs[k] = obj[[i]]

            if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray):
                # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to
                # convert for the Block constructors.
                arr = np.asarray(arr)

            arr = maybe_coerce_values(arr)
            if self.ndim == 2:
                # single-column 2D block, as the Block machinery expects
                arr = ensure_block_shape(arr, 2)
                bp = BlockPlacement(slice(0, 1, 1))
                block = new_block(arr, placement=bp, ndim=2)
            else:
                bp = BlockPlacement(slice(0, len(self), 1))
                block = new_block(arr, placement=bp, ndim=1)

            applied = getattr(block, f)(**kwargs)
            if isinstance(applied, list):
                applied = applied[0]
            arr = applied.values
            if self.ndim == 2 and arr.ndim == 2:
                # 2D for np.ndarray or DatetimeArray/TimedeltaArray
                assert len(arr) == 1
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                arr = arr[0, :]  # type: ignore[call-overload]
            result_arrays.append(arr)

        return type(self)(result_arrays, self._axes)

    def setitem(self, indexer, value, warn: bool = True) -> Self:
        """Route __setitem__ through the Block-level implementation."""
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def diff(self, n: int) -> Self:
        assert self.ndim == 2  # caller ensures
        return self.apply(algos.diff, n=n)

    def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self:
        if copy is None:
            copy = True

        return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors)

    def convert(self, copy: bool | None) -> Self:
        """Soft-convert object-dtype columns via maybe_convert_objects."""
        if copy is None:
            copy = True

        def _convert(arr):
            if is_object_dtype(arr.dtype):
                # extract NumpyExtensionArray for tests that patch
                # NumpyExtensionArray._typ
                arr = np.asarray(arr)
                result = lib.maybe_convert_objects(
                    arr,
                    convert_non_numeric=True,
                )
                if result is arr and copy:
                    # no conversion happened; honor the copy request explicitly
                    return arr.copy()
                return result
            else:
                return arr.copy() if copy else arr

        return self.apply(_convert)

    def get_values_for_csv(
        self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
    ) -> Self:
        """Convert each column to values suitable for CSV output."""
        return self.apply(
            get_values_for_csv,
            na_rep=na_rep,
            quoting=quoting,
            float_format=float_format,
            date_format=date_format,
            decimal=decimal,
        )

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        return False  # any(block.is_extension for block in self.blocks)

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        # TODO what is this used for?
        return False

    @property
    def is_single_block(self) -> bool:
        return len(self.arrays) == 1

    def _get_data_subset(self, predicate: Callable) -> Self:
        """Return a new manager keeping only the columns where predicate(arr) is True."""
        indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
        arrays = [self.arrays[i] for i in indices]
        # TODO copy?
        # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq,
        # see test_describe_datetime_columns
        taker = np.array(indices, dtype="intp")
        new_cols = self._axes[1].take(taker)
        new_axes = [self._axes[0], new_cols]
        return type(self)(arrays, new_axes, verify_integrity=False)

    def get_bool_data(self, copy: bool = False) -> Self:
        """
        Select columns that are bool-dtype and object-dtype columns that are all-bool.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))

    def get_numeric_data(self, copy: bool = False) -> Self:
        """
        Select columns that have a numeric dtype.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        return self._get_data_subset(
            lambda arr: is_numeric_dtype(arr.dtype)
            or getattr(arr.dtype, "_is_numeric", False)
        )

    def copy(self, deep: bool | Literal["all"] | None = True) -> Self:
        """
        Make deep or shallow copy of ArrayManager

        Parameters
        ----------
        deep : bool or string, default True
            If False, return shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        if deep is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            deep = True

        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                return ax.copy(deep=True) if deep == "all" else ax.view()

            new_axes = [copy_func(ax) for ax in self._axes]
        else:
            new_axes = list(self._axes)

        if deep:
            new_arrays = [arr.copy() for arr in self.arrays]
        else:
            new_arrays = list(self.arrays)
        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def reindex_indexer(
        self,
        new_axis,
        indexer,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool | None = True,
        # ignored keywords
        only_slice: bool = False,
        # ArrayManager specific keywords
        use_na_proxy: bool = False,
    ) -> Self:
        # public entry point: normalize the BlockManager-style axis number,
        # then delegate to the internal implementation
        axis = self._normalize_axis(axis)
        return self._reindex_indexer(
            new_axis,
            indexer,
            axis,
            fill_value,
            allow_dups,
            copy,
            use_na_proxy,
        )

    def _reindex_indexer(
        self,
        new_axis,
        indexer: npt.NDArray[np.intp] | None,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool | None = True,
        use_na_proxy: bool = False,
    ) -> Self:
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool, default True


        pandas-indexer with -1's only.
        """
        if copy is None:
            # ArrayManager does not yet support CoW, so deep=None always means
            # deep=True for now
            copy = True

        if indexer is None:
            # no reshuffling needed -- just (maybe) copy and swap the axis label
            if new_axis is self._axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result._axes = list(self._axes)
            result._axes[axis] = new_axis
            return result

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self._axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 1:
            # column axis: select whole arrays; -1 entries become all-NA columns
            new_arrays = []
            for i in indexer:
                if i == -1:
                    arr = self._make_na_array(
                        fill_value=fill_value, use_na_proxy=use_na_proxy
                    )
                else:
                    arr = self.arrays[i]
                    if copy:
                        arr = arr.copy()
                new_arrays.append(arr)

        else:
            # row axis: take within each array; precompute the -1 mask once
            validate_indices(indexer, len(self._axes[0]))
            indexer = ensure_platform_int(indexer)
            mask = indexer == -1
            needs_masking = mask.any()
            new_arrays = [
                take_1d(
                    arr,
                    indexer,
                    allow_fill=needs_masking,
                    fill_value=fill_value,
                    mask=mask,
                    # if fill_value is not None else blk.fill_value
                )
                for arr in self.arrays
            ]

        new_axes = list(self._axes)
        new_axes[axis] = new_axis

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def take(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt = 1,
        verify: bool = True,
    ) -> Self:
        """
        Take items along any axis.
        """
        assert isinstance(indexer, np.ndarray), type(indexer)
        assert indexer.dtype == np.intp, indexer.dtype

        axis = self._normalize_axis(axis)

        if not indexer.ndim == 1:
            raise ValueError("indexer should be 1-dimensional")

        n = self.shape_proper[axis]
        indexer = maybe_convert_indices(indexer, n, verify=verify)

        new_labels = self._axes[axis].take(indexer)
        return self._reindex_indexer(
            new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
        )

    def _make_na_array(self, fill_value=None, use_na_proxy: bool = False):
        """Build an all-NA column of the right length (or a lazy NullArrayProxy)."""
        if use_na_proxy:
            assert fill_value is None
            return NullArrayProxy(self.shape_proper[0])

        if fill_value is None:
            fill_value = np.nan

        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        array_values = make_na_array(dtype, self.shape_proper[:1], fill_value)
        return array_values

    def _equal_values(self, other) -> bool:
        """
        Used in .equals defined in base class. Only check the column values
        assuming shape and indexes have already been checked.
        """
        for left, right in zip(self.arrays, other.arrays):
            if not array_equals(left, right):
                return False
        return True

    # TODO
    # to_dict

597 

598 

class ArrayManager(BaseArrayManager):
    """
    2D manager: one 1D array per column, with _axes stored as (rows, columns).
    """

    @property
    def ndim(self) -> Literal[2]:
        return 2

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # Note: we are storing the axes in "_axes" in the (row, columns) order
        # which contrasts the order how it is stored in BlockManager
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            self._axes = [ensure_index(ax) for ax in axes]
            arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays]
            self.arrays = [maybe_coerce_values(arr) for arr in arrays]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        """Validate that arrays match the axes: count, length, type, and ndim."""
        n_rows, n_columns = self.shape_proper
        if not len(self.arrays) == n_columns:
            raise ValueError(
                "Number of passed arrays must equal the size of the column Index: "
                f"{len(self.arrays)} arrays vs {n_columns} columns."
            )
        for arr in self.arrays:
            if not len(arr) == n_rows:
                raise ValueError(
                    "Passed arrays should have the same length as the rows Index: "
                    f"{len(arr)} vs {n_rows} rows"
                )
            if not isinstance(arr, (np.ndarray, ExtensionArray)):
                raise ValueError(
                    "Passed arrays should be np.ndarray or ExtensionArray instances, "
                    f"got {type(arr)} instead"
                )
            if not arr.ndim == 1:
                raise ValueError(
                    "Passed arrays should be 1-dimensional, got array with "
                    f"{arr.ndim} dimensions instead."
                )

    # --------------------------------------------------------------------
    # Indexing

    def fast_xs(self, loc: int) -> SingleArrayManager:
        """
        Return the array corresponding to `frame.iloc[loc]`.

        Parameters
        ----------
        loc : int

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        values = [arr[loc] for arr in self.arrays]
        if isinstance(dtype, ExtensionDtype):
            result = dtype.construct_array_type()._from_sequence(values, dtype=dtype)
        # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
        elif is_datetime64_ns_dtype(dtype):
            result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray
        elif is_timedelta64_ns_dtype(dtype):
            result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray
        else:
            result = np.array(values, dtype=dtype)
        return SingleArrayManager([result], [self._axes[1]])

    def get_slice(self, slobj: slice, axis: AxisInt = 0) -> ArrayManager:
        """Slice along one axis; rows slice into each array, columns slice the list."""
        axis = self._normalize_axis(axis)

        if axis == 0:
            arrays = [arr[slobj] for arr in self.arrays]
        elif axis == 1:
            arrays = self.arrays[slobj]

        new_axes = list(self._axes)
        new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

        return type(self)(arrays, new_axes, verify_integrity=False)

    def iget(self, i: int) -> SingleArrayManager:
        """
        Return the data as a SingleArrayManager.
        """
        values = self.arrays[i]
        return SingleArrayManager([values], [self._axes[0]])

    def iget_values(self, i: int) -> ArrayLike:
        """
        Return the data for column i as the values (ndarray or ExtensionArray).
        """
        return self.arrays[i]

    @property
    def column_arrays(self) -> list[ArrayLike]:
        """
        Used in the JSON C code to access column arrays.
        """

        return [np.asarray(arr) for arr in self.arrays]

    def iset(
        self,
        loc: int | slice | np.ndarray,
        value: ArrayLike,
        inplace: bool = False,
        refs=None,
    ) -> None:
        """
        Set new column(s).

        This changes the ArrayManager in-place, but replaces (an) existing
        column(s), not changing column values in-place).

        Parameters
        ----------
        loc : integer, slice or boolean mask
            Positional location (already bounds checked)
        value : np.ndarray or ExtensionArray
        inplace : bool, default False
            Whether overwrite existing array as opposed to replacing it.
        """
        # single column -> single integer index
        if lib.is_integer(loc):
            # TODO can we avoid needing to unpack this here? That means converting
            # DataFrame into 1D array when loc is an integer
            if isinstance(value, np.ndarray) and value.ndim == 2:
                assert value.shape[1] == 1
                value = value[:, 0]

            # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
            # but we should avoid that and pass directly the proper array
            value = maybe_coerce_values(value)

            assert isinstance(value, (np.ndarray, ExtensionArray))
            assert value.ndim == 1
            assert len(value) == len(self._axes[0])
            self.arrays[loc] = value
            return

        # multiple columns -> convert slice or array to integer indices
        elif isinstance(loc, slice):
            indices: range | np.ndarray = range(
                loc.start if loc.start is not None else 0,
                loc.stop if loc.stop is not None else self.shape_proper[1],
                loc.step if loc.step is not None else 1,
            )
        else:
            assert isinstance(loc, np.ndarray)
            assert loc.dtype == "bool"
            indices = np.nonzero(loc)[0]

        assert value.ndim == 2
        assert value.shape[0] == len(self._axes[0])

        for value_idx, mgr_idx in enumerate(indices):
            # error: No overload variant of "__getitem__" of "ExtensionArray" matches
            # argument type "Tuple[slice, int]"
            value_arr = value[:, value_idx]  # type: ignore[call-overload]
            self.arrays[mgr_idx] = value_arr
        return

    def column_setitem(
        self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False
    ) -> None:
        """
        Set values ("setitem") into a single column (not setting the full column).

        This is a method on the ArrayManager level, to avoid creating an
        intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
        """
        if not is_integer(loc):
            raise TypeError("The column index should be an integer")
        arr = self.arrays[loc]
        mgr = SingleArrayManager([arr], [self._axes[0]])
        if inplace_only:
            mgr.setitem_inplace(idx, value)
        else:
            new_mgr = mgr.setitem((idx,), value)
            # update existing ArrayManager in-place
            self.arrays[loc] = new_mgr.arrays[0]

    def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : np.ndarray or ExtensionArray
        """
        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        value = extract_array(value, extract_numpy=True)
        if value.ndim == 2:
            if value.shape[0] == 1:
                # error: No overload variant of "__getitem__" of "ExtensionArray"
                # matches argument type "Tuple[int, slice]"
                value = value[0, :]  # type: ignore[call-overload]
            else:
                raise ValueError(
                    f"Expected a 1D array, got an array with shape {value.shape}"
                )
        value = maybe_coerce_values(value)

        # TODO self.arrays can be empty
        # assert len(value) == len(self.arrays[0])

        # TODO is this copy needed?
        arrays = self.arrays.copy()
        arrays.insert(loc, value)

        self.arrays = arrays
        self._axes[1] = new_axis

    def idelete(self, indexer) -> ArrayManager:
        """
        Delete selected locations in-place (new block and array, same BlockManager)
        """
        to_keep = np.ones(self.shape[0], dtype=np.bool_)
        to_keep[indexer] = False

        self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
        self._axes = [self._axes[0], self._axes[1][to_keep]]
        return self

    # --------------------------------------------------------------------
    # Array-wise Operation

    def grouped_reduce(self, func: Callable) -> Self:
        """
        Apply grouped reduction function columnwise, returning a new ArrayManager.

        Parameters
        ----------
        func : grouped reduction function

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        result_indices: list[int] = []

        for i, arr in enumerate(self.arrays):
            # grouped_reduce functions all expect 2D arrays
            arr = ensure_block_shape(arr, ndim=2)
            res = func(arr)
            if res.ndim == 2:
                # reverse of ensure_block_shape
                assert res.shape[0] == 1
                res = res[0]

            result_arrays.append(res)
            result_indices.append(i)

        if len(result_arrays) == 0:
            nrows = 0
        else:
            nrows = result_arrays[0].shape[0]
        index = Index(range(nrows))

        columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        return type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]

    def reduce(self, func: Callable) -> Self:
        """
        Apply reduction function column-wise, returning a single-row ArrayManager.

        Parameters
        ----------
        func : reduction function

        Returns
        -------
        ArrayManager
        """
        result_arrays: list[np.ndarray] = []
        for i, arr in enumerate(self.arrays):
            res = func(arr, axis=0)

            # TODO NaT doesn't preserve dtype, so we need to ensure to create
            # a timedelta result array if original was timedelta
            # what if datetime results in timedelta? (eg std)
            dtype = arr.dtype if res is NaT else None
            result_arrays.append(
                sanitize_array([res], None, dtype=dtype)  # type: ignore[arg-type]
            )

        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
        columns = self.items

        # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
        # expected "List[Union[ndarray, ExtensionArray]]"
        new_mgr = type(self)(result_arrays, [index, columns])  # type: ignore[arg-type]
        return new_mgr

    def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
        """
        Apply array_op blockwise with another (aligned) BlockManager.
        """
        # TODO what if `other` is BlockManager ?
        left_arrays = self.arrays
        right_arrays = other.arrays
        result_arrays = [
            array_op(left, right) for left, right in zip(left_arrays, right_arrays)
        ]
        return type(self)(result_arrays, self._axes)

    def quantile(
        self,
        *,
        qs: Index,  # with dtype float64
        transposed: bool = False,
        interpolation: QuantileInterpolation = "linear",
    ) -> ArrayManager:
        """Compute the given quantiles per column; result rows are the quantiles."""
        arrs = [ensure_block_shape(x, 2) for x in self.arrays]
        new_arrs = [
            quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs
        ]
        for i, arr in enumerate(new_arrs):
            if arr.ndim == 2:
                assert arr.shape[0] == 1, arr.shape
                new_arrs[i] = arr[0]

        axes = [qs, self._axes[1]]
        return type(self)(new_arrs, axes)

    # ----------------------------------------------------------------

    def unstack(self, unstacker, fill_value) -> ArrayManager:
        """
        Return a BlockManager with all blocks unstacked.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        indexer, _ = unstacker._indexer_and_to_sort
        if unstacker.mask.all():
            # no missing positions introduced -> plain take without filling
            new_indexer = indexer
            allow_fill = False
            new_mask2D = None
            needs_masking = None
        else:
            new_indexer = np.full(unstacker.mask.shape, -1)
            new_indexer[unstacker.mask] = indexer
            allow_fill = True
            # calculating the full mask once and passing it to take_1d is faster
            # than letting take_1d calculate it in each repeated call
            new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
            needs_masking = new_mask2D.any(axis=0)
        new_indexer2D = new_indexer.reshape(*unstacker.full_shape)
        new_indexer2D = ensure_platform_int(new_indexer2D)

        new_arrays = []
        for arr in self.arrays:
            for i in range(unstacker.full_shape[1]):
                if allow_fill:
                    # error: Value of type "Optional[Any]" is not indexable [index]
                    new_arr = take_1d(
                        arr,
                        new_indexer2D[:, i],
                        allow_fill=needs_masking[i],  # type: ignore[index]
                        fill_value=fill_value,
                        mask=new_mask2D[:, i],  # type: ignore[index]
                    )
                else:
                    new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False)
                new_arrays.append(new_arr)

        new_index = unstacker.new_index
        new_columns = unstacker.get_new_columns(self._axes[1])
        new_axes = [new_index, new_columns]

        return type(self)(new_arrays, new_axes, verify_integrity=False)

    def as_array(
        self,
        dtype=None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        dtype : object, default None
            Data type of the return array.
        copy : bool, default False
            If True then guarantee that a copy is returned. A value of
            False does not guarantee that the underlying data is not
            copied.
        na_value : object, default lib.no_default
            Value to be used as the missing value sentinel.

        Returns
        -------
        arr : ndarray
        """
        if len(self.arrays) == 0:
            empty_arr = np.empty(self.shape, dtype=float)
            return empty_arr.transpose()

        # We want to copy when na_value is provided to avoid
        # mutating the original object
        copy = copy or na_value is not lib.no_default

        if not dtype:
            dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

        dtype = ensure_np_dtype(dtype)

        result = np.empty(self.shape_proper, dtype=dtype)

        for i, arr in enumerate(self.arrays):
            arr = arr.astype(dtype, copy=copy)
            result[:, i] = arr

        if na_value is not lib.no_default:
            result[isna(result)] = na_value

        return result

    @classmethod
    def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self:
        """
        Concatenate uniformly-indexed ArrayManagers horizontally.
        """
        # concatting along the columns -> combine reindexed arrays in a single manager
        arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs]))
        new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False)
        return new_mgr

    @classmethod
    def concat_vertical(cls, mgrs: list[Self], axes: list[Index]) -> Self:
        """
        Concatenate uniformly-indexed ArrayManagers vertically.
        """
        # concatting along the rows -> concat the reindexed arrays
        # TODO(ArrayManager) doesn't yet preserve the correct dtype
        arrays = [
            concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))])
            for j in range(len(mgrs[0].arrays))
        ]
        new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False)
        return new_mgr

1066 

1067 

class SingleArrayManager(BaseArrayManager, SingleDataManager):
    """
    Manager backed by a single 1D array; the 1-dimensional (Series)
    counterpart of ArrayManager.
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    # Invariant (asserted in __init__/_verify_integrity): exactly one array
    # and exactly one axis, with matching lengths.
    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    @property
    def ndim(self) -> Literal[1]:
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        self._axes = axes
        self.arrays = arrays

        if verify_integrity:
            assert len(axes) == 1
            assert len(arrays) == 1
            self._axes = [ensure_index(ax) for ax in self._axes]
            arr = arrays[0]
            # normalize the stored array before keeping a reference to it
            arr = maybe_coerce_values(arr)
            arr = extract_pandas_array(arr, None, 1)[0]
            self.arrays = [arr]
            self._verify_integrity()

    def _verify_integrity(self) -> None:
        """Check the one-array invariant; raise ValueError if the array is not 1D."""
        (n_rows,) = self.shape
        assert len(self.arrays) == 1
        arr = self.arrays[0]
        assert len(arr) == n_rows
        if not arr.ndim == 1:
            raise ValueError(
                "Passed array should be 1-dimensional, got array with "
                f"{arr.ndim} dimensions instead."
            )

    @staticmethod
    def _normalize_axis(axis):
        # only one axis exists, so no axis translation is needed
        return axis

    def make_empty(self, axes=None) -> Self:
        """Return an empty ArrayManager with index/array of length 0"""
        if axes is None:
            axes = [Index([], dtype=object)]
        array: np.ndarray = np.array([], dtype=self.dtype)
        return type(self)([array], axes)

    @classmethod
    def from_array(cls, array, index) -> SingleArrayManager:
        """Alternate constructor from a single array and its index."""
        return cls([array], [index])

    # error: Cannot override writeable attribute with read-only property
    @property
    def axes(self) -> list[Index]:  # type: ignore[override]
        return self._axes

    @property
    def index(self) -> Index:
        return self._axes[0]

    @property
    def dtype(self):
        # dtype of the single backing array
        return self.array.dtype

    def external_values(self):
        """The array that Series.values returns"""
        return external_values(self.array)

    def internal_values(self):
        """The array that Series._values returns"""
        return self.array

    def array_values(self):
        """The array that Series.array returns"""
        arr = self.array
        if isinstance(arr, np.ndarray):
            # wrap plain ndarrays so the result is always an ExtensionArray
            arr = NumpyExtensionArray(arr)
        return arr

    @property
    def _can_hold_na(self) -> bool:
        if isinstance(self.array, np.ndarray):
            # numpy int/uint/bool dtypes cannot represent missing values
            return self.array.dtype.kind not in "iub"
        else:
            # ExtensionArray
            return self.array._can_hold_na

    @property
    def is_single_block(self) -> bool:
        return True

    def fast_xs(self, loc: int) -> SingleArrayManager:
        raise NotImplementedError("Use series._values[loc] instead")

    def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
        """Return a new manager sliced along the (single) axis."""
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        new_array = self.array[slobj]
        new_index = self.index._getitem_slice(slobj)
        # slicing cannot break the invariants -> skip verification
        return type(self)([new_array], [new_index], verify_integrity=False)

    def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager:
        """Return a new manager keeping only the rows selected by the boolean mask."""
        new_array = self.array[indexer]
        new_index = self.index[indexer]
        return type(self)([new_array], [new_index])

    # error: Signature of "apply" incompatible with supertype "BaseArrayManager"
    def apply(self, func, **kwargs) -> Self:  # type: ignore[override]
        """Apply a callable (or named method of the array) and wrap the result."""
        if callable(func):
            new_array = func(self.array, **kwargs)
        else:
            new_array = getattr(self.array, func)(**kwargs)
        return type(self)([new_array], self._axes)

    def setitem(self, indexer, value, warn: bool = True) -> SingleArrayManager:
        """
        Set values with indexer.

        For SingleArrayManager, this backs s[indexer] = value

        See `setitem_inplace` for a version that works inplace and doesn't
        return a new Manager.
        """
        # NOTE: `warn` is accepted for interface compatibility but unused here.
        if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def idelete(self, indexer) -> SingleArrayManager:
        """
        Delete selected locations in-place (new array, same ArrayManager)
        """
        # keep everything except the rows selected by `indexer`
        to_keep = np.ones(self.shape[0], dtype=np.bool_)
        to_keep[indexer] = False

        self.arrays = [self.arrays[0][to_keep]]
        self._axes = [self._axes[0][to_keep]]
        return self

    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
        # used in get_numeric_data / get_bool_data
        if predicate(self.array):
            return type(self)(self.arrays, self._axes, verify_integrity=False)
        else:
            # array rejected by the predicate -> empty result
            return self.make_empty()

    def set_values(self, values: ArrayLike) -> None:
        """
        Set (replace) the values of the SingleArrayManager in place.

        Use at your own risk! This does not check if the passed values are
        valid for the current SingleArrayManager (length, dtype, etc).
        """
        self.arrays[0] = values

    def to_2d_mgr(self, columns: Index) -> ArrayManager:
        """
        Manager analogue of Series.to_frame
        """
        arrays = [self.arrays[0]]
        axes = [self.axes[0], columns]

        return ArrayManager(arrays, axes, verify_integrity=False)

1238 

1239 

class NullArrayProxy:
    """
    Proxy object for an all-NA array.

    Only stores the length of the array, and not the dtype. The dtype
    will only be known when actually concatenating (after determining the
    common dtype, for which this proxy is ignored).
    Using this object avoids that the internals/concat.py needs to determine
    the proper dtype and array type.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Materialize the proxy as an actual all-NA array of the given dtype.

        Parameters
        ----------
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if isinstance(dtype, ExtensionDtype):
            # take() with an all -1 indexer and allow_fill fills with the
            # extension dtype's native NA value
            empty = dtype.construct_array_type()._from_sequence([], dtype=dtype)
            all_na_indexer = -np.ones(self.n, dtype=np.intp)
            return empty.take(all_na_indexer, allow_fill=True)

        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        fill_value = na_value_for_dtype(dtype)
        result = np.empty(self.n, dtype=dtype)
        result.fill(fill_value)
        return ensure_wrapped_if_datetimelike(result)

1285 

1286 

1287def concat_arrays(to_concat: list) -> ArrayLike: 

1288 """ 

1289 Alternative for concat_compat but specialized for use in the ArrayManager. 

1290 

1291 Differences: only deals with 1D arrays (no axis keyword), assumes 

1292 ensure_wrapped_if_datetimelike and does not skip empty arrays to determine 

1293 the dtype. 

1294 In addition ensures that all NullArrayProxies get replaced with actual 

1295 arrays. 

1296 

1297 Parameters 

1298 ---------- 

1299 to_concat : list of arrays 

1300 

1301 Returns 

1302 ------- 

1303 np.ndarray or ExtensionArray 

1304 """ 

1305 # ignore the all-NA proxies to determine the resulting dtype 

1306 to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] 

1307 

1308 dtypes = {x.dtype for x in to_concat_no_proxy} 

1309 single_dtype = len(dtypes) == 1 

1310 

1311 if single_dtype: 

1312 target_dtype = to_concat_no_proxy[0].dtype 

1313 elif all(lib.is_np_dtype(x, "iub") for x in dtypes): 

1314 # GH#42092 

1315 target_dtype = np_find_common_type(*dtypes) 

1316 else: 

1317 target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) 

1318 

1319 to_concat = [ 

1320 arr.to_array(target_dtype) 

1321 if isinstance(arr, NullArrayProxy) 

1322 else astype_array(arr, target_dtype, copy=False) 

1323 for arr in to_concat 

1324 ] 

1325 

1326 if isinstance(to_concat[0], ExtensionArray): 

1327 cls = type(to_concat[0]) 

1328 return cls._concat_same_type(to_concat) 

1329 

1330 result = np.concatenate(to_concat) 

1331 

1332 # TODO decide on exact behaviour (we shouldn't do this only for empty result) 

1333 # see https://github.com/pandas-dev/pandas/issues/39817 

1334 if len(result) == 0: 

1335 # all empties -> check for bool to not coerce to float 

1336 kinds = {obj.dtype.kind for obj in to_concat_no_proxy} 

1337 if len(kinds) != 1: 

1338 if "b" in kinds: 

1339 result = result.astype(object) 

1340 return result