Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/array_manager.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

583 statements  

1""" 

2Experimental manager based on storing a collection of 1D arrays 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 Any, 

8 Callable, 

9 Hashable, 

10 Literal, 

11 TypeVar, 

12) 

13 

14import numpy as np 

15 

16from pandas._libs import ( 

17 NaT, 

18 algos as libalgos, 

19 lib, 

20) 

21from pandas._typing import ( 

22 ArrayLike, 

23 AxisInt, 

24 DtypeObj, 

25 QuantileInterpolation, 

26 npt, 

27) 

28from pandas.util._validators import validate_bool_kwarg 

29 

30from pandas.core.dtypes.astype import astype_array_safe 

31from pandas.core.dtypes.cast import ( 

32 ensure_dtype_can_hold_na, 

33 infer_dtype_from_scalar, 

34) 

35from pandas.core.dtypes.common import ( 

36 ensure_platform_int, 

37 is_datetime64_ns_dtype, 

38 is_dtype_equal, 

39 is_extension_array_dtype, 

40 is_integer, 

41 is_numeric_dtype, 

42 is_object_dtype, 

43 is_timedelta64_ns_dtype, 

44) 

45from pandas.core.dtypes.dtypes import ( 

46 ExtensionDtype, 

47 PandasDtype, 

48) 

49from pandas.core.dtypes.generic import ( 

50 ABCDataFrame, 

51 ABCSeries, 

52) 

53from pandas.core.dtypes.missing import ( 

54 array_equals, 

55 isna, 

56 na_value_for_dtype, 

57) 

58 

59import pandas.core.algorithms as algos 

60from pandas.core.array_algos.quantile import quantile_compat 

61from pandas.core.array_algos.take import take_1d 

62from pandas.core.arrays import ( 

63 DatetimeArray, 

64 ExtensionArray, 

65 PandasArray, 

66 TimedeltaArray, 

67) 

68from pandas.core.arrays.sparse import SparseDtype 

69from pandas.core.construction import ( 

70 ensure_wrapped_if_datetimelike, 

71 extract_array, 

72 sanitize_array, 

73) 

74from pandas.core.indexers import ( 

75 maybe_convert_indices, 

76 validate_indices, 

77) 

78from pandas.core.indexes.api import ( 

79 Index, 

80 ensure_index, 

81) 

82from pandas.core.internals.base import ( 

83 DataManager, 

84 SingleDataManager, 

85 interleaved_dtype, 

86) 

87from pandas.core.internals.blocks import ( 

88 ensure_block_shape, 

89 external_values, 

90 extract_pandas_array, 

91 maybe_coerce_values, 

92 new_block, 

93 to_native_types, 

94) 

95 

96T = TypeVar("T", bound="BaseArrayManager") 

97 

98 

class BaseArrayManager(DataManager):
    """
    Core internal data structure to implement DataFrame and Series.

    Alternative to the BlockManager, storing a list of 1D arrays instead of
    Blocks.

    This is *not* a public API class

    Parameters
    ----------
    arrays : Sequence of arrays
        The stored arrays (np.ndarray or ExtensionArray objects).
    axes : Sequence of Index
        Axis labels; kept on the instance under the private name ``_axes``.
    verify_integrity : bool, default True

    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        # The base class cannot be instantiated directly; this constructor
        # always raises.
        raise NotImplementedError

131 

132 def make_empty(self: T, axes=None) -> T: 

133 """Return an empty ArrayManager with the items axis of len 0 (no columns)""" 

134 if axes is None: 

135 axes = [self.axes[1:], Index([])] 

136 

137 arrays: list[np.ndarray | ExtensionArray] = [] 

138 return type(self)(arrays, axes) 

139 

140 @property 

141 def items(self) -> Index: 

142 return self._axes[-1] 

143 

144 @property 

145 # error: Signature of "axes" incompatible with supertype "DataManager" 

146 def axes(self) -> list[Index]: # type: ignore[override] 

147 # mypy doesn't work to override attribute with property 

148 # see https://github.com/python/mypy/issues/4125 

149 """Axes is BlockManager-compatible order (columns, rows)""" 

150 return [self._axes[1], self._axes[0]] 

151 

152 @property 

153 def shape_proper(self) -> tuple[int, ...]: 

154 # this returns (n_rows, n_columns) 

155 return tuple(len(ax) for ax in self._axes) 

156 

157 @staticmethod 

158 def _normalize_axis(axis: AxisInt) -> int: 

159 # switch axis 

160 axis = 1 if axis == 0 else 0 

161 return axis 

162 

163 def set_axis(self, axis: AxisInt, new_labels: Index) -> None: 

164 # Caller is responsible for ensuring we have an Index object. 

165 self._validate_set_axis(axis, new_labels) 

166 axis = self._normalize_axis(axis) 

167 self._axes[axis] = new_labels 

168 

169 def get_dtypes(self) -> np.ndarray: 

170 return np.array([arr.dtype for arr in self.arrays], dtype="object") 

171 

172 def add_references(self, mgr: BaseArrayManager) -> None: 

173 """ 

174 Only implemented on the BlockManager level 

175 """ 

176 return 

177 

178 def __getstate__(self): 

179 return self.arrays, self._axes 

180 

181 def __setstate__(self, state) -> None: 

182 self.arrays = state[0] 

183 self._axes = state[1] 

184 

185 def __repr__(self) -> str: 

186 output = type(self).__name__ 

187 output += f"\nIndex: {self._axes[0]}" 

188 if self.ndim == 2: 

189 output += f"\nColumns: {self._axes[1]}" 

190 output += f"\n{len(self.arrays)} arrays:" 

191 for arr in self.arrays: 

192 output += f"\n{arr.dtype}" 

193 return output 

194 

195 def apply( 

196 self: T, 

197 f, 

198 align_keys: list[str] | None = None, 

199 **kwargs, 

200 ) -> T: 

201 """ 

202 Iterate over the arrays, collect and create a new ArrayManager. 

203 

204 Parameters 

205 ---------- 

206 f : str or callable 

207 Name of the Array method to apply. 

208 align_keys: List[str] or None, default None 

209 **kwargs 

210 Keywords to pass to `f` 

211 

212 Returns 

213 ------- 

214 ArrayManager 

215 """ 

216 assert "filter" not in kwargs 

217 

218 align_keys = align_keys or [] 

219 result_arrays: list[np.ndarray] = [] 

220 # fillna: Series/DataFrame is responsible for making sure value is aligned 

221 

222 aligned_args = {k: kwargs[k] for k in align_keys} 

223 

224 if f == "apply": 

225 f = kwargs.pop("func") 

226 

227 for i, arr in enumerate(self.arrays): 

228 if aligned_args: 

229 for k, obj in aligned_args.items(): 

230 if isinstance(obj, (ABCSeries, ABCDataFrame)): 

231 # The caller is responsible for ensuring that 

232 # obj.axes[-1].equals(self.items) 

233 if obj.ndim == 1: 

234 kwargs[k] = obj.iloc[i] 

235 else: 

236 kwargs[k] = obj.iloc[:, i]._values 

237 else: 

238 # otherwise we have an array-like 

239 kwargs[k] = obj[i] 

240 

241 if callable(f): 

242 applied = f(arr, **kwargs) 

243 else: 

244 applied = getattr(arr, f)(**kwargs) 

245 

246 # if not isinstance(applied, ExtensionArray): 

247 # # TODO not all EA operations return new EAs (eg astype) 

248 # applied = array(applied) 

249 result_arrays.append(applied) 

250 

251 new_axes = self._axes 

252 

253 # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; 

254 # expected "List[Union[ndarray, ExtensionArray]]" 

255 return type(self)(result_arrays, new_axes) # type: ignore[arg-type] 

256 

257 def apply_with_block( 

258 self: T, f, align_keys=None, swap_axis: bool = True, **kwargs 

259 ) -> T: 

260 # switch axis to follow BlockManager logic 

261 if swap_axis and "axis" in kwargs and self.ndim == 2: 

262 kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0 

263 

264 align_keys = align_keys or [] 

265 aligned_args = {k: kwargs[k] for k in align_keys} 

266 

267 result_arrays = [] 

268 

269 for i, arr in enumerate(self.arrays): 

270 if aligned_args: 

271 for k, obj in aligned_args.items(): 

272 if isinstance(obj, (ABCSeries, ABCDataFrame)): 

273 # The caller is responsible for ensuring that 

274 # obj.axes[-1].equals(self.items) 

275 if obj.ndim == 1: 

276 if self.ndim == 2: 

277 kwargs[k] = obj.iloc[slice(i, i + 1)]._values 

278 else: 

279 kwargs[k] = obj.iloc[:]._values 

280 else: 

281 kwargs[k] = obj.iloc[:, [i]]._values 

282 else: 

283 # otherwise we have an ndarray 

284 if obj.ndim == 2: 

285 kwargs[k] = obj[[i]] 

286 

287 if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray): 

288 # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to 

289 # convert for the Block constructors. 

290 arr = np.asarray(arr) 

291 

292 if self.ndim == 2: 

293 arr = ensure_block_shape(arr, 2) 

294 block = new_block(arr, placement=slice(0, 1, 1), ndim=2) 

295 else: 

296 block = new_block(arr, placement=slice(0, len(self), 1), ndim=1) 

297 

298 applied = getattr(block, f)(**kwargs) 

299 if isinstance(applied, list): 

300 applied = applied[0] 

301 arr = applied.values 

302 if self.ndim == 2 and arr.ndim == 2: 

303 # 2D for np.ndarray or DatetimeArray/TimedeltaArray 

304 assert len(arr) == 1 

305 # error: No overload variant of "__getitem__" of "ExtensionArray" 

306 # matches argument type "Tuple[int, slice]" 

307 arr = arr[0, :] # type: ignore[call-overload] 

308 result_arrays.append(arr) 

309 

310 return type(self)(result_arrays, self._axes) 

311 

312 def where(self: T, other, cond, align: bool) -> T: 

313 if align: 

314 align_keys = ["other", "cond"] 

315 else: 

316 align_keys = ["cond"] 

317 other = extract_array(other, extract_numpy=True) 

318 

319 return self.apply_with_block( 

320 "where", 

321 align_keys=align_keys, 

322 other=other, 

323 cond=cond, 

324 ) 

325 

326 def round(self: T, decimals: int, using_cow: bool = False) -> T: 

327 return self.apply_with_block("round", decimals=decimals, using_cow=using_cow) 

328 

329 def setitem(self: T, indexer, value) -> T: 

330 return self.apply_with_block("setitem", indexer=indexer, value=value) 

331 

332 def putmask(self: T, mask, new, align: bool = True) -> T: 

333 if align: 

334 align_keys = ["new", "mask"] 

335 else: 

336 align_keys = ["mask"] 

337 new = extract_array(new, extract_numpy=True) 

338 

339 return self.apply_with_block( 

340 "putmask", 

341 align_keys=align_keys, 

342 mask=mask, 

343 new=new, 

344 ) 

345 

346 def diff(self: T, n: int, axis: AxisInt) -> T: 

347 assert self.ndim == 2 and axis == 0 # caller ensures 

348 return self.apply(algos.diff, n=n, axis=axis) 

349 

350 def interpolate(self: T, **kwargs) -> T: 

351 return self.apply_with_block("interpolate", swap_axis=False, **kwargs) 

352 

353 def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T: 

354 if fill_value is lib.no_default: 

355 fill_value = None 

356 

357 if axis == 1 and self.ndim == 2: 

358 # TODO column-wise shift 

359 raise NotImplementedError 

360 

361 return self.apply_with_block( 

362 "shift", periods=periods, axis=axis, fill_value=fill_value 

363 ) 

364 

365 def fillna(self: T, value, limit, inplace: bool, downcast) -> T: 

366 if limit is not None: 

367 # Do this validation even if we go through one of the no-op paths 

368 limit = libalgos.validate_limit(None, limit=limit) 

369 

370 return self.apply_with_block( 

371 "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast 

372 ) 

373 

374 def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T: 

375 if copy is None: 

376 copy = True 

377 

378 return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) 

379 

380 def convert(self: T, copy: bool | None) -> T: 

381 if copy is None: 

382 copy = True 

383 

384 def _convert(arr): 

385 if is_object_dtype(arr.dtype): 

386 # extract PandasArray for tests that patch PandasArray._typ 

387 arr = np.asarray(arr) 

388 result = lib.maybe_convert_objects( 

389 arr, 

390 convert_datetime=True, 

391 convert_timedelta=True, 

392 convert_period=True, 

393 convert_interval=True, 

394 ) 

395 if result is arr and copy: 

396 return arr.copy() 

397 return result 

398 else: 

399 return arr.copy() if copy else arr 

400 

401 return self.apply(_convert) 

402 

403 def replace_regex(self: T, **kwargs) -> T: 

404 return self.apply_with_block("_replace_regex", **kwargs) 

405 

406 def replace(self: T, to_replace, value, inplace: bool) -> T: 

407 inplace = validate_bool_kwarg(inplace, "inplace") 

408 assert np.ndim(value) == 0, value 

409 # TODO "replace" is right now implemented on the blocks, we should move 

410 # it to general array algos so it can be reused here 

411 return self.apply_with_block( 

412 "replace", value=value, to_replace=to_replace, inplace=inplace 

413 ) 

414 

415 def replace_list( 

416 self: T, 

417 src_list: list[Any], 

418 dest_list: list[Any], 

419 inplace: bool = False, 

420 regex: bool = False, 

421 ) -> T: 

422 """do a list replace""" 

423 inplace = validate_bool_kwarg(inplace, "inplace") 

424 

425 return self.apply_with_block( 

426 "replace_list", 

427 src_list=src_list, 

428 dest_list=dest_list, 

429 inplace=inplace, 

430 regex=regex, 

431 ) 

432 

433 def to_native_types(self: T, **kwargs) -> T: 

434 return self.apply(to_native_types, **kwargs) 

435 

436 @property 

437 def is_mixed_type(self) -> bool: 

438 return True 

439 

440 @property 

441 def is_numeric_mixed_type(self) -> bool: 

442 return all(is_numeric_dtype(t) for t in self.get_dtypes()) 

443 

444 @property 

445 def any_extension_types(self) -> bool: 

446 """Whether any of the blocks in this manager are extension blocks""" 

447 return False # any(block.is_extension for block in self.blocks) 

448 

449 @property 

450 def is_view(self) -> bool: 

451 """return a boolean if we are a single block and are a view""" 

452 # TODO what is this used for? 

453 return False 

454 

455 @property 

456 def is_single_block(self) -> bool: 

457 return len(self.arrays) == 1 

458 

459 def _get_data_subset(self: T, predicate: Callable) -> T: 

460 indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] 

461 arrays = [self.arrays[i] for i in indices] 

462 # TODO copy? 

463 # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq, 

464 # see test_describe_datetime_columns 

465 taker = np.array(indices, dtype="intp") 

466 new_cols = self._axes[1].take(taker) 

467 new_axes = [self._axes[0], new_cols] 

468 return type(self)(arrays, new_axes, verify_integrity=False) 

469 

470 def get_bool_data(self: T, copy: bool = False) -> T: 

471 """ 

472 Select columns that are bool-dtype and object-dtype columns that are all-bool. 

473 

474 Parameters 

475 ---------- 

476 copy : bool, default False 

477 Whether to copy the blocks 

478 """ 

479 return self._get_data_subset(lambda x: x.dtype == np.dtype(bool)) 

480 

481 def get_numeric_data(self: T, copy: bool = False) -> T: 

482 """ 

483 Select columns that have a numeric dtype. 

484 

485 Parameters 

486 ---------- 

487 copy : bool, default False 

488 Whether to copy the blocks 

489 """ 

490 return self._get_data_subset( 

491 lambda arr: is_numeric_dtype(arr.dtype) 

492 or getattr(arr.dtype, "_is_numeric", False) 

493 ) 

494 

495 def copy(self: T, deep: bool | Literal["all"] | None = True) -> T: 

496 """ 

497 Make deep or shallow copy of ArrayManager 

498 

499 Parameters 

500 ---------- 

501 deep : bool or string, default True 

502 If False, return shallow copy (do not copy data) 

503 If 'all', copy data and a deep copy of the index 

504 

505 Returns 

506 ------- 

507 BlockManager 

508 """ 

509 if deep is None: 

510 # ArrayManager does not yet support CoW, so deep=None always means 

511 # deep=True for now 

512 deep = True 

513 

514 # this preserves the notion of view copying of axes 

515 if deep: 

516 # hit in e.g. tests.io.json.test_pandas 

517 

518 def copy_func(ax): 

519 return ax.copy(deep=True) if deep == "all" else ax.view() 

520 

521 new_axes = [copy_func(ax) for ax in self._axes] 

522 else: 

523 new_axes = list(self._axes) 

524 

525 if deep: 

526 new_arrays = [arr.copy() for arr in self.arrays] 

527 else: 

528 new_arrays = list(self.arrays) 

529 return type(self)(new_arrays, new_axes, verify_integrity=False) 

530 

531 def reindex_indexer( 

532 self: T, 

533 new_axis, 

534 indexer, 

535 axis: AxisInt, 

536 fill_value=None, 

537 allow_dups: bool = False, 

538 copy: bool | None = True, 

539 # ignored keywords 

540 only_slice: bool = False, 

541 # ArrayManager specific keywords 

542 use_na_proxy: bool = False, 

543 ) -> T: 

544 axis = self._normalize_axis(axis) 

545 return self._reindex_indexer( 

546 new_axis, 

547 indexer, 

548 axis, 

549 fill_value, 

550 allow_dups, 

551 copy, 

552 use_na_proxy, 

553 ) 

554 

555 def _reindex_indexer( 

556 self: T, 

557 new_axis, 

558 indexer: npt.NDArray[np.intp] | None, 

559 axis: AxisInt, 

560 fill_value=None, 

561 allow_dups: bool = False, 

562 copy: bool | None = True, 

563 use_na_proxy: bool = False, 

564 ) -> T: 

565 """ 

566 Parameters 

567 ---------- 

568 new_axis : Index 

569 indexer : ndarray[intp] or None 

570 axis : int 

571 fill_value : object, default None 

572 allow_dups : bool, default False 

573 copy : bool, default True 

574 

575 

576 pandas-indexer with -1's only. 

577 """ 

578 if copy is None: 

579 # ArrayManager does not yet support CoW, so deep=None always means 

580 # deep=True for now 

581 copy = True 

582 

583 if indexer is None: 

584 if new_axis is self._axes[axis] and not copy: 

585 return self 

586 

587 result = self.copy(deep=copy) 

588 result._axes = list(self._axes) 

589 result._axes[axis] = new_axis 

590 return result 

591 

592 # some axes don't allow reindexing with dups 

593 if not allow_dups: 

594 self._axes[axis]._validate_can_reindex(indexer) 

595 

596 if axis >= self.ndim: 

597 raise IndexError("Requested axis not found in manager") 

598 

599 if axis == 1: 

600 new_arrays = [] 

601 for i in indexer: 

602 if i == -1: 

603 arr = self._make_na_array( 

604 fill_value=fill_value, use_na_proxy=use_na_proxy 

605 ) 

606 else: 

607 arr = self.arrays[i] 

608 if copy: 

609 arr = arr.copy() 

610 new_arrays.append(arr) 

611 

612 else: 

613 validate_indices(indexer, len(self._axes[0])) 

614 indexer = ensure_platform_int(indexer) 

615 mask = indexer == -1 

616 needs_masking = mask.any() 

617 new_arrays = [ 

618 take_1d( 

619 arr, 

620 indexer, 

621 allow_fill=needs_masking, 

622 fill_value=fill_value, 

623 mask=mask, 

624 # if fill_value is not None else blk.fill_value 

625 ) 

626 for arr in self.arrays 

627 ] 

628 

629 new_axes = list(self._axes) 

630 new_axes[axis] = new_axis 

631 

632 return type(self)(new_arrays, new_axes, verify_integrity=False) 

633 

634 def take( 

635 self: T, 

636 indexer, 

637 axis: AxisInt = 1, 

638 verify: bool = True, 

639 convert_indices: bool = True, 

640 ) -> T: 

641 """ 

642 Take items along any axis. 

643 """ 

644 axis = self._normalize_axis(axis) 

645 

646 indexer = ( 

647 np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") 

648 if isinstance(indexer, slice) 

649 else np.asanyarray(indexer, dtype="int64") 

650 ) 

651 

652 if not indexer.ndim == 1: 

653 raise ValueError("indexer should be 1-dimensional") 

654 

655 n = self.shape_proper[axis] 

656 if convert_indices: 

657 indexer = maybe_convert_indices(indexer, n, verify=verify) 

658 

659 new_labels = self._axes[axis].take(indexer) 

660 return self._reindex_indexer( 

661 new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True 

662 ) 

663 

664 def _make_na_array(self, fill_value=None, use_na_proxy: bool = False): 

665 if use_na_proxy: 

666 assert fill_value is None 

667 return NullArrayProxy(self.shape_proper[0]) 

668 

669 if fill_value is None: 

670 fill_value = np.nan 

671 

672 dtype, fill_value = infer_dtype_from_scalar(fill_value) 

673 # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], 

674 # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, 

675 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], 

676 # _DTypeDict, Tuple[Any, Any]]]" 

677 values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] 

678 values.fill(fill_value) 

679 return values 

680 

681 def _equal_values(self, other) -> bool: 

682 """ 

683 Used in .equals defined in base class. Only check the column values 

684 assuming shape and indexes have already been checked. 

685 """ 

686 for left, right in zip(self.arrays, other.arrays): 

687 if not array_equals(left, right): 

688 return False 

689 return True 

690 

691 # TODO 

692 # to_dict 

693 

694 

695class ArrayManager(BaseArrayManager): 

696 @property 

697 def ndim(self) -> Literal[2]: 

698 return 2 

699 

700 def __init__( 

701 self, 

702 arrays: list[np.ndarray | ExtensionArray], 

703 axes: list[Index], 

704 verify_integrity: bool = True, 

705 ) -> None: 

706 # Note: we are storing the axes in "_axes" in the (row, columns) order 

707 # which contrasts the order how it is stored in BlockManager 

708 self._axes = axes 

709 self.arrays = arrays 

710 

711 if verify_integrity: 

712 self._axes = [ensure_index(ax) for ax in axes] 

713 arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays] 

714 self.arrays = [maybe_coerce_values(arr) for arr in arrays] 

715 self._verify_integrity() 

716 

717 def _verify_integrity(self) -> None: 

718 n_rows, n_columns = self.shape_proper 

719 if not len(self.arrays) == n_columns: 

720 raise ValueError( 

721 "Number of passed arrays must equal the size of the column Index: " 

722 f"{len(self.arrays)} arrays vs {n_columns} columns." 

723 ) 

724 for arr in self.arrays: 

725 if not len(arr) == n_rows: 

726 raise ValueError( 

727 "Passed arrays should have the same length as the rows Index: " 

728 f"{len(arr)} vs {n_rows} rows" 

729 ) 

730 if not isinstance(arr, (np.ndarray, ExtensionArray)): 

731 raise ValueError( 

732 "Passed arrays should be np.ndarray or ExtensionArray instances, " 

733 f"got {type(arr)} instead" 

734 ) 

735 if not arr.ndim == 1: 

736 raise ValueError( 

737 "Passed arrays should be 1-dimensional, got array with " 

738 f"{arr.ndim} dimensions instead." 

739 ) 

740 

741 # -------------------------------------------------------------------- 

742 # Indexing 

743 

744 def fast_xs(self, loc: int) -> SingleArrayManager: 

745 """ 

746 Return the array corresponding to `frame.iloc[loc]`. 

747 

748 Parameters 

749 ---------- 

750 loc : int 

751 

752 Returns 

753 ------- 

754 np.ndarray or ExtensionArray 

755 """ 

756 dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) 

757 

758 values = [arr[loc] for arr in self.arrays] 

759 if isinstance(dtype, ExtensionDtype): 

760 result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) 

761 # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT 

762 elif is_datetime64_ns_dtype(dtype): 

763 result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray 

764 elif is_timedelta64_ns_dtype(dtype): 

765 result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray 

766 else: 

767 result = np.array(values, dtype=dtype) 

768 return SingleArrayManager([result], [self._axes[1]]) 

769 

770 def get_slice(self, slobj: slice, axis: AxisInt = 0) -> ArrayManager: 

771 axis = self._normalize_axis(axis) 

772 

773 if axis == 0: 

774 arrays = [arr[slobj] for arr in self.arrays] 

775 elif axis == 1: 

776 arrays = self.arrays[slobj] 

777 

778 new_axes = list(self._axes) 

779 new_axes[axis] = new_axes[axis]._getitem_slice(slobj) 

780 

781 return type(self)(arrays, new_axes, verify_integrity=False) 

782 

783 def iget(self, i: int) -> SingleArrayManager: 

784 """ 

785 Return the data as a SingleArrayManager. 

786 """ 

787 values = self.arrays[i] 

788 return SingleArrayManager([values], [self._axes[0]]) 

789 

790 def iget_values(self, i: int) -> ArrayLike: 

791 """ 

792 Return the data for column i as the values (ndarray or ExtensionArray). 

793 """ 

794 return self.arrays[i] 

795 

796 @property 

797 def column_arrays(self) -> list[ArrayLike]: 

798 """ 

799 Used in the JSON C code to access column arrays. 

800 """ 

801 

802 return [np.asarray(arr) for arr in self.arrays] 

803 

804 def iset( 

805 self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False 

806 ) -> None: 

807 """ 

808 Set new column(s). 

809 

810 This changes the ArrayManager in-place, but replaces (an) existing 

811 column(s), not changing column values in-place). 

812 

813 Parameters 

814 ---------- 

815 loc : integer, slice or boolean mask 

816 Positional location (already bounds checked) 

817 value : np.ndarray or ExtensionArray 

818 inplace : bool, default False 

819 Whether overwrite existing array as opposed to replacing it. 

820 """ 

821 # single column -> single integer index 

822 if lib.is_integer(loc): 

823 # TODO can we avoid needing to unpack this here? That means converting 

824 # DataFrame into 1D array when loc is an integer 

825 if isinstance(value, np.ndarray) and value.ndim == 2: 

826 assert value.shape[1] == 1 

827 value = value[:, 0] 

828 

829 # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item 

830 # but we should avoid that and pass directly the proper array 

831 value = maybe_coerce_values(value) 

832 

833 assert isinstance(value, (np.ndarray, ExtensionArray)) 

834 assert value.ndim == 1 

835 assert len(value) == len(self._axes[0]) 

836 self.arrays[loc] = value 

837 return 

838 

839 # multiple columns -> convert slice or array to integer indices 

840 elif isinstance(loc, slice): 

841 indices = range( 

842 loc.start if loc.start is not None else 0, 

843 loc.stop if loc.stop is not None else self.shape_proper[1], 

844 loc.step if loc.step is not None else 1, 

845 ) 

846 else: 

847 assert isinstance(loc, np.ndarray) 

848 assert loc.dtype == "bool" 

849 # error: Incompatible types in assignment (expression has type "ndarray", 

850 # variable has type "range") 

851 indices = np.nonzero(loc)[0] # type: ignore[assignment] 

852 

853 assert value.ndim == 2 

854 assert value.shape[0] == len(self._axes[0]) 

855 

856 for value_idx, mgr_idx in enumerate(indices): 

857 # error: No overload variant of "__getitem__" of "ExtensionArray" matches 

858 # argument type "Tuple[slice, int]" 

859 value_arr = value[:, value_idx] # type: ignore[call-overload] 

860 self.arrays[mgr_idx] = value_arr 

861 return 

862 

863 def column_setitem( 

864 self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False 

865 ) -> None: 

866 """ 

867 Set values ("setitem") into a single column (not setting the full column). 

868 

869 This is a method on the ArrayManager level, to avoid creating an 

870 intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) 

871 """ 

872 if not is_integer(loc): 

873 raise TypeError("The column index should be an integer") 

874 arr = self.arrays[loc] 

875 mgr = SingleArrayManager([arr], [self._axes[0]]) 

876 if inplace_only: 

877 mgr.setitem_inplace(idx, value) 

878 else: 

879 new_mgr = mgr.setitem((idx,), value) 

880 # update existing ArrayManager in-place 

881 self.arrays[loc] = new_mgr.arrays[0] 

882 

883 def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: 

884 """ 

885 Insert item at selected position. 

886 

887 Parameters 

888 ---------- 

889 loc : int 

890 item : hashable 

891 value : np.ndarray or ExtensionArray 

892 """ 

893 # insert to the axis; this could possibly raise a TypeError 

894 new_axis = self.items.insert(loc, item) 

895 

896 value = extract_array(value, extract_numpy=True) 

897 if value.ndim == 2: 

898 if value.shape[0] == 1: 

899 # error: No overload variant of "__getitem__" of "ExtensionArray" 

900 # matches argument type "Tuple[int, slice]" 

901 value = value[0, :] # type: ignore[call-overload] 

902 else: 

903 raise ValueError( 

904 f"Expected a 1D array, got an array with shape {value.shape}" 

905 ) 

906 value = maybe_coerce_values(value) 

907 

908 # TODO self.arrays can be empty 

909 # assert len(value) == len(self.arrays[0]) 

910 

911 # TODO is this copy needed? 

912 arrays = self.arrays.copy() 

913 arrays.insert(loc, value) 

914 

915 self.arrays = arrays 

916 self._axes[1] = new_axis 

917 

918 def idelete(self, indexer) -> ArrayManager: 

919 """ 

920 Delete selected locations in-place (new block and array, same BlockManager) 

921 """ 

922 to_keep = np.ones(self.shape[0], dtype=np.bool_) 

923 to_keep[indexer] = False 

924 

925 self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] 

926 self._axes = [self._axes[0], self._axes[1][to_keep]] 

927 return self 

928 

929 # -------------------------------------------------------------------- 

930 # Array-wise Operation 

931 

932 def grouped_reduce(self: T, func: Callable) -> T: 

933 """ 

934 Apply grouped reduction function columnwise, returning a new ArrayManager. 

935 

936 Parameters 

937 ---------- 

938 func : grouped reduction function 

939 

940 Returns 

941 ------- 

942 ArrayManager 

943 """ 

944 result_arrays: list[np.ndarray] = [] 

945 result_indices: list[int] = [] 

946 

947 for i, arr in enumerate(self.arrays): 

948 # grouped_reduce functions all expect 2D arrays 

949 arr = ensure_block_shape(arr, ndim=2) 

950 res = func(arr) 

951 if res.ndim == 2: 

952 # reverse of ensure_block_shape 

953 assert res.shape[0] == 1 

954 res = res[0] 

955 

956 result_arrays.append(res) 

957 result_indices.append(i) 

958 

959 if len(result_arrays) == 0: 

960 nrows = 0 

961 else: 

962 nrows = result_arrays[0].shape[0] 

963 index = Index(range(nrows)) 

964 

965 columns = self.items 

966 

967 # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; 

968 # expected "List[Union[ndarray, ExtensionArray]]" 

969 return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] 

970 

971 def reduce(self: T, func: Callable) -> T: 

972 """ 

973 Apply reduction function column-wise, returning a single-row ArrayManager. 

974 

975 Parameters 

976 ---------- 

977 func : reduction function 

978 

979 Returns 

980 ------- 

981 ArrayManager 

982 """ 

983 result_arrays: list[np.ndarray] = [] 

984 for i, arr in enumerate(self.arrays): 

985 res = func(arr, axis=0) 

986 

987 # TODO NaT doesn't preserve dtype, so we need to ensure to create 

988 # a timedelta result array if original was timedelta 

989 # what if datetime results in timedelta? (eg std) 

990 dtype = arr.dtype if res is NaT else None 

991 result_arrays.append( 

992 sanitize_array([res], None, dtype=dtype) # type: ignore[arg-type] 

993 ) 

994 

995 index = Index._simple_new(np.array([None], dtype=object)) # placeholder 

996 columns = self.items 

997 

998 # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; 

999 # expected "List[Union[ndarray, ExtensionArray]]" 

1000 new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] 

1001 return new_mgr 

1002 

1003 def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: 

1004 """ 

1005 Apply array_op blockwise with another (aligned) BlockManager. 

1006 """ 

1007 # TODO what if `other` is BlockManager ? 

1008 left_arrays = self.arrays 

1009 right_arrays = other.arrays 

1010 result_arrays = [ 

1011 array_op(left, right) for left, right in zip(left_arrays, right_arrays) 

1012 ] 

1013 return type(self)(result_arrays, self._axes) 

1014 

def quantile(
    self,
    *,
    qs: Index,  # with dtype float64
    axis: AxisInt = 0,
    transposed: bool = False,
    interpolation: QuantileInterpolation = "linear",
) -> ArrayManager:
    """
    Compute the requested quantiles for every stored array.

    Parameters
    ----------
    qs : Index
        Quantiles to compute; also used as the first axis of the result.
    axis : int
        Must be 1 (asserted).
    transposed : bool, default False
        Accepted but not used in this method.
    interpolation : str, default "linear"
        Forwarded to ``quantile_compat``.

    Returns
    -------
    ArrayManager
    """
    two_dim_arrays = [ensure_block_shape(values, 2) for values in self.arrays]
    assert axis == 1

    quantile_points = qs._values
    results = []
    for values in two_dim_arrays:
        res = quantile_compat(values, np.asarray(quantile_points), interpolation)
        if res.ndim == 2:
            # a 2D result must have exactly one row; store that row as 1D
            assert res.shape[0] == 1, res.shape
            res = res[0]
        results.append(res)

    return type(self)(results, [qs, self._axes[1]])

1035 

1036 # ---------------------------------------------------------------- 

1037 

def unstack(self, unstacker, fill_value) -> ArrayManager:
    """
    Return a manager in which every stored array has been unstacked.

    Each stored array yields ``unstacker.full_shape[1]`` new arrays, one per
    column of the reshaped indexer.

    Parameters
    ----------
    unstacker : reshape._Unstacker
    fill_value : Any
        fill_value for newly introduced missing values.

    Returns
    -------
    unstacked : ArrayManager
        Built with ``verify_integrity=False``.
    """
    positions, _ = unstacker._indexer_and_to_sort
    everything_present = unstacker.mask.all()
    if everything_present:
        flat_indexer = positions
        allow_fill = False
        missing_2d = None
        column_needs_fill = None
    else:
        # slots not covered by the mask keep -1
        flat_indexer = np.full(unstacker.mask.shape, -1)
        flat_indexer[unstacker.mask] = positions
        allow_fill = True
        # calculating the full mask once and passing it to take_1d is faster
        # than letting take_1d calculate it in each repeated call
        missing_2d = (~unstacker.mask).reshape(*unstacker.full_shape)
        column_needs_fill = missing_2d.any(axis=0)
    indexer_2d = ensure_platform_int(flat_indexer.reshape(*unstacker.full_shape))

    def _take_column(values, col):
        if not allow_fill:
            return take_1d(values, indexer_2d[:, col], allow_fill=False)
        # error: Value of type "Optional[Any]" is not indexable [index]
        return take_1d(
            values,
            indexer_2d[:, col],
            allow_fill=column_needs_fill[col],  # type: ignore[index]
            fill_value=fill_value,
            mask=missing_2d[:, col],  # type: ignore[index]
        )

    unstacked_arrays = [
        _take_column(values, col)
        for values in self.arrays
        for col in range(unstacker.full_shape[1])
    ]

    result_axes = [unstacker.new_index, unstacker.get_new_columns(self._axes[1])]
    return type(self)(unstacked_arrays, result_axes, verify_integrity=False)

1090 

def as_array(
    self,
    dtype=None,
    copy: bool = False,
    na_value: object = lib.no_default,
) -> np.ndarray:
    """
    Convert the manager data into a 2D numpy array.

    Parameters
    ----------
    dtype : object, default None
        Data type of the return array. When None, the dtype is derived from
        the dtypes of the stored arrays via ``interleaved_dtype``.
    copy : bool, default False
        If True then guarantee that a copy is returned. A value of
        False does not guarantee that the underlying data is not
        copied.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
    """
    if len(self.arrays) == 0:
        empty_arr = np.empty(self.shape, dtype=float)
        return empty_arr.transpose()

    # We want to copy when na_value is provided to avoid
    # mutating the original object
    copy = copy or na_value is not lib.no_default

    # Compare against None explicitly instead of relying on truthiness: a
    # non-structured np.dtype has length 0 and can therefore be falsy, which
    # would discard a dtype the caller asked for.
    if dtype is None:
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

    # Map the dtype to something np.empty accepts.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype):
        dtype = "object"
    elif is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)

    # Each stored array fills one column of the result.
    for i, arr in enumerate(self.arrays):
        arr = arr.astype(dtype, copy=copy)
        result[:, i] = arr

    if na_value is not lib.no_default:
        result[isna(result)] = na_value

    return result

1145 

1146 

class SingleArrayManager(BaseArrayManager, SingleDataManager):
    """
    One-dimensional manager that stores exactly one array and one index.
    """

    __slots__ = [
        "_axes",  # private attribute, because 'axes' has different order, see below
        "arrays",
    ]

    arrays: list[np.ndarray | ExtensionArray]
    _axes: list[Index]

    @property
    def ndim(self) -> Literal[1]:
        return 1

    def __init__(
        self,
        arrays: list[np.ndarray | ExtensionArray],
        axes: list[Index],
        verify_integrity: bool = True,
    ) -> None:
        self._axes = axes
        self.arrays = arrays

        if not verify_integrity:
            # caller vouches for the inputs; store them untouched
            return

        assert len(axes) == 1
        assert len(arrays) == 1
        self._axes = [ensure_index(ax) for ax in self._axes]
        coerced = maybe_coerce_values(arrays[0])
        self.arrays = [extract_pandas_array(coerced, None, 1)[0]]
        self._verify_integrity()

    def _verify_integrity(self) -> None:
        """Check that a single 1D array matching the index length is stored."""
        (n_rows,) = self.shape
        assert len(self.arrays) == 1
        values = self.arrays[0]
        assert len(values) == n_rows
        if values.ndim != 1:
            raise ValueError(
                "Passed array should be 1-dimensional, got array with "
                f"{values.ndim} dimensions instead."
            )

    @staticmethod
    def _normalize_axis(axis):
        return axis

    def make_empty(self, axes=None) -> SingleArrayManager:
        """Return a manager of the same type wrapping a length-0 array."""
        new_axes = [Index([], dtype=object)] if axes is None else axes
        empty_values: np.ndarray = np.array([], dtype=self.dtype)
        return type(self)([empty_values], new_axes)

    @classmethod
    def from_array(cls, array, index) -> SingleArrayManager:
        """Alternate constructor from one array and one index."""
        return cls([array], [index])

    @property
    def axes(self):
        return self._axes

    @property
    def index(self) -> Index:
        return self._axes[0]

    @property
    def dtype(self):
        return self.array.dtype

    def external_values(self):
        """The array that Series.values returns"""
        return external_values(self.array)

    def internal_values(self):
        """The array that Series._values returns"""
        return self.array

    def array_values(self):
        """The array that Series.array returns"""
        values = self.array
        return PandasArray(values) if isinstance(values, np.ndarray) else values

    @property
    def _can_hold_na(self) -> bool:
        values = self.array
        if not isinstance(values, np.ndarray):
            # ExtensionArray
            return values._can_hold_na
        return values.dtype.kind not in ["b", "i", "u"]

    @property
    def is_single_block(self) -> bool:
        return True

    def fast_xs(self, loc: int) -> SingleArrayManager:
        raise NotImplementedError("Use series._values[loc] instead")

    def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
        """Return a new manager holding the ``slobj`` slice of array and index."""
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        sliced_values = self.array[slobj]
        sliced_index = self.index._getitem_slice(slobj)
        return type(self)([sliced_values], [sliced_index], verify_integrity=False)

    def getitem_mgr(self, indexer) -> SingleArrayManager:
        """Return a new manager with ``indexer`` applied to array and index."""
        picked_values = self.array[indexer]
        picked_index = self.index[indexer]
        return type(self)([picked_values], [picked_index])

    def apply(self, func, **kwargs):
        """
        Apply ``func`` to the stored array and wrap the result in a new manager.

        ``func`` is either a callable receiving the array, or the name of a
        method looked up on the array.
        """
        if callable(func):
            result = func(self.array, **kwargs)
        else:
            bound_method = getattr(self.array, func)
            result = bound_method(**kwargs)
        return type(self)([result], self._axes)

    def setitem(self, indexer, value) -> SingleArrayManager:
        """
        Set values with indexer.

        For SingleArrayManager, this backs s[indexer] = value

        See `setitem_inplace` for a version that works inplace and doesn't
        return a new Manager.
        """
        too_many_dims = isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim
        if too_many_dims:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")
        return self.apply_with_block("setitem", indexer=indexer, value=value)

    def idelete(self, indexer) -> SingleArrayManager:
        """
        Delete selected locations in-place (new array, same ArrayManager)
        """
        keep_mask = np.ones(self.shape[0], dtype=np.bool_)
        keep_mask[indexer] = False

        self.arrays = [self.arrays[0][keep_mask]]
        self._axes = [self._axes[0][keep_mask]]
        return self

    def _get_data_subset(self, predicate: Callable) -> SingleArrayManager:
        # used in get_numeric_data / get_bool_data
        if not predicate(self.array):
            return self.make_empty()
        return type(self)(self.arrays, self._axes, verify_integrity=False)

    def set_values(self, values: ArrayLike) -> None:
        """
        Set (replace) the values of the SingleArrayManager in place.

        Use at your own risk! This does not check if the passed values are
        valid for the current SingleArrayManager (length, dtype, etc).
        """
        self.arrays[0] = values

    def to_2d_mgr(self, columns: Index) -> ArrayManager:
        """
        Manager analogue of Series.to_frame
        """
        frame_arrays = [self.arrays[0]]
        frame_axes = [self.axes[0], columns]
        return ArrayManager(frame_arrays, frame_axes, verify_integrity=False)

1315 

1316 

class NullArrayProxy:
    """
    Proxy object for an all-NA array.

    Only stores the length of the array, and not the dtype. The dtype
    will only be known when actually concatenating (after determining the
    common dtype, for which this proxy is ignored).
    Using this object avoids that the internals/concat.py needs to determine
    the proper dtype and array type.
    """

    ndim = 1

    def __init__(self, n: int) -> None:
        self.n = n

    @property
    def shape(self) -> tuple[int]:
        return (self.n,)

    def to_array(self, dtype: DtypeObj) -> ArrayLike:
        """
        Materialize the all-NA array of length ``self.n`` for a given dtype.

        Parameters
        ----------
        dtype : the dtype for the resulting array

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if isinstance(dtype, ExtensionDtype):
            # take with an all -1 indexer and allow_fill=True from an empty
            # array of the target dtype
            array_type = dtype.construct_array_type()
            empty = array_type._from_sequence([], dtype=dtype)
            all_missing = -np.ones(self.n, dtype=np.intp)
            return empty.take(all_missing, allow_fill=True)

        # when introducing missing values, int becomes float, bool becomes object
        dtype = ensure_dtype_can_hold_na(dtype)
        result = np.empty(self.n, dtype=dtype)
        result.fill(na_value_for_dtype(dtype))
        return ensure_wrapped_if_datetimelike(result)