Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/managers.py: 19%

951 statements  

from __future__ import annotations

import itertools
from typing import (
    Any,
    Callable,
    Hashable,
    Literal,
    Sequence,
    TypeVar,
    cast,
)
import warnings
import weakref

import numpy as np

from pandas._config import using_copy_on_write

from pandas._libs import (
    algos as libalgos,
    internals as libinternals,
    lib,
)
from pandas._libs.internals import (
    BlockPlacement,
    BlockValuesRefs,
)
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeObj,
    QuantileInterpolation,
    Shape,
    npt,
    type_t,
)
from pandas.errors import PerformanceWarning
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import infer_dtype_from_scalar
from pandas.core.dtypes.common import (
    ensure_platform_int,
    is_1d_only_ea_dtype,
    is_dtype_equal,
    is_list_like,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)
from pandas.core.dtypes.missing import (
    array_equals,
    isna,
)

import pandas.core.algorithms as algos
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.arrays.sparse import SparseDtype
import pandas.core.common as com
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import (
    Index,
    ensure_index,
)
from pandas.core.internals.base import (
    DataManager,
    SingleDataManager,
    interleaved_dtype,
)
from pandas.core.internals.blocks import (
    Block,
    NumpyBlock,
    ensure_block_shape,
    extend_blocks,
    get_block_type,
    new_block,
    new_block_2d,
)
from pandas.core.internals.ops import (
    blockwise_all,
    operate_blockwise,
)

T = TypeVar("T", bound="BaseBlockManager")


class BaseBlockManager(DataManager):
    """
    Core internal data structure to implement DataFrame, Series, etc.

    Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
    lightweight blocked set of labeled data to be manipulated by the DataFrame
    public API class

    Attributes
    ----------
    shape
    ndim
    axes
    values
    items

    Methods
    -------
    set_axis(axis, new_labels)
    copy(deep=True)

    get_dtypes

    apply(func, axes, block_filter_fn)

    get_bool_data
    get_numeric_data

    get_slice(slice_like, axis)
    get(label)
    iget(loc)

    take(indexer, axis)
    reindex_axis(new_labels, axis)
    reindex_indexer(new_labels, indexer, axis)

    delete(label)
    insert(loc, label, value)
    set(label, value)

    Parameters
    ----------
    blocks: Sequence of Block
    axes: Sequence of Index
    verify_integrity: bool, default True

    Notes
    -----
    This is *not* a public API class
    """

    __slots__ = ()

    _blknos: npt.NDArray[np.intp]
    _blklocs: npt.NDArray[np.intp]
    blocks: tuple[Block, ...]
    axes: list[Index]

    @property
    def ndim(self) -> int:
        raise NotImplementedError

    _known_consolidated: bool
    _is_consolidated: bool

    def __init__(self, blocks, axes, verify_integrity: bool = True) -> None:
        raise NotImplementedError

    @classmethod
    def from_blocks(cls: type_t[T], blocks: list[Block], axes: list[Index]) -> T:
        raise NotImplementedError

    @property
    def blknos(self) -> npt.NDArray[np.intp]:
        """
        Suppose we want to find the array corresponding to our i'th column.

        blknos[i] identifies the block from self.blocks that contains this column.

        blklocs[i] identifies the column of interest within
        self.blocks[self.blknos[i]]
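
        Examples
        --------
        Illustrative only: for a hypothetical manager whose columns 0 and 2
        live in block 0 and whose column 1 lives in block 1, ``blknos`` would
        be ``[0, 1, 0]`` and ``blklocs`` would be ``[0, 0, 1]``, i.e. column 2
        is the second row of ``self.blocks[0].values``.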

176 """ 

177 if self._blknos is None: 

178 # Note: these can be altered by other BlockManager methods. 

179 self._rebuild_blknos_and_blklocs() 

180 

181 return self._blknos 

182 

183 @property 

184 def blklocs(self) -> npt.NDArray[np.intp]: 

185 """ 

186 See blknos.__doc__ 

187 """ 

188 if self._blklocs is None: 

189 # Note: these can be altered by other BlockManager methods. 

190 self._rebuild_blknos_and_blklocs() 

191 

192 return self._blklocs 

193 

194 def make_empty(self: T, axes=None) -> T: 

195 """return an empty BlockManager with the items axis of len 0""" 

196 if axes is None: 

197 axes = [Index([])] + self.axes[1:] 

198 

199 # preserve dtype if possible 

200 if self.ndim == 1: 

201 assert isinstance(self, SingleBlockManager) # for mypy 

202 blk = self.blocks[0] 

203 arr = blk.values[:0] 

204 bp = BlockPlacement(slice(0, 0)) 

205 nb = blk.make_block_same_class(arr, placement=bp) 

206 blocks = [nb] 

207 else: 

208 blocks = [] 

209 return type(self).from_blocks(blocks, axes) 

210 

211 def __nonzero__(self) -> bool: 

212 return True 

213 

214 # Python3 compat 

215 __bool__ = __nonzero__ 

216 

217 def _normalize_axis(self, axis: AxisInt) -> int: 

218 # switch axis to follow BlockManager logic 

219 if self.ndim == 2: 

220 axis = 1 if axis == 0 else 0 

221 return axis 

222 

223 def set_axis(self, axis: AxisInt, new_labels: Index) -> None: 

224 # Caller is responsible for ensuring we have an Index object. 

225 self._validate_set_axis(axis, new_labels) 

226 self.axes[axis] = new_labels 

227 

228 @property 

229 def is_single_block(self) -> bool: 

230 # Assumes we are 2D; overridden by SingleBlockManager 

231 return len(self.blocks) == 1 

232 

233 @property 

234 def items(self) -> Index: 

235 return self.axes[0] 

236 

237 def _has_no_reference(self, i: int) -> bool: 

238 """ 

239 Check for column `i` if it has references. 

240 (whether it references another array or is itself being referenced) 

241 Returns True if the column has no references. 

242 """ 

243 blkno = self.blknos[i] 

244 return self._has_no_reference_block(blkno) 

245 

246 def _has_no_reference_block(self, blkno: int) -> bool: 

247 """ 

248 Check for block `i` if it has references. 

249 (whether it references another array or is itself being referenced) 

250 Returns True if the block has no references. 

251 """ 

252 return not self.blocks[blkno].refs.has_reference() 

253 

254 def add_references(self, mgr: BaseBlockManager) -> None: 

255 """ 

256 Adds the references from one manager to another. We assume that both 

257 managers have the same block structure. 

258 """ 

259 if len(self.blocks) != len(mgr.blocks): 

260 # If block structure changes, then we made a copy 

261 return 

262 for i, blk in enumerate(self.blocks): 

263 blk.refs = mgr.blocks[i].refs 

264 # Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type 

265 # "Block"; expected "SharedBlock" 

266 blk.refs.add_reference(blk) # type: ignore[arg-type] 

267 

268 def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool: 

269 """ 

270 Checks if two blocks from two different block managers reference the 

271 same underlying values. 

272 """ 

273 ref = weakref.ref(self.blocks[blkno]) 

274 return ref in mgr.blocks[blkno].refs.referenced_blocks 

275 

276 def get_dtypes(self): 

277 dtypes = np.array([blk.dtype for blk in self.blocks]) 

278 return dtypes.take(self.blknos) 

279 

280 @property 

281 def arrays(self) -> list[ArrayLike]: 

282 """ 

283 Quick access to the backing arrays of the Blocks. 

284 

285 Only for compatibility with ArrayManager for testing convenience. 

286 Not to be used in actual code, and return value is not the same as the 

287 ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs). 

288 

289 Warning! The returned arrays don't handle Copy-on-Write, so this should 

290 be used with caution (only in read-mode). 

291 """ 

292 return [blk.values for blk in self.blocks] 

293 

294 def __repr__(self) -> str: 

295 output = type(self).__name__ 

296 for i, ax in enumerate(self.axes): 

297 if i == 0: 

298 output += f"\nItems: {ax}" 

299 else: 

300 output += f"\nAxis {i}: {ax}" 

301 

302 for block in self.blocks: 

303 output += f"\n{block}" 

304 return output 

305 

306 def apply( 

307 self: T, 

308 f, 

309 align_keys: list[str] | None = None, 

310 **kwargs, 

311 ) -> T: 

312 """ 

313 Iterate over the blocks, collect and create a new BlockManager. 

314 

315 Parameters 

316 ---------- 

317 f : str or callable 

318 Name of the Block method to apply. 

319 align_keys: List[str] or None, default None 

320 **kwargs 

321 Keywords to pass to `f` 

322 

323 Returns 

324 ------- 

325 BlockManager 
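
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to be an existing BlockManager);
        passing a string dispatches to the Block method of that name::

            new_mgr = mgr.apply("copy", deep=True)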

326 """ 

327 assert "filter" not in kwargs 

328 

329 align_keys = align_keys or [] 

330 result_blocks: list[Block] = [] 

331 # fillna: Series/DataFrame is responsible for making sure value is aligned 

332 

333 aligned_args = {k: kwargs[k] for k in align_keys} 

334 

335 for b in self.blocks: 

336 if aligned_args: 

337 for k, obj in aligned_args.items(): 

338 if isinstance(obj, (ABCSeries, ABCDataFrame)): 

339 # The caller is responsible for ensuring that 

340 # obj.axes[-1].equals(self.items) 

341 if obj.ndim == 1: 

342 kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values 

343 else: 

344 kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values 

345 else: 

346 # otherwise we have an ndarray 

347 kwargs[k] = obj[b.mgr_locs.indexer] 

348 

349 if callable(f): 

350 applied = b.apply(f, **kwargs) 

351 else: 

352 applied = getattr(b, f)(**kwargs) 

353 result_blocks = extend_blocks(applied, result_blocks) 

354 

355 out = type(self).from_blocks(result_blocks, self.axes) 

356 return out 

357 

358 def where(self: T, other, cond, align: bool) -> T: 

359 if align: 

360 align_keys = ["other", "cond"] 

361 else: 

362 align_keys = ["cond"] 

363 other = extract_array(other, extract_numpy=True) 

364 

365 return self.apply( 

366 "where", 

367 align_keys=align_keys, 

368 other=other, 

369 cond=cond, 

370 using_cow=using_copy_on_write(), 

371 ) 

372 

373 def round(self: T, decimals: int, using_cow: bool = False) -> T: 

374 return self.apply( 

375 "round", 

376 decimals=decimals, 

377 using_cow=using_cow, 

378 ) 

379 

380 def setitem(self: T, indexer, value) -> T: 

381 """ 

382 Set values with indexer. 

383 

384 For SingleBlockManager, this backs s[indexer] = value 

385 """ 

386 if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: 

387 raise ValueError(f"Cannot set values with ndim > {self.ndim}") 

388 

389 if using_copy_on_write() and not self._has_no_reference(0): 

390 # if being referenced -> perform Copy-on-Write and clear the reference 

391 # this method is only called if there is a single block -> hardcoded 0 

392 self = self.copy() 

393 

394 return self.apply("setitem", indexer=indexer, value=value) 

395 

396 def putmask(self, mask, new, align: bool = True): 

397 if align: 

398 align_keys = ["new", "mask"] 

399 else: 

400 align_keys = ["mask"] 

401 new = extract_array(new, extract_numpy=True) 

402 

403 return self.apply( 

404 "putmask", 

405 align_keys=align_keys, 

406 mask=mask, 

407 new=new, 

408 using_cow=using_copy_on_write(), 

409 ) 

410 

411 def diff(self: T, n: int, axis: AxisInt) -> T: 

412 # only reached with self.ndim == 2 and axis == 1 

413 axis = self._normalize_axis(axis) 

414 return self.apply("diff", n=n, axis=axis) 

415 

416 def interpolate(self: T, inplace: bool, **kwargs) -> T: 

417 return self.apply( 

418 "interpolate", inplace=inplace, **kwargs, using_cow=using_copy_on_write() 

419 ) 

420 

421 def shift(self: T, periods: int, axis: AxisInt, fill_value) -> T: 

422 axis = self._normalize_axis(axis) 

423 if fill_value is lib.no_default: 

424 fill_value = None 

425 

426 return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value) 

427 

428 def fillna(self: T, value, limit, inplace: bool, downcast) -> T: 

429 if limit is not None: 

430 # Do this validation even if we go through one of the no-op paths 

431 limit = libalgos.validate_limit(None, limit=limit) 

432 

433 return self.apply( 

434 "fillna", 

435 value=value, 

436 limit=limit, 

437 inplace=inplace, 

438 downcast=downcast, 

439 using_cow=using_copy_on_write(), 

440 ) 

441 

442 def astype(self: T, dtype, copy: bool | None = False, errors: str = "raise") -> T: 

443 if copy is None: 

444 if using_copy_on_write(): 

445 copy = False 

446 else: 

447 copy = True 

448 elif using_copy_on_write(): 

449 copy = False 
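
        # Net effect of the checks above: with Copy-on-Write enabled the eager
        # copy is always skipped (copies happen lazily at the Block level when
        # needed); without CoW, copy=None falls back to an eager copy.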


        return self.apply(
            "astype",
            dtype=dtype,
            copy=copy,
            errors=errors,
            using_cow=using_copy_on_write(),
        )

    def convert(self: T, copy: bool | None) -> T:
        if copy is None:
            if using_copy_on_write():
                copy = False
            else:
                copy = True
        elif using_copy_on_write():
            copy = False

        return self.apply("convert", copy=copy, using_cow=using_copy_on_write())

    def replace(self: T, to_replace, value, inplace: bool) -> T:
        inplace = validate_bool_kwarg(inplace, "inplace")
        # NDFrame.replace ensures that neither to_replace nor value is list-like here
        assert not is_list_like(to_replace)
        assert not is_list_like(value)
        return self.apply(
            "replace",
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            using_cow=using_copy_on_write(),
        )

    def replace_regex(self, **kwargs):
        return self.apply("_replace_regex", **kwargs, using_cow=using_copy_on_write())

    def replace_list(
        self: T,
        src_list: list[Any],
        dest_list: list[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> T:
        """do a list replace"""
        inplace = validate_bool_kwarg(inplace, "inplace")

        bm = self.apply(
            "replace_list",
            src_list=src_list,
            dest_list=dest_list,
            inplace=inplace,
            regex=regex,
            using_cow=using_copy_on_write(),
        )
        bm._consolidate_inplace()
        return bm

    def to_native_types(self: T, **kwargs) -> T:
        """
        Convert values to native types (strings / python objects) that are used
        in formatting (repr / csv).
        """
        return self.apply("to_native_types", **kwargs)

    @property
    def is_numeric_mixed_type(self) -> bool:
        return all(block.is_numeric for block in self.blocks)

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        return any(block.is_extension for block in self.blocks)

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated

        return False

    def _get_data_subset(self: T, predicate: Callable) -> T:
        blocks = [blk for blk in self.blocks if predicate(blk.values)]
        return self._combine(blocks, copy=False)

    def get_bool_data(self: T, copy: bool = False) -> T:
        """
        Select blocks that are bool-dtype and columns from object-dtype blocks
        that are all-bool.

        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """

        new_blocks = []

        for blk in self.blocks:
            if blk.dtype == bool:
                new_blocks.append(blk)

            elif blk.is_object:
                nbs = blk._split()
                for nb in nbs:
                    if nb.is_bool:
                        new_blocks.append(nb)

        return self._combine(new_blocks, copy)

    def get_numeric_data(self: T, copy: bool = False) -> T:
        """
        Parameters
        ----------
        copy : bool, default False
            Whether to copy the blocks
        """
        numeric_blocks = [blk for blk in self.blocks if blk.is_numeric]
        if len(numeric_blocks) == len(self.blocks):
            # Avoid somewhat expensive _combine
            if copy:
                return self.copy(deep=True)
            return self
        return self._combine(numeric_blocks, copy)

    def _combine(
        self: T, blocks: list[Block], copy: bool = True, index: Index | None = None
    ) -> T:
        """return a new manager with the blocks"""
        if len(blocks) == 0:
            if self.ndim == 2:
                # retain our own Index dtype
                if index is not None:
                    axes = [self.items[:0], index]
                else:
                    axes = [self.items[:0]] + self.axes[1:]
                return self.make_empty(axes)
            return self.make_empty()

        # FIXME: optimization potential
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks: list[Block] = []
        # TODO(CoW) we could optimize here if we know that the passed blocks
        # are fully "owned" (eg created from an operation, not coming from
        # an existing manager)
        for b in blocks:
            nb = b.copy(deep=copy)
            nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
            new_blocks.append(nb)

        axes = list(self.axes)
        if index is not None:
            axes[-1] = index
        axes[0] = self.items.take(indexer)

        return type(self).from_blocks(new_blocks, axes)

    @property
    def nblocks(self) -> int:
        return len(self.blocks)

    def copy(self: T, deep: bool | None | Literal["all"] = True) -> T:
        """
        Make deep or shallow copy of BlockManager

        Parameters
        ----------
        deep : bool, string or None, default True
            If False or None, return a shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
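
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to be an existing BlockManager)::

            shallow = mgr.copy(deep=False)  # new manager, values still shared
            deep = mgr.copy(deep="all")     # copies the values and the axes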

633 """ 

634 if deep is None: 

635 if using_copy_on_write(): 

636 # use shallow copy 

637 deep = False 

638 else: 

639 # preserve deep copy for BlockManager with copy=None 

640 deep = True 

641 

642 # this preserves the notion of view copying of axes 

643 if deep: 

644 # hit in e.g. tests.io.json.test_pandas 

645 

646 def copy_func(ax): 

647 return ax.copy(deep=True) if deep == "all" else ax.view() 

648 

649 new_axes = [copy_func(ax) for ax in self.axes] 

650 else: 

651 new_axes = list(self.axes) 

652 

653 res = self.apply("copy", deep=deep) 

654 res.axes = new_axes 

655 

656 if self.ndim > 1: 

657 # Avoid needing to re-compute these 

658 blknos = self._blknos 

659 if blknos is not None: 

660 res._blknos = blknos.copy() 

661 res._blklocs = self._blklocs.copy() 

662 

663 if deep: 

664 res._consolidate_inplace() 

665 return res 

666 

667 def consolidate(self: T) -> T: 

668 """ 

669 Join together blocks having same dtype 

670 

671 Returns 

672 ------- 

673 y : BlockManager 

674 """ 

675 if self.is_consolidated(): 

676 return self 

677 

678 bm = type(self)(self.blocks, self.axes, verify_integrity=False) 

679 bm._is_consolidated = False 

680 bm._consolidate_inplace() 

681 return bm 

682 

683 def reindex_indexer( 

684 self: T, 

685 new_axis: Index, 

686 indexer: npt.NDArray[np.intp] | None, 

687 axis: AxisInt, 

688 fill_value=None, 

689 allow_dups: bool = False, 

690 copy: bool | None = True, 

691 only_slice: bool = False, 

692 *, 

693 use_na_proxy: bool = False, 

694 ) -> T: 

695 """ 

696 Parameters 

697 ---------- 

698 new_axis : Index 

699 indexer : ndarray[intp] or None 

700 axis : int 

701 fill_value : object, default None 

702 allow_dups : bool, default False 

703 copy : bool or None, default True 

704 If None, regard as False to get shallow copy. 

705 only_slice : bool, default False 

706 Whether to take views, not copies, along columns. 

707 use_na_proxy : bool, default False 

708 Whether to use a np.void ndarray for newly introduced columns. 

709 

710 pandas-indexer with -1's only. 
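
        Examples
        --------
        A minimal sketch reindexing the items axis of a 2D manager (``mgr``
        and ``new_cols`` are assumed to exist; the -1 entry introduces a new
        all-NA column)::

            indexer = np.array([0, 1, -1], dtype=np.intp)
            result = mgr.reindex_indexer(new_cols, indexer, axis=0, allow_dups=True)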

711 """ 

712 if copy is None: 

713 if using_copy_on_write(): 

714 # use shallow copy 

715 copy = False 

716 else: 

717 # preserve deep copy for BlockManager with copy=None 

718 copy = True 

719 

720 if indexer is None: 

721 if new_axis is self.axes[axis] and not copy: 

722 return self 

723 

724 result = self.copy(deep=copy) 

725 result.axes = list(self.axes) 

726 result.axes[axis] = new_axis 

727 return result 

728 

729 # Should be intp, but in some cases we get int64 on 32bit builds 

730 assert isinstance(indexer, np.ndarray) 

731 

732 # some axes don't allow reindexing with dups 

733 if not allow_dups: 

734 self.axes[axis]._validate_can_reindex(indexer) 

735 

736 if axis >= self.ndim: 

737 raise IndexError("Requested axis not found in manager") 

738 

739 if axis == 0: 

740 new_blocks = self._slice_take_blocks_ax0( 

741 indexer, 

742 fill_value=fill_value, 

743 only_slice=only_slice, 

744 use_na_proxy=use_na_proxy, 

745 ) 

746 else: 

747 new_blocks = [ 

748 blk.take_nd( 

749 indexer, 

750 axis=1, 

751 fill_value=( 

752 fill_value if fill_value is not None else blk.fill_value 

753 ), 

754 ) 

755 for blk in self.blocks 

756 ] 

757 

758 new_axes = list(self.axes) 

759 new_axes[axis] = new_axis 

760 

761 new_mgr = type(self).from_blocks(new_blocks, new_axes) 

762 if axis == 1: 

763 # We can avoid the need to rebuild these 

764 new_mgr._blknos = self.blknos.copy() 

765 new_mgr._blklocs = self.blklocs.copy() 

766 return new_mgr 

767 

768 def _slice_take_blocks_ax0( 

769 self, 

770 slice_or_indexer: slice | np.ndarray, 

771 fill_value=lib.no_default, 

772 only_slice: bool = False, 

773 *, 

774 use_na_proxy: bool = False, 

775 ) -> list[Block]: 

776 """ 

777 Slice/take blocks along axis=0. 

778 

779 Overloaded for SingleBlock 

780 

781 Parameters 

782 ---------- 

783 slice_or_indexer : slice or np.ndarray[int64] 

784 fill_value : scalar, default lib.no_default 

785 only_slice : bool, default False 

786 If True, we always return views on existing arrays, never copies. 

787 This is used when called from ops.blockwise.operate_blockwise. 

788 use_na_proxy : bool, default False 

789 Whether to use a np.void ndarray for newly introduced columns. 

790 

791 Returns 

792 ------- 

793 new_blocks : list of Block 

794 """ 

795 allow_fill = fill_value is not lib.no_default 

796 

797 sl_type, slobj, sllen = _preprocess_slice_or_indexer( 

798 slice_or_indexer, self.shape[0], allow_fill=allow_fill 

799 ) 

800 

801 if self.is_single_block: 

802 blk = self.blocks[0] 

803 

804 if sl_type == "slice": 

805 # GH#32959 EABlock would fail since we can't make 0-width 

806 # TODO(EA2D): special casing unnecessary with 2D EAs 

807 if sllen == 0: 

808 return [] 

809 bp = BlockPlacement(slice(0, sllen)) 

810 return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)] 

811 elif not allow_fill or self.ndim == 1: 

812 if allow_fill and fill_value is None: 

813 fill_value = blk.fill_value 

814 

815 if not allow_fill and only_slice: 

816 # GH#33597 slice instead of take, so we get 

817 # views instead of copies 

818 blocks = [ 

819 blk.getitem_block_columns( 

820 slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i) 

821 ) 

822 for i, ml in enumerate(slobj) 

823 ] 

824 return blocks 

825 else: 

826 bp = BlockPlacement(slice(0, sllen)) 

827 return [ 

828 blk.take_nd( 

829 slobj, 

830 axis=0, 

831 new_mgr_locs=bp, 

832 fill_value=fill_value, 

833 ) 

834 ] 

835 

836 if sl_type == "slice": 

837 blknos = self.blknos[slobj] 

838 blklocs = self.blklocs[slobj] 

839 else: 

840 blknos = algos.take_nd( 

841 self.blknos, slobj, fill_value=-1, allow_fill=allow_fill 

842 ) 

843 blklocs = algos.take_nd( 

844 self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill 

845 ) 

846 

847 # When filling blknos, make sure blknos is updated before appending to 

848 # blocks list, that way new blkno is exactly len(blocks). 

849 blocks = [] 

850 group = not only_slice 

851 for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group): 

852 if blkno == -1: 

853 # If we've got here, fill_value was not lib.no_default 

854 

855 blocks.append( 

856 self._make_na_block( 

857 placement=mgr_locs, 

858 fill_value=fill_value, 

859 use_na_proxy=use_na_proxy, 

860 ) 

861 ) 

862 else: 

863 blk = self.blocks[blkno] 

864 

865 # Otherwise, slicing along items axis is necessary. 

866 if not blk._can_consolidate and not blk._validate_ndim: 

867 # i.e. we dont go through here for DatetimeTZBlock 

868 # A non-consolidatable block, it's easy, because there's 

869 # only one item and each mgr loc is a copy of that single 

870 # item. 

871 deep = not (only_slice or using_copy_on_write()) 

872 for mgr_loc in mgr_locs: 

873 newblk = blk.copy(deep=deep) 

874 newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1)) 

875 blocks.append(newblk) 

876 

877 else: 

878 # GH#32779 to avoid the performance penalty of copying, 

879 # we may try to only slice 

880 taker = blklocs[mgr_locs.indexer] 

881 max_len = max(len(mgr_locs), taker.max() + 1) 

882 if only_slice or using_copy_on_write(): 

883 taker = lib.maybe_indices_to_slice(taker, max_len) 

884 

885 if isinstance(taker, slice): 

886 nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs) 

887 blocks.append(nb) 

888 elif only_slice: 

889 # GH#33597 slice instead of take, so we get 

890 # views instead of copies 

891 for i, ml in zip(taker, mgr_locs): 

892 slc = slice(i, i + 1) 

893 bp = BlockPlacement(ml) 

894 nb = blk.getitem_block_columns(slc, new_mgr_locs=bp) 

895 # We have np.shares_memory(nb.values, blk.values) 

896 blocks.append(nb) 

897 else: 

898 nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) 

899 blocks.append(nb) 

900 

901 return blocks 

902 

903 def _make_na_block( 

904 self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False 

905 ) -> Block: 

906 # Note: we only get here with self.ndim == 2 

907 

908 if use_na_proxy: 

909 assert fill_value is None 

910 shape = (len(placement), self.shape[1]) 

911 vals = np.empty(shape, dtype=np.void) 

912 nb = NumpyBlock(vals, placement, ndim=2) 

913 return nb 

914 

915 if fill_value is None: 

916 fill_value = np.nan 

917 block_shape = list(self.shape) 

918 block_shape[0] = len(placement) 

919 

920 dtype, fill_value = infer_dtype_from_scalar(fill_value) 

921 # error: Argument "dtype" to "empty" has incompatible type "Union[dtype, 

922 # ExtensionDtype]"; expected "Union[dtype, None, type, _SupportsDtype, str, 

923 # Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], _DtypeDict, 

924 # Tuple[Any, Any]]" 

925 block_values = np.empty(block_shape, dtype=dtype) # type: ignore[arg-type] 

926 block_values.fill(fill_value) 

927 return new_block_2d(block_values, placement=placement) 

928 

929 def take( 

930 self: T, 

931 indexer, 

932 axis: AxisInt = 1, 

933 verify: bool = True, 

934 convert_indices: bool = True, 

935 ) -> T: 

936 """ 

937 Take items along any axis. 

938 

939 indexer : np.ndarray or slice 

940 axis : int, default 1 

941 verify : bool, default True 

942 Check that all entries are between 0 and len(self) - 1, inclusive. 

943 Pass verify=False if this check has been done by the caller. 

944 convert_indices : bool, default True 

945 Whether to attempt to convert indices to positive values. 

946 

947 Returns 

948 ------- 

949 BlockManager 
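
        Examples
        --------
        A minimal sketch taking the first two items (columns) of a 2D manager
        (``mgr`` is assumed to exist; axis=0 is the items axis here)::

            new_mgr = mgr.take(np.array([0, 1], dtype=np.intp), axis=0)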

950 """ 

951 # We have 6 tests that get here with a slice 

952 indexer = ( 

953 np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp) 

954 if isinstance(indexer, slice) 

955 else np.asanyarray(indexer, dtype=np.intp) 

956 ) 

957 

958 n = self.shape[axis] 

959 if convert_indices: 

960 indexer = maybe_convert_indices(indexer, n, verify=verify) 

961 

962 new_labels = self.axes[axis].take(indexer) 

963 return self.reindex_indexer( 

964 new_axis=new_labels, 

965 indexer=indexer, 

966 axis=axis, 

967 allow_dups=True, 

968 copy=None, 

969 ) 

970 

971 

972class BlockManager(libinternals.BlockManager, BaseBlockManager): 

973 """ 

974 BaseBlockManager that holds 2D blocks. 

975 """ 

976 

977 ndim = 2 

978 

979 # ---------------------------------------------------------------- 

980 # Constructors 

981 

982 def __init__( 

983 self, 

984 blocks: Sequence[Block], 

985 axes: Sequence[Index], 

986 verify_integrity: bool = True, 

987 ) -> None: 

988 if verify_integrity: 

989 # Assertion disabled for performance 

990 # assert all(isinstance(x, Index) for x in axes) 

991 

992 for block in blocks: 

993 if self.ndim != block.ndim: 

994 raise AssertionError( 

995 f"Number of Block dimensions ({block.ndim}) must equal " 

996 f"number of axes ({self.ndim})" 

997 ) 

998 # As of 2.0, the caller is responsible for ensuring that 

999 # DatetimeTZBlock with block.ndim == 2 has block.values.ndim ==2; 

1000 # previously there was a special check for fastparquet compat. 

1001 

1002 self._verify_integrity() 

1003 

1004 def _verify_integrity(self) -> None: 

1005 mgr_shape = self.shape 

1006 tot_items = sum(len(x.mgr_locs) for x in self.blocks) 

1007 for block in self.blocks: 

1008 if block.shape[1:] != mgr_shape[1:]: 

1009 raise_construction_error(tot_items, block.shape[1:], self.axes) 

1010 if len(self.items) != tot_items: 

1011 raise AssertionError( 

1012 "Number of manager items must equal union of " 

1013 f"block items\n# manager items: {len(self.items)}, # " 

1014 f"tot_items: {tot_items}" 

1015 ) 

1016 

1017 @classmethod 

1018 def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> BlockManager: 

1019 """ 

1020 Constructor for BlockManager and SingleBlockManager with same signature. 

1021 """ 

1022 return cls(blocks, axes, verify_integrity=False) 

1023 

1024 # ---------------------------------------------------------------- 

1025 # Indexing 

1026 

1027 def fast_xs(self, loc: int) -> SingleBlockManager: 

1028 """ 

1029 Return the array corresponding to `frame.iloc[loc]`. 

1030 

1031 Parameters 

1032 ---------- 

1033 loc : int 

1034 

1035 Returns 

1036 ------- 

1037 np.ndarray or ExtensionArray 
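
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to be an existing 2D
        BlockManager)::

            row = mgr.fast_xs(0)  # SingleBlockManager backing frame.iloc[0]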

1038 """ 

1039 if len(self.blocks) == 1: 

1040 # TODO: this could be wrong if blk.mgr_locs is not slice(None)-like; 

1041 # is this ruled out in the general case? 

1042 result = self.blocks[0].iget((slice(None), loc)) 

1043 # in the case of a single block, the new block is a view 

1044 block = new_block( 

1045 result, 

1046 placement=slice(0, len(result)), 

1047 ndim=1, 

1048 refs=self.blocks[0].refs, 

1049 ) 

1050 return SingleBlockManager(block, self.axes[0]) 

1051 

1052 dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) 

1053 

1054 n = len(self) 

1055 

1056 # GH#46406 

1057 immutable_ea = isinstance(dtype, SparseDtype) 

1058 

1059 if isinstance(dtype, ExtensionDtype) and not immutable_ea: 

1060 cls = dtype.construct_array_type() 

1061 result = cls._empty((n,), dtype=dtype) 

1062 else: 

1063 # error: Argument "dtype" to "empty" has incompatible type 

1064 # "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected 

1065 # "None" 

1066 result = np.empty( 

1067 n, dtype=object if immutable_ea else dtype # type: ignore[arg-type] 

1068 ) 

1069 result = ensure_wrapped_if_datetimelike(result) 

1070 

1071 for blk in self.blocks: 

1072 # Such assignment may incorrectly coerce NaT to None 

1073 # result[blk.mgr_locs] = blk._slice((slice(None), loc)) 

1074 for i, rl in enumerate(blk.mgr_locs): 

1075 result[rl] = blk.iget((i, loc)) 

1076 

1077 if immutable_ea: 

1078 dtype = cast(ExtensionDtype, dtype) 

1079 result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) 

1080 

1081 block = new_block(result, placement=slice(0, len(result)), ndim=1) 

1082 return SingleBlockManager(block, self.axes[0]) 

1083 

1084 def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: 

1085 """ 

1086 Return the data as a SingleBlockManager. 

1087 """ 

1088 block = self.blocks[self.blknos[i]] 

1089 values = block.iget(self.blklocs[i]) 

1090 

1091 # shortcut for select a single-dim from a 2-dim BM 

1092 bp = BlockPlacement(slice(0, len(values))) 

1093 nb = type(block)( 

1094 values, placement=bp, ndim=1, refs=block.refs if track_ref else None 

1095 ) 

1096 return SingleBlockManager(nb, self.axes[1]) 

1097 

1098 def iget_values(self, i: int) -> ArrayLike: 

1099 """ 

1100 Return the data for column i as the values (ndarray or ExtensionArray). 

1101 

1102 Warning! The returned array is a view but doesn't handle Copy-on-Write, 

1103 so this should be used with caution. 

1104 """ 

1105 # TODO(CoW) making the arrays read-only might make this safer to use? 

1106 block = self.blocks[self.blknos[i]] 

1107 values = block.iget(self.blklocs[i]) 

1108 return values 

1109 

1110 @property 

1111 def column_arrays(self) -> list[np.ndarray]: 

1112 """ 

1113 Used in the JSON C code to access column arrays. 

1114 This optimizes compared to using `iget_values` by converting each 

1115 

1116 Warning! This doesn't handle Copy-on-Write, so should be used with 

1117 caution (current use case of consuming this in the JSON code is fine). 

1118 """ 

1119 # This is an optimized equivalent to 

1120 # result = [self.iget_values(i) for i in range(len(self.items))] 

1121 result: list[np.ndarray | None] = [None] * len(self.items) 

1122 

1123 for blk in self.blocks: 

1124 mgr_locs = blk._mgr_locs 

1125 values = blk.values_for_json() 

1126 if values.ndim == 1: 

1127 # TODO(EA2D): special casing not needed with 2D EAs 

1128 result[mgr_locs[0]] = values 

1129 

1130 else: 

1131 for i, loc in enumerate(mgr_locs): 

1132 result[loc] = values[i] 

1133 

1134 # error: Incompatible return value type (got "List[None]", 

1135 # expected "List[ndarray[Any, Any]]") 

1136 return result # type: ignore[return-value] 

1137 

1138 def iset( 

1139 self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False 

1140 ): 

1141 """ 

1142 Set new item in-place. Does not consolidate. Adds new Block if not 

1143 contained in the current set of items 
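
        Examples
        --------
        A minimal sketch replacing column 0 of a 2D manager with three rows
        (``mgr`` is assumed to exist)::

            mgr.iset(0, np.array([1.0, 2.0, 3.0]))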

1144 """ 

1145 

1146 # FIXME: refactor, clearly separate broadcasting & zip-like assignment 

1147 # can prob also fix the various if tests for sparse/categorical 

1148 if self._blklocs is None and self.ndim > 1: 

1149 self._rebuild_blknos_and_blklocs() 

1150 

1151 # Note: we exclude DTA/TDA here 

1152 value_is_extension_type = is_1d_only_ea_dtype(value.dtype) 

1153 if not value_is_extension_type: 

1154 if value.ndim == 2: 

1155 value = value.T 

1156 else: 

1157 value = ensure_block_shape(value, ndim=2) 

1158 

1159 if value.shape[1:] != self.shape[1:]: 

1160 raise AssertionError( 

1161 "Shape of new values must be compatible with manager shape" 

1162 ) 

1163 

1164 if lib.is_integer(loc): 

1165 # We have 6 tests where loc is _not_ an int. 

1166 # In this case, get_blkno_placements will yield only one tuple, 

1167 # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1))) 

1168 

1169 # Check if we can use _iset_single fastpath 

1170 loc = cast(int, loc) 

1171 blkno = self.blknos[loc] 

1172 blk = self.blocks[blkno] 

1173 if len(blk._mgr_locs) == 1: # TODO: fastest way to check this? 

1174 return self._iset_single( 

1175 loc, 

1176 value, 

1177 inplace=inplace, 

1178 blkno=blkno, 

1179 blk=blk, 

1180 ) 

1181 

1182 # error: Incompatible types in assignment (expression has type 

1183 # "List[Union[int, slice, ndarray]]", variable has type "Union[int, 

1184 # slice, ndarray]") 

1185 loc = [loc] # type: ignore[assignment] 

1186 

1187 # categorical/sparse/datetimetz 

1188 if value_is_extension_type: 

1189 

1190 def value_getitem(placement): 

1191 return value 

1192 

1193 else: 

1194 

1195 def value_getitem(placement): 

1196 return value[placement.indexer] 

1197 

1198 # Accessing public blknos ensures the public versions are initialized 

1199 blknos = self.blknos[loc] 

1200 blklocs = self.blklocs[loc].copy() 

1201 

1202 unfit_mgr_locs = [] 

1203 unfit_val_locs = [] 

1204 removed_blknos = [] 

1205 for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True): 

1206 blk = self.blocks[blkno_l] 

1207 blk_locs = blklocs[val_locs.indexer] 

1208 if inplace and blk.should_store(value): 

1209 # Updating inplace -> check if we need to do Copy-on-Write 

1210 if using_copy_on_write() and not self._has_no_reference_block(blkno_l): 

1211 self._iset_split_block(blkno_l, blk_locs, value_getitem(val_locs)) 

1212 else: 

1213 blk.set_inplace(blk_locs, value_getitem(val_locs)) 

1214 continue 

1215 else: 

1216 unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) 

1217 unfit_val_locs.append(val_locs) 

1218 

1219 # If all block items are unfit, schedule the block for removal. 

1220 if len(val_locs) == len(blk.mgr_locs): 

1221 removed_blknos.append(blkno_l) 

1222 continue 

1223 else: 

1224 # Defer setting the new values to enable consolidation 

1225 self._iset_split_block(blkno_l, blk_locs) 

1226 

1227 if len(removed_blknos): 

1228 # Remove blocks & update blknos accordingly 

1229 is_deleted = np.zeros(self.nblocks, dtype=np.bool_) 

1230 is_deleted[removed_blknos] = True 

1231 

1232 new_blknos = np.empty(self.nblocks, dtype=np.intp) 

1233 new_blknos.fill(-1) 

1234 new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) 

1235 self._blknos = new_blknos[self._blknos] 

1236 self.blocks = tuple( 

1237 blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos) 

1238 ) 

1239 

1240 if unfit_val_locs: 

1241 unfit_idxr = np.concatenate(unfit_mgr_locs) 

1242 unfit_count = len(unfit_idxr) 

1243 

1244 new_blocks: list[Block] = [] 

1245 # TODO(CoW) is this always correct to assume that the new_blocks 

1246 # are not referencing anything else? 

1247 if value_is_extension_type: 

1248 # This code (ab-)uses the fact that EA blocks contain only 

1249 # one item. 

1250 # TODO(EA2D): special casing unnecessary with 2D EAs 

1251 new_blocks.extend( 

1252 new_block_2d( 

1253 values=value, 

1254 placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)), 

1255 ) 

1256 for mgr_loc in unfit_idxr 

1257 ) 

1258 

1259 self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks) 

1260 self._blklocs[unfit_idxr] = 0 

1261 

1262 else: 

1263 # unfit_val_locs contains BlockPlacement objects 

1264 unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:]) 

1265 

1266 new_blocks.append( 

1267 new_block_2d( 

1268 values=value_getitem(unfit_val_items), 

1269 placement=BlockPlacement(unfit_idxr), 

1270 ) 

1271 ) 

1272 

1273 self._blknos[unfit_idxr] = len(self.blocks) 

1274 self._blklocs[unfit_idxr] = np.arange(unfit_count) 

1275 

1276 self.blocks += tuple(new_blocks) 

1277 

1278 # Newly created block's dtype may already be present. 

1279 self._known_consolidated = False 

1280 

1281 def _iset_split_block( 

1282 self, 

1283 blkno_l: int, 

1284 blk_locs: np.ndarray | list[int], 

1285 value: ArrayLike | None = None, 

1286 ) -> None: 

1287 """Removes columns from a block by splitting the block. 

1288 

1289 Avoids copying the whole block through slicing and updates the manager 

1290 after determinint the new block structure. Optionally adds a new block, 

1291 otherwise has to be done by the caller. 

1292 

1293 Parameters 

1294 ---------- 

1295 blkno_l: The block number to operate on, relevant for updating the manager 

1296 blk_locs: The locations of our block that should be deleted. 

1297 value: The value to set as a replacement. 

1298 """ 

1299 blk = self.blocks[blkno_l] 

1300 

1301 if self._blklocs is None: 

1302 self._rebuild_blknos_and_blklocs() 

1303 

1304 nbs_tup = tuple(blk.delete(blk_locs)) 

1305 if value is not None: 

1306 locs = blk.mgr_locs.as_array[blk_locs] 

1307 first_nb = new_block_2d(value, BlockPlacement(locs)) 

1308 else: 

1309 first_nb = nbs_tup[0] 

1310 nbs_tup = tuple(nbs_tup[1:]) 

1311 

1312 nr_blocks = len(self.blocks) 

1313 blocks_tup = ( 

1314 self.blocks[:blkno_l] + (first_nb,) + self.blocks[blkno_l + 1 :] + nbs_tup 

1315 ) 

1316 self.blocks = blocks_tup 

1317 

1318 if not nbs_tup and value is not None: 

1319 # No need to update anything if split did not happen 

1320 return 

1321 

1322 self._blklocs[first_nb.mgr_locs.indexer] = np.arange(len(first_nb)) 

1323 

1324 for i, nb in enumerate(nbs_tup): 

1325 self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb)) 

1326 self._blknos[nb.mgr_locs.indexer] = i + nr_blocks 

1327 

1328 def _iset_single( 

1329 self, loc: int, value: ArrayLike, inplace: bool, blkno: int, blk: Block 

1330 ) -> None: 

1331 """ 

1332 Fastpath for iset when we are only setting a single position and 

1333 the Block currently in that position is itself single-column. 

1334 

1335 In this case we can swap out the entire Block and blklocs and blknos 

1336 are unaffected. 

1337 """ 

1338 # Caller is responsible for verifying value.shape 

1339 

1340 if inplace and blk.should_store(value): 

1341 copy = False 

1342 if using_copy_on_write() and not self._has_no_reference_block(blkno): 

1343 # perform Copy-on-Write and clear the reference 

1344 copy = True 

1345 iloc = self.blklocs[loc] 

1346 blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) 

1347 return 

1348 

1349 nb = new_block_2d(value, placement=blk._mgr_locs) 

1350 old_blocks = self.blocks 

1351 new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :] 

1352 self.blocks = new_blocks 

1353 return 

1354 

1355 def column_setitem( 

1356 self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False 

1357 ) -> None: 

1358 """ 

1359 Set values ("setitem") into a single column (not setting the full column). 

1360 

1361 This is a method on the BlockManager level, to avoid creating an 

1362 intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) 
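
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to exist); this is the
        manager-level equivalent of ``df.iloc[0:2, 0] = 5``::

            mgr.column_setitem(loc=0, idx=slice(0, 2), value=5)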

1363 """ 

1364 if using_copy_on_write() and not self._has_no_reference(loc): 

1365 blkno = self.blknos[loc] 

1366 # Split blocks to only copy the column we want to modify 

1367 blk_loc = self.blklocs[loc] 

1368 # Copy our values 

1369 values = self.blocks[blkno].values 

1370 if values.ndim == 1: 

1371 values = values.copy() 

1372 else: 

1373 # Use [blk_loc] as indexer to keep ndim=2, this already results in a 

1374 # copy 

1375 values = values[[blk_loc]] 

1376 self._iset_split_block(blkno, [blk_loc], values) 

1377 

1378 # this manager is only created temporarily to mutate the values in place 

1379 # so don't track references, otherwise the `setitem` would perform CoW again 

1380 col_mgr = self.iget(loc, track_ref=False) 

1381 if inplace_only: 

1382 col_mgr.setitem_inplace(idx, value) 

1383 else: 

1384 new_mgr = col_mgr.setitem((idx,), value) 

1385 self.iset(loc, new_mgr._block.values, inplace=True) 

1386 

1387 def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: 

1388 """ 

1389 Insert item at selected position. 

1390 

1391 Parameters 

1392 ---------- 

1393 loc : int 

1394 item : hashable 

1395 value : np.ndarray or ExtensionArray 
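
        Examples
        --------
        A minimal sketch appending an all-zeros column to a 2D manager
        (``mgr`` is assumed to exist; axes[1] is the row index)::

            mgr.insert(len(mgr.items), "new_col", np.zeros(len(mgr.axes[1])))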

1396 """ 

1397 # insert to the axis; this could possibly raise a TypeError 

1398 new_axis = self.items.insert(loc, item) 

1399 

1400 if value.ndim == 2: 

1401 value = value.T 

1402 if len(value) > 1: 

1403 raise ValueError( 

1404 f"Expected a 1D array, got an array with shape {value.T.shape}" 

1405 ) 

1406 else: 

1407 value = ensure_block_shape(value, ndim=self.ndim) 

1408 

1409 bp = BlockPlacement(slice(loc, loc + 1)) 

1410 # TODO(CoW) do we always "own" the passed `value`? 

1411 block = new_block_2d(values=value, placement=bp) 

1412 

1413 if not len(self.blocks): 

1414 # Fastpath 

1415 self._blklocs = np.array([0], dtype=np.intp) 

1416 self._blknos = np.array([0], dtype=np.intp) 

1417 else: 

1418 self._insert_update_mgr_locs(loc) 

1419 self._insert_update_blklocs_and_blknos(loc) 

1420 

1421 self.axes[0] = new_axis 

1422 self.blocks += (block,) 

1423 

1424 self._known_consolidated = False 

1425 

1426 if sum(not block.is_extension for block in self.blocks) > 100: 

1427 warnings.warn( 

1428 "DataFrame is highly fragmented. This is usually the result " 

1429 "of calling `frame.insert` many times, which has poor performance. " 

1430 "Consider joining all columns at once using pd.concat(axis=1) " 

1431 "instead. To get a de-fragmented frame, use `newframe = frame.copy()`", 

1432 PerformanceWarning, 

1433 stacklevel=find_stack_level(), 

1434 ) 

1435 

1436 def _insert_update_mgr_locs(self, loc) -> None: 

1437 """ 

1438 When inserting a new Block at location 'loc', we increment 

1439 all of the mgr_locs of blocks above that by one. 

1440 """ 

1441 for blkno, count in _fast_count_smallints(self.blknos[loc:]): 

1442 # .620 this way, .326 of which is in increment_above 

1443 blk = self.blocks[blkno] 

1444 blk._mgr_locs = blk._mgr_locs.increment_above(loc) 

1445 

1446 def _insert_update_blklocs_and_blknos(self, loc) -> None: 

1447 """ 

1448 When inserting a new Block at location 'loc', we update our 

1449 _blklocs and _blknos. 

1450 """ 

1451 

1452 # Accessing public blklocs ensures the public versions are initialized 

1453 if loc == self.blklocs.shape[0]: 

1454 # np.append is a lot faster, let's use it if we can. 

1455 self._blklocs = np.append(self._blklocs, 0) 

1456 self._blknos = np.append(self._blknos, len(self.blocks)) 

1457 elif loc == 0: 

1458 # np.append is a lot faster, let's use it if we can. 

1459 self._blklocs = np.append(self._blklocs[::-1], 0)[::-1] 

1460 self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1] 

1461 else: 

1462 new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos( 

1463 self.blklocs, self.blknos, loc, len(self.blocks) 

1464 ) 

1465 self._blklocs = new_blklocs 

1466 self._blknos = new_blknos 

1467 

1468 def idelete(self, indexer) -> BlockManager: 

1469 """ 

1470 Delete selected locations, returning a new BlockManager. 

1471 """ 

1472 is_deleted = np.zeros(self.shape[0], dtype=np.bool_) 

1473 is_deleted[indexer] = True 

1474 taker = (~is_deleted).nonzero()[0] 

1475 

1476 nbs = self._slice_take_blocks_ax0(taker, only_slice=True) 

1477 new_columns = self.items[~is_deleted] 

1478 axes = [new_columns, self.axes[1]] 

1479 return type(self)(tuple(nbs), axes, verify_integrity=False) 

1480 

1481 # ---------------------------------------------------------------- 

1482 # Block-wise Operation 

1483 

1484 def grouped_reduce(self: T, func: Callable) -> T: 

1485 """ 

1486 Apply grouped reduction function blockwise, returning a new BlockManager. 

1487 

1488 Parameters 

1489 ---------- 

1490 func : grouped reduction function 

1491 

1492 Returns 

1493 ------- 

1494 BlockManager 

1495 """ 

1496 result_blocks: list[Block] = [] 

1497 

1498 for blk in self.blocks: 

1499 if blk.is_object: 

1500 # split on object-dtype blocks bc some columns may raise 

1501 # while others do not. 

1502 for sb in blk._split(): 

1503 applied = sb.apply(func) 

1504 result_blocks = extend_blocks(applied, result_blocks) 

1505 else: 

1506 applied = blk.apply(func) 

1507 result_blocks = extend_blocks(applied, result_blocks) 

1508 

1509 if len(result_blocks) == 0: 

1510 nrows = 0 

1511 else: 

1512 nrows = result_blocks[0].values.shape[-1] 

1513 index = Index(range(nrows)) 

1514 

1515 return type(self).from_blocks(result_blocks, [self.axes[0], index]) 

1516 

1517 def reduce(self: T, func: Callable) -> T: 

1518 """ 

1519 Apply reduction function blockwise, returning a single-row BlockManager. 

1520 

1521 Parameters 

1522 ---------- 

1523 func : reduction function 

1524 

1525 Returns 

1526 ------- 

1527 BlockManager 

1528 """ 

1529 # If 2D, we assume that we're operating column-wise 

1530 assert self.ndim == 2 

1531 

1532 res_blocks: list[Block] = [] 

1533 for blk in self.blocks: 

1534 nbs = blk.reduce(func) 

1535 res_blocks.extend(nbs) 

1536 

1537 index = Index([None]) # placeholder 

1538 new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) 

1539 return new_mgr 

1540 

1541 def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: 

1542 """ 

1543 Apply array_op blockwise with another (aligned) BlockManager. 

1544 """ 

1545 return operate_blockwise(self, other, array_op) 

1546 

1547 def _equal_values(self: BlockManager, other: BlockManager) -> bool: 

1548 """ 

1549 Used in .equals defined in base class. Only check the column values 

1550 assuming shape and indexes have already been checked. 

1551 """ 

1552 return blockwise_all(self, other, array_equals) 

1553 

1554 def quantile( 

1555 self: T, 

1556 *, 

1557 qs: Index, # with dtype float 64 

1558 axis: AxisInt = 0, 

1559 interpolation: QuantileInterpolation = "linear", 

1560 ) -> T: 

1561 """ 

1562 Iterate over blocks applying quantile reduction. 

1563 This routine is intended for reduction type operations and 

1564 will do inference on the generated blocks. 

1565 

1566 Parameters 

1567 ---------- 

1568 axis: reduction axis, default 0 

1569 consolidate: bool, default True. Join together blocks having same 

1570 dtype 

1571 interpolation : type of interpolation, default 'linear' 

1572 qs : list of the quantiles to be computed 

1573 

1574 Returns 

1575 ------- 

1576 BlockManager 
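
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to be an existing 2D
        BlockManager; axis=1 is the only way this is called in practice)::

            qs = Index([0.25, 0.75], dtype=np.float64)
            result = mgr.quantile(qs=qs, axis=1)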

1577 """ 

1578 # Series dispatches to DataFrame for quantile, which allows us to 

1579 # simplify some of the code here and in the blocks 

1580 assert self.ndim >= 2 

1581 assert is_list_like(qs) # caller is responsible for this 

1582 assert axis == 1 # only ever called this way 

1583 

1584 new_axes = list(self.axes) 

1585 new_axes[1] = Index(qs, dtype=np.float64) 

1586 

1587 blocks = [ 

1588 blk.quantile(axis=axis, qs=qs, interpolation=interpolation) 

1589 for blk in self.blocks 

1590 ] 

1591 

1592 return type(self)(blocks, new_axes) 

1593 

1594 # ---------------------------------------------------------------- 

1595 

1596 def unstack(self, unstacker, fill_value) -> BlockManager: 

1597 """ 

1598 Return a BlockManager with all blocks unstacked. 

1599 

1600 Parameters 

1601 ---------- 

1602 unstacker : reshape._Unstacker 

1603 fill_value : Any 

1604 fill_value for newly introduced missing values. 

1605 

1606 Returns 

1607 ------- 

1608 unstacked : BlockManager 

1609 """ 

1610 new_columns = unstacker.get_new_columns(self.items) 

1611 new_index = unstacker.new_index 

1612 

1613 allow_fill = not unstacker.mask_all 

1614 if allow_fill: 

1615 # calculating the full mask once and passing it to Block._unstack is 

1616 # faster than letting calculating it in each repeated call 

1617 new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) 

1618 needs_masking = new_mask2D.any(axis=0) 

1619 else: 

1620 needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool) 

1621 

1622 new_blocks: list[Block] = [] 

1623 columns_mask: list[np.ndarray] = [] 

1624 

1625 if len(self.items) == 0: 

1626 factor = 1 

1627 else: 

1628 fac = len(new_columns) / len(self.items) 

1629 assert fac == int(fac) 

1630 factor = int(fac) 

1631 

1632 for blk in self.blocks: 

1633 mgr_locs = blk.mgr_locs 

1634 new_placement = mgr_locs.tile_for_unstack(factor) 

1635 

1636 blocks, mask = blk._unstack( 

1637 unstacker, 

1638 fill_value, 

1639 new_placement=new_placement, 

1640 needs_masking=needs_masking, 

1641 ) 

1642 

1643 new_blocks.extend(blocks) 

1644 columns_mask.extend(mask) 

1645 

1646 # Block._unstack should ensure this holds, 

1647 assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks) 

1648 # In turn this ensures that in the BlockManager call below 

1649 # we have len(new_columns) == sum(x.shape[0] for x in new_blocks) 

1650 # which suffices to allow us to pass verify_inegrity=False 

1651 

1652 new_columns = new_columns[columns_mask] 

1653 

1654 bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) 

1655 return bm 

1656 

1657 def to_dict(self, copy: bool = True): 

1658 """ 

1659 Return a dict of str(dtype) -> BlockManager 

1660 

1661 Parameters 

1662 ---------- 

1663 copy : bool, default True 

1664 

1665 Returns 

1666 ------- 

1667 values : a dict of dtype -> BlockManager 

1668 """ 

1669 

1670 bd: dict[str, list[Block]] = {} 

1671 for b in self.blocks: 

1672 bd.setdefault(str(b.dtype), []).append(b) 

1673 

1674 # TODO(EA2D): the combine will be unnecessary with 2D EAs 

1675 return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} 

1676 

1677 def as_array( 

1678 self, 

1679 dtype: np.dtype | None = None, 

1680 copy: bool = False, 

1681 na_value: object = lib.no_default, 

1682 ) -> np.ndarray: 

1683 """ 

1684 Convert the blockmanager data into an numpy array. 

1685 

1686 Parameters 

1687 ---------- 

1688 dtype : np.dtype or None, default None 

1689 Data type of the return array. 

1690 copy : bool, default False 

1691 If True then guarantee that a copy is returned. A value of 

1692 False does not guarantee that the underlying data is not 

1693 copied. 

1694 na_value : object, default lib.no_default 

1695 Value to be used as the missing value sentinel. 

1696 

1697 Returns 

1698 ------- 

1699 arr : ndarray 
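
        Examples
        --------
        A minimal sketch (``mgr`` is assumed to exist); note the result is
        transposed back to frame orientation (rows x columns)::

            arr = mgr.as_array(dtype=np.dtype("float64"), na_value=np.nan)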

1700 """ 

1701 # TODO(CoW) handle case where resulting array is a view 

1702 if len(self.blocks) == 0: 

1703 arr = np.empty(self.shape, dtype=float) 

1704 return arr.transpose() 

1705 

1706 # We want to copy when na_value is provided to avoid 

1707 # mutating the original object 

1708 copy = copy or na_value is not lib.no_default 

1709 

1710 if self.is_single_block: 

1711 blk = self.blocks[0] 

1712 if blk.is_extension: 

1713 # Avoid implicit conversion of extension blocks to object 

1714 

1715 # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no 

1716 # attribute "to_numpy" 

1717 arr = blk.values.to_numpy( # type: ignore[union-attr] 

1718 dtype=dtype, 

1719 na_value=na_value, 

1720 ).reshape(blk.shape) 

1721 else: 

1722 arr = np.asarray(blk.get_values()) 

1723 if dtype: 

1724 arr = arr.astype(dtype, copy=False) 

1725 

1726 if copy: 

1727 arr = arr.copy() 

1728 elif using_copy_on_write(): 

1729 arr = arr.view() 

1730 arr.flags.writeable = False 

1731 else: 

1732 arr = self._interleave(dtype=dtype, na_value=na_value) 

1733 # The underlying data was copied within _interleave, so no need 

1734 # to further copy if copy=True or setting na_value 

1735 

1736 if na_value is not lib.no_default: 

1737 arr[isna(arr)] = na_value 

1738 

1739 return arr.transpose() 

1740 

1741 def _interleave( 

1742 self, 

1743 dtype: np.dtype | None = None, 

1744 na_value: object = lib.no_default, 

1745 ) -> np.ndarray: 

1746 """ 

1747 Return ndarray from blocks with specified item order 

1748 Items must be contained in the blocks 

1749 """ 

1750 if not dtype: 

1751 # Incompatible types in assignment (expression has type 

1752 # "Optional[Union[dtype[Any], ExtensionDtype]]", variable has 

1753 # type "Optional[dtype[Any]]") 

1754 dtype = interleaved_dtype( # type: ignore[assignment] 

1755 [blk.dtype for blk in self.blocks] 

1756 ) 

1757 

1758 # TODO: https://github.com/pandas-dev/pandas/issues/22791 

1759 # Give EAs some input on what happens here. Sparse needs this. 

1760 if isinstance(dtype, SparseDtype): 

1761 dtype = dtype.subtype 

1762 dtype = cast(np.dtype, dtype) 

1763 elif isinstance(dtype, ExtensionDtype): 

1764 dtype = np.dtype("object") 

1765 elif is_dtype_equal(dtype, str): 

1766 dtype = np.dtype("object") 

1767 

1768 result = np.empty(self.shape, dtype=dtype) 

1769 

1770 itemmask = np.zeros(self.shape[0]) 

1771 

1772 if dtype == np.dtype("object") and na_value is lib.no_default: 

1773 # much more performant than using to_numpy below 

1774 for blk in self.blocks: 

1775 rl = blk.mgr_locs 

1776 arr = blk.get_values(dtype) 

1777 result[rl.indexer] = arr 

1778 itemmask[rl.indexer] = 1 

1779 return result 

1780 

1781 for blk in self.blocks: 

1782 rl = blk.mgr_locs 

1783 if blk.is_extension: 

1784 # Avoid implicit conversion of extension blocks to object 

1785 

1786 # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no 

1787 # attribute "to_numpy" 

1788 arr = blk.values.to_numpy( # type: ignore[union-attr] 

1789 dtype=dtype, 

1790 na_value=na_value, 

1791 ) 

1792 else: 

1793 arr = blk.get_values(dtype) 

1794 result[rl.indexer] = arr 

1795 itemmask[rl.indexer] = 1 

1796 

1797 if not itemmask.all(): 

1798 raise AssertionError("Some items were not contained in blocks") 

1799 

1800 return result 

1801 
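
The non-single-block path above interleaves into the common dtype chosen by interleaved_dtype; a small sketch of the observable effect through the public API:

import numpy as np
import pandas as pd

df = pd.DataFrame({"i": [1, 2], "f": [0.5, 1.5], "s": ["a", "b"]})

# int64 + float64 + object interleave to object ...
assert df.to_numpy().dtype == np.dtype("object")
# ... while int64 + float64 alone interleave to float64
assert df[["i", "f"]].to_numpy().dtype == np.dtype("float64")
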

1802 # ---------------------------------------------------------------- 

1803 # Consolidation 

1804 

1805 def is_consolidated(self) -> bool: 

1806 """ 

1807 Return True if no two consolidatable blocks share the same dtype 

1808 """ 

1809 if not self._known_consolidated: 

1810 self._consolidate_check() 

1811 return self._is_consolidated 

1812 

1813 def _consolidate_check(self) -> None: 

1814 if len(self.blocks) == 1: 

1815 # fastpath 

1816 self._is_consolidated = True 

1817 self._known_consolidated = True 

1818 return 

1819 dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] 

1820 self._is_consolidated = len(dtypes) == len(set(dtypes)) 

1821 self._known_consolidated = True 

1822 

1823 def _consolidate_inplace(self) -> None: 

1824 # In general, _consolidate_inplace should only be called via 

1825 # DataFrame._consolidate_inplace, otherwise we will fail to invalidate 

1826 # the DataFrame's _item_cache. The exception is for newly-created 

1827 # BlockManager objects not yet attached to a DataFrame. 

1828 if not self.is_consolidated(): 

1829 self.blocks = _consolidate(self.blocks) 

1830 self._is_consolidated = True 

1831 self._known_consolidated = True 

1832 self._rebuild_blknos_and_blklocs() 

1833 

1834 
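
To see consolidation in action, a sketch poking at internal state (_mgr and blocks are private and may change between versions):

import pandas as pd

df = pd.DataFrame({"a": range(3)})
df["b"] = range(3)  # column insertion adds a second int64 block

assert not df._mgr.is_consolidated()
df._consolidate_inplace()  # the supported entry point per the note above
assert df._mgr.is_consolidated() and len(df._mgr.blocks) == 1
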

1835class SingleBlockManager(BaseBlockManager, SingleDataManager): 

1836 """manage a single block with""" 

1837 

1838 @property 

1839 def ndim(self) -> Literal[1]: 

1840 return 1 

1841 

1842 _is_consolidated = True 

1843 _known_consolidated = True 

1844 __slots__ = () 

1845 is_single_block = True 

1846 

1847 def __init__( 

1848 self, 

1849 block: Block, 

1850 axis: Index, 

1851 verify_integrity: bool = False, 

1852 ) -> None: 

1853 # Assertions disabled for performance 

1854 # assert isinstance(block, Block), type(block) 

1855 # assert isinstance(axis, Index), type(axis) 

1856 

1857 self.axes = [axis] 

1858 self.blocks = (block,) 

1859 

1860 @classmethod 

1861 def from_blocks( 

1862 cls, 

1863 blocks: list[Block], 

1864 axes: list[Index], 

1865 ) -> SingleBlockManager: 

1866 """ 

1867 Constructor for BlockManager and SingleBlockManager with same signature. 

1868 """ 

1869 assert len(blocks) == 1 

1870 assert len(axes) == 1 

1871 return cls(blocks[0], axes[0], verify_integrity=False) 

1872 

1873 @classmethod 

1874 def from_array( 

1875 cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None 

1876 ) -> SingleBlockManager: 

1877 """ 

1878 Constructor for if we have an array that is not yet a Block. 

1879 """ 

1880 block = new_block(array, placement=slice(0, len(index)), ndim=1, refs=refs) 

1881 return cls(block, index) 

1882 
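
A minimal sketch of building a 1D manager directly from an array (internal import path, not public API):

import numpy as np
import pandas as pd
from pandas.core.internals.managers import SingleBlockManager

idx = pd.Index(["x", "y", "z"])
mgr = SingleBlockManager.from_array(np.arange(3, dtype=np.int64), idx)

assert mgr.ndim == 1
assert mgr.shape == (3,)
assert mgr.dtype == np.dtype("int64")
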

1883 def to_2d_mgr(self, columns: Index) -> BlockManager: 

1884 """ 

1885 Manager analogue of Series.to_frame 

1886 """ 

1887 blk = self.blocks[0] 

1888 arr = ensure_block_shape(blk.values, ndim=2) 

1889 bp = BlockPlacement(0) 

1890 new_blk = type(blk)(arr, placement=bp, ndim=2, refs=blk.refs) 

1891 axes = [columns, self.axes[0]] 

1892 return BlockManager([new_blk], axes=axes, verify_integrity=False) 

1893 

1894 def _has_no_reference(self, i: int = 0) -> bool: 

1895 """ 

1896 Check whether column `i` has references, i.e. whether it 

1897 references another array or is itself being referenced. 

1898 Return True if the column has no references. 

1899 """ 

1900 return not self.blocks[0].refs.has_reference() 

1901 

1902 def __getstate__(self): 

1903 block_values = [b.values for b in self.blocks] 

1904 block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks] 

1905 axes_array = list(self.axes) 

1906 

1907 extra_state = { 

1908 "0.14.1": { 

1909 "axes": axes_array, 

1910 "blocks": [ 

1911 {"values": b.values, "mgr_locs": b.mgr_locs.indexer} 

1912 for b in self.blocks 

1913 ], 

1914 } 

1915 } 

1916 

1917 # First three elements of the state are to maintain forward 

1918 # compatibility with 0.13.1. 

1919 return axes_array, block_values, block_items, extra_state 

1920 

1921 def __setstate__(self, state): 

1922 def unpickle_block(values, mgr_locs, ndim: int) -> Block: 

1923 # TODO(EA2D): ndim would be unnecessary with 2D EAs 

1924 # older pickles may store e.g. DatetimeIndex instead of DatetimeArray 

1925 values = extract_array(values, extract_numpy=True) 

1926 return new_block(values, placement=mgr_locs, ndim=ndim) 

1927 

1928 if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: 

1929 state = state[3]["0.14.1"] 

1930 self.axes = [ensure_index(ax) for ax in state["axes"]] 

1931 ndim = len(self.axes) 

1932 self.blocks = tuple( 

1933 unpickle_block(b["values"], b["mgr_locs"], ndim=ndim) 

1934 for b in state["blocks"] 

1935 ) 

1936 else: 

1937 raise NotImplementedError("pre-0.14.1 pickles are no longer supported") 

1938 

1939 self._post_setstate() 

1940 

1941 def _post_setstate(self) -> None: 

1942 pass 

1943 

1944 @cache_readonly 

1945 def _block(self) -> Block: 

1946 return self.blocks[0] 

1947 

1948 @property 

1949 def _blknos(self): 

1950 """compat with BlockManager""" 

1951 return None 

1952 

1953 @property 

1954 def _blklocs(self): 

1955 """compat with BlockManager""" 

1956 return None 

1957 

1958 def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager: 

1959 # similar to get_slice, but not restricted to slice indexer 

1960 blk = self._block 

1961 if ( 

1962 using_copy_on_write() 

1963 and isinstance(indexer, np.ndarray) 

1964 and len(indexer) > 0 

1965 and com.is_bool_indexer(indexer) 

1966 and indexer.all() 

1967 ): 

1968 return type(self)(blk.copy(deep=False), self.index) 

1969 array = blk._slice(indexer) 

1970 if array.ndim > 1: 

1971 # This will be caught by Series._get_values 

1972 raise ValueError("dimension-expanding indexing not allowed") 

1973 

1974 bp = BlockPlacement(slice(0, len(array))) 

1975 # TODO(CoW) in theory only need to track reference if new_array is a view 

1976 block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs) 

1977 

1978 new_idx = self.index[indexer] 

1979 return type(self)(block, new_idx) 

1980 
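
The all-True boolean fast path above is observable through ordinary Series indexing under copy-on-write; a sketch:

import numpy as np
import pandas as pd

pd.options.mode.copy_on_write = True

s = pd.Series([1, 2, 3])
t = s[np.array([True, True, True])]   # fast path: shallow copy of the block
u = s[np.array([True, False, True])]  # general path via blk._slice
assert list(t) == [1, 2, 3]
assert list(u.index) == [0, 2]
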

1981 def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: 

1982 # Assertion disabled for performance 

1983 # assert isinstance(slobj, slice), type(slobj) 

1984 if axis >= self.ndim: 

1985 raise IndexError("Requested axis not found in manager") 

1986 

1987 blk = self._block 

1988 array = blk._slice(slobj) 

1989 bp = BlockPlacement(slice(0, len(array))) 

1990 # TODO this method is only used in groupby SeriesSplitter at the moment, 

1991 # so passing refs is not yet covered by the tests 

1992 block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs) 

1993 new_index = self.index._getitem_slice(slobj) 

1994 return type(self)(block, new_index) 

1995 

1996 @property 

1997 def index(self) -> Index: 

1998 return self.axes[0] 

1999 

2000 @property 

2001 def dtype(self) -> DtypeObj: 

2002 return self._block.dtype 

2003 

2004 def get_dtypes(self) -> np.ndarray: 

2005 return np.array([self._block.dtype]) 

2006 

2007 def external_values(self): 

2008 """The array that Series.values returns""" 

2009 return self._block.external_values() 

2010 

2011 def internal_values(self): 

2012 """The array that Series._values returns""" 

2013 return self._block.values 

2014 

2015 def array_values(self): 

2016 """The array that Series.array returns""" 

2017 return self._block.array_values 

2018 

2019 def get_numeric_data(self, copy: bool = False): 

2020 if self._block.is_numeric: 

2021 return self.copy(deep=copy) 

2022 return self.make_empty() 

2023 

2024 @property 

2025 def _can_hold_na(self) -> bool: 

2026 return self._block._can_hold_na 

2027 

2028 def setitem_inplace(self, indexer, value) -> None: 

2029 """ 

2030 Set values with indexer. 

2031 

2032 For Single[Block/Array]Manager, this backs s[indexer] = value 

2033 

2034 This is an inplace version of `setitem()`, mutating the manager/values 

2035 in place, not returning a new Manager (and Block), and thus never changing 

2036 the dtype. 

2037 """ 

2038 if using_copy_on_write() and not self._has_no_reference(0): 

2039 self.blocks = (self._block.copy(),) 

2040 self._cache.clear() 

2041 

2042 super().setitem_inplace(indexer, value) 

2043 
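
Under copy-on-write, this copy-before-write is what keeps a parent DataFrame untouched when a child Series is mutated; a sketch:

import pandas as pd

pd.options.mode.copy_on_write = True

df = pd.DataFrame({"a": [1, 2, 3]})
s = df["a"]      # s references df's block
s.iloc[0] = 100  # the shared block is copied before writing
assert df.loc[0, "a"] == 1
assert s.iloc[0] == 100
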

2044 def idelete(self, indexer) -> SingleBlockManager: 

2045 """ 

2046 Delete single location from SingleBlockManager. 

2047 

2048 Ensures that self.blocks doesn't become empty. 

2049 """ 

2050 nb = self._block.delete(indexer)[0] 

2051 self.blocks = (nb,) 

2052 self.axes[0] = self.axes[0].delete(indexer) 

2053 self._cache.clear() 

2054 return self 

2055 

2056 def fast_xs(self, loc): 

2057 """ 

2058 fast path for getting a cross-section; 

2059 not supported for a 1D manager, use series._values[loc] instead 

2060 """ 

2061 raise NotImplementedError("Use series._values[loc] instead") 

2062 

2063 def set_values(self, values: ArrayLike) -> None: 

2064 """ 

2065 Set the values of the single block in place. 

2066 

2067 Use at your own risk! This does not check if the passed values are 

2068 valid for the current Block/SingleBlockManager (length, dtype, etc). 

2069 """ 

2070 # TODO(CoW) do we need to handle copy on write here? Currently this is 

2071 # only used for FrameColumnApply.series_generator (what if apply is 

2072 # mutating inplace?) 

2073 self.blocks[0].values = values 

2074 self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) 

2075 

2076 def _equal_values(self: T, other: T) -> bool: 

2077 """ 

2078 Used in .equals defined in base class. Only check the column values 

2079 assuming shape and indexes have already been checked. 

2080 """ 

2081 # For SingleBlockManager (i.e. Series) 

2082 if other.ndim != 1: 

2083 return False 

2084 left = self.blocks[0].values 

2085 right = other.blocks[0].values 

2086 return array_equals(left, right) 

2087 

2088 

2089# -------------------------------------------------------------------- 

2090# Constructor Helpers 

2091 

2092 

2093def create_block_manager_from_blocks( 

2094 blocks: list[Block], 

2095 axes: list[Index], 

2096 consolidate: bool = True, 

2097 verify_integrity: bool = True, 

2098) -> BlockManager: 

2099 # If verify_integrity=False, then caller is responsible for checking 

2100 # all(x.shape[-1] == len(axes[1]) for x in blocks) 

2101 # sum(x.shape[0] for x in blocks) == len(axes[0]) 

2102 # set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0]))) 

2103 # all(blk.ndim == 2 for blk in blocks) 

2104 # This allows us to safely pass verify_integrity=False 

2105 

2106 try: 

2107 mgr = BlockManager(blocks, axes, verify_integrity=verify_integrity) 

2108 

2109 except ValueError as err: 

2110 arrays = [blk.values for blk in blocks] 

2111 tot_items = sum(arr.shape[0] for arr in arrays) 

2112 raise_construction_error(tot_items, arrays[0].shape[1:], axes, err) 

2113 

2114 if consolidate: 

2115 mgr._consolidate_inplace() 

2116 return mgr 

2117 

2118 

2119def create_block_manager_from_column_arrays( 

2120 arrays: list[ArrayLike], 

2121 axes: list[Index], 

2122 consolidate: bool, 

2123 refs: list, 

2124) -> BlockManager: 

2125 # Assertions disabled for performance (caller is responsible for verifying) 

2126 # assert isinstance(axes, list) 

2127 # assert all(isinstance(x, Index) for x in axes) 

2128 # assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays) 

2129 # assert all(type(x) is not PandasArray for x in arrays) 

2130 # assert all(x.ndim == 1 for x in arrays) 

2131 # assert all(len(x) == len(axes[1]) for x in arrays) 

2132 # assert len(arrays) == len(axes[0]) 

2133 # These last three are sufficient to allow us to safely pass 

2134 # verify_integrity=False below. 

2135 

2136 try: 

2137 blocks = _form_blocks(arrays, consolidate, refs) 

2138 mgr = BlockManager(blocks, axes, verify_integrity=False) 

2139 except ValueError as e: 

2140 raise_construction_error(len(arrays), arrays[0].shape, axes, e) 

2141 if consolidate: 

2142 mgr._consolidate_inplace() 

2143 return mgr 

2144 

2145 

2146def raise_construction_error( 

2147 tot_items: int, 

2148 block_shape: Shape, 

2149 axes: list[Index], 

2150 e: ValueError | None = None, 

2151): 

2152 """raise a helpful message about our construction""" 

2153 passed = tuple(map(int, [tot_items] + list(block_shape))) 

2154 # Correcting the user-facing error message during dataframe construction 

2155 if len(passed) <= 2: 

2156 passed = passed[::-1] 

2157 

2158 implied = tuple(len(ax) for ax in axes) 

2159 # Correcting the user-facing error message during dataframe construction 

2160 if len(implied) <= 2: 

2161 implied = implied[::-1] 

2162 

2163 # If the passed shape matches the implied shape, the original 

2164 # exception carries the most useful context, so re-raise it as-is 

2165 if passed == implied and e is not None: 

2166 raise e 

2167 if block_shape[0] == 0: 

2168 raise ValueError("Empty data passed with indices specified.") 

2169 raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") 

2170 
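
The resulting user-facing message, illustrated with a deliberate shape mismatch:

import numpy as np
import pandas as pd

try:
    pd.DataFrame(np.ones((2, 3)), columns=["a", "b"])
except ValueError as err:
    # message built by raise_construction_error
    print(err)  # Shape of passed values is (2, 3), indices imply (2, 2)
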

2171 

2172# ----------------------------------------------------------------------- 

2173 

2174 

2175def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, bool, DtypeObj]: 

2176 # compat for numpy<1.21, in which comparing a np.dtype with an ExtensionDtype 

2177 # raises instead of returning False. Once earlier numpy versions are dropped, 

2178 # this can be simplified to `return tup[1].dtype` 

2179 dtype = tup[1].dtype 

2180 

2181 if is_1d_only_ea_dtype(dtype): 

2182 # We know these won't be consolidated, so don't need to group these. 

2183 # This avoids expensive comparisons of CategoricalDtype objects 

2184 sep = id(dtype) 

2185 else: 

2186 sep = 0 

2187 

2188 return sep, isinstance(dtype, np.dtype), dtype 

2189 

2190 

2191def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]: 

2192 tuples = list(enumerate(arrays)) 

2193 

2194 if not consolidate: 

2195 nbs = _tuples_to_blocks_no_consolidate(tuples, refs) 

2196 return nbs 

2197 

2198 # when consolidating, we can ignore refs (either stacking always copies, 

2199 # or the EA is already copied in the calling dict_to_mgr) 

2200 # TODO(CoW) check if this is also valid for rec_array_to_mgr 

2201 

2202 # group by dtype 

2203 grouper = itertools.groupby(tuples, _grouping_func) 

2204 

2205 nbs = [] 

2206 for (_, _, dtype), tup_block in grouper: 

2207 block_type = get_block_type(dtype) 

2208 

2209 if isinstance(dtype, np.dtype): 

2210 is_dtlike = dtype.kind in ["m", "M"] 

2211 

2212 if issubclass(dtype.type, (str, bytes)): 

2213 dtype = np.dtype(object) 

2214 

2215 values, placement = _stack_arrays(list(tup_block), dtype) 

2216 if is_dtlike: 

2217 values = ensure_wrapped_if_datetimelike(values) 

2218 blk = block_type(values, placement=BlockPlacement(placement), ndim=2) 

2219 nbs.append(blk) 

2220 

2221 elif is_1d_only_ea_dtype(dtype): 

2222 dtype_blocks = [ 

2223 block_type(x[1], placement=BlockPlacement(x[0]), ndim=2) 

2224 for x in tup_block 

2225 ] 

2226 nbs.extend(dtype_blocks) 

2227 

2228 else: 

2229 dtype_blocks = [ 

2230 block_type( 

2231 ensure_block_shape(x[1], 2), placement=BlockPlacement(x[0]), ndim=2 

2232 ) 

2233 for x in tup_block 

2234 ] 

2235 nbs.extend(dtype_blocks) 

2236 return nbs 

2237 
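
Because itertools.groupby only merges adjacent runs, non-adjacent same-dtype columns leave _form_blocks as separate blocks and are merged by the later consolidation pass; the net effect is one block per consolidatable dtype:

import pandas as pd

df = pd.DataFrame({"a": [1], "b": [1.0], "c": [2]})
# "a" and "c" start as separate int64 runs but end up merged:
# one int64 block plus one float64 block
assert len(df._mgr.blocks) == 2
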

2238 

2239def _tuples_to_blocks_no_consolidate(tuples, refs) -> list[Block]: 

2240 # tuples produced within _form_blocks are of the form (placement, array) 

2241 return [ 

2242 new_block_2d( 

2243 ensure_block_shape(arr, ndim=2), placement=BlockPlacement(i), refs=ref 

2244 ) 

2245 for ((i, arr), ref) in zip(tuples, refs) 

2246 ] 

2247 

2248 

2249def _stack_arrays(tuples, dtype: np.dtype): 

2250 placement, arrays = zip(*tuples) 

2251 

2252 first = arrays[0] 

2253 shape = (len(arrays),) + first.shape 

2254 

2255 stacked = np.empty(shape, dtype=dtype) 

2256 for i, arr in enumerate(arrays): 

2257 stacked[i] = arr 

2258 

2259 return stacked, placement 

2260 
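
A pure-NumPy sketch of what _stack_arrays does: column arrays become the rows of the 2D block (block values are stored transposed, columns-first):

import numpy as np

cols = [np.array([1, 2, 3]), np.array([4, 5, 6])]
stacked = np.empty((len(cols), 3), dtype=np.int64)
for i, arr in enumerate(cols):
    stacked[i] = arr

assert stacked.shape == (2, 3)  # (n_columns, n_rows)
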

2261 

2262def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]: 

2263 """ 

2264 Merge blocks having same dtype, exclude non-consolidating blocks 

2265 """ 

2266 # sort by _can_consolidate, dtype 

2267 gkey = lambda x: x._consolidate_key 

2268 grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) 

2269 

2270 new_blocks: list[Block] = [] 

2271 for (_can_consolidate, dtype), group_blocks in grouper: 

2272 merged_blocks, _ = _merge_blocks( 

2273 list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate 

2274 ) 

2275 new_blocks = extend_blocks(merged_blocks, new_blocks) 

2276 return tuple(new_blocks) 

2277 

2278 

2279def _merge_blocks( 

2280 blocks: list[Block], dtype: DtypeObj, can_consolidate: bool 

2281) -> tuple[list[Block], bool]: 

2282 if len(blocks) == 1: 

2283 return blocks, False 

2284 

2285 if can_consolidate: 

2286 # TODO: optimization potential in case all mgrs contain slices and 

2287 # combination of those slices is a slice, too. 

2288 new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) 

2289 

2290 new_values: ArrayLike 

2291 

2292 if isinstance(blocks[0].dtype, np.dtype): 

2293 # error: List comprehension has incompatible type List[Union[ndarray, 

2294 # ExtensionArray]]; expected List[Union[complex, generic, 

2295 # Sequence[Union[int, float, complex, str, bytes, generic]], 

2296 # Sequence[Sequence[Any]], SupportsArray]] 

2297 new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] 

2298 else: 

2299 bvals = [blk.values for blk in blocks] 

2300 bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) 

2301 new_values = bvals2[0]._concat_same_type(bvals2, axis=0) 

2302 

2303 argsort = np.argsort(new_mgr_locs) 

2304 new_values = new_values[argsort] 

2305 new_mgr_locs = new_mgr_locs[argsort] 

2306 

2307 bp = BlockPlacement(new_mgr_locs) 

2308 return [new_block_2d(new_values, placement=bp)], True 

2309 

2310 # can't consolidate --> no merge 

2311 return blocks, False 

2312 
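
The argsort step above restores column order after stacking; a small NumPy sketch of the same reordering:

import numpy as np

# two merged blocks held columns [2] and [0, 1]; after vstack the rows
# are in block order, so sort rows by their manager locations
new_mgr_locs = np.array([2, 0, 1])
new_values = np.array([[20.0], [0.0], [10.0]])

order = np.argsort(new_mgr_locs)
assert (new_mgr_locs[order] == [0, 1, 2]).all()
assert (new_values[order][:, 0] == [0.0, 10.0, 20.0]).all()
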

2313 

2314def _fast_count_smallints(arr: npt.NDArray[np.intp]): 

2315 """Faster version of set(arr) for sequences of small numbers.""" 

2316 counts = np.bincount(arr) 

2317 nz = counts.nonzero()[0] 

2318 # Note: list(zip(...)) outperforms list(np.c_[nz, counts[nz]]) here, 

2319 # in one benchmark by a factor of 11 

2320 return zip(nz, counts[nz]) 

2321 
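
A worked example of the bincount trick (values must be small non-negative integers):

import numpy as np

arr = np.array([0, 2, 2, 5], dtype=np.intp)
counts = np.bincount(arr)  # [1, 0, 2, 0, 0, 1]
nz = counts.nonzero()[0]   # [0, 2, 5]
assert list(zip(nz, counts[nz])) == [(0, 1), (2, 2), (5, 1)]
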

2322 

2323def _preprocess_slice_or_indexer( 

2324 slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool 

2325): 

2326 if isinstance(slice_or_indexer, slice): 

2327 return ( 

2328 "slice", 

2329 slice_or_indexer, 

2330 libinternals.slice_len(slice_or_indexer, length), 

2331 ) 

2332 else: 

2333 if ( 

2334 not isinstance(slice_or_indexer, np.ndarray) 

2335 or slice_or_indexer.dtype.kind != "i" 

2336 ): 

2337 dtype = getattr(slice_or_indexer, "dtype", None) 

2338 raise TypeError(type(slice_or_indexer), dtype) 

2339 

2340 indexer = ensure_platform_int(slice_or_indexer) 

2341 if not allow_fill: 

2342 indexer = maybe_convert_indices(indexer, length) 

2343 return "fancy", indexer, len(indexer)
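
For reference, libinternals.slice_len returns the length of a slice applied to an object of the given length; a pure-Python equivalent (a sketch, not the actual Cython implementation):

def py_slice_len(slobj: slice, length: int) -> int:
    # same result as len(seq[slobj]) for a sequence of `length` items
    return len(range(*slobj.indices(length)))

assert py_slice_len(slice(1, None), 5) == 4
assert py_slice_len(slice(None, None, 2), 5) == 3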