Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/internals/managers.py: 55%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

981 statements  

1from __future__ import annotations 

2 

3from collections.abc import ( 

4 Hashable, 

5 Sequence, 

6) 

7import itertools 

8from typing import ( 

9 TYPE_CHECKING, 

10 Callable, 

11 Literal, 

12 cast, 

13) 

14import warnings 

15 

16import numpy as np 

17 

18from pandas._config import ( 

19 using_copy_on_write, 

20 warn_copy_on_write, 

21) 

22 

23from pandas._libs import ( 

24 internals as libinternals, 

25 lib, 

26) 

27from pandas._libs.internals import ( 

28 BlockPlacement, 

29 BlockValuesRefs, 

30) 

31from pandas._libs.tslibs import Timestamp 

32from pandas.errors import PerformanceWarning 

33from pandas.util._decorators import cache_readonly 

34from pandas.util._exceptions import find_stack_level 

35 

36from pandas.core.dtypes.cast import infer_dtype_from_scalar 

37from pandas.core.dtypes.common import ( 

38 ensure_platform_int, 

39 is_1d_only_ea_dtype, 

40 is_list_like, 

41) 

42from pandas.core.dtypes.dtypes import ( 

43 DatetimeTZDtype, 

44 ExtensionDtype, 

45) 

46from pandas.core.dtypes.generic import ( 

47 ABCDataFrame, 

48 ABCSeries, 

49) 

50from pandas.core.dtypes.missing import ( 

51 array_equals, 

52 isna, 

53) 

54 

55import pandas.core.algorithms as algos 

56from pandas.core.arrays import ( 

57 ArrowExtensionArray, 

58 ArrowStringArray, 

59 DatetimeArray, 

60) 

61from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

62from pandas.core.construction import ( 

63 ensure_wrapped_if_datetimelike, 

64 extract_array, 

65) 

66from pandas.core.indexers import maybe_convert_indices 

67from pandas.core.indexes.api import ( 

68 Index, 

69 ensure_index, 

70) 

71from pandas.core.internals.base import ( 

72 DataManager, 

73 SingleDataManager, 

74 ensure_np_dtype, 

75 interleaved_dtype, 

76) 

77from pandas.core.internals.blocks import ( 

78 COW_WARNING_GENERAL_MSG, 

79 COW_WARNING_SETITEM_MSG, 

80 Block, 

81 NumpyBlock, 

82 ensure_block_shape, 

83 extend_blocks, 

84 get_block_type, 

85 maybe_coerce_values, 

86 new_block, 

87 new_block_2d, 

88) 

89from pandas.core.internals.ops import ( 

90 blockwise_all, 

91 operate_blockwise, 

92) 

93 

94if TYPE_CHECKING: 

95 from pandas._typing import ( 

96 ArrayLike, 

97 AxisInt, 

98 DtypeObj, 

99 QuantileInterpolation, 

100 Self, 

101 Shape, 

102 npt, 

103 ) 

104 

105 from pandas.api.extensions import ExtensionArray 

106 

107 

class BaseBlockManager(DataManager):
    """
    Core internal data structure to implement DataFrame, Series, etc.

    Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
    lightweight blocked set of labeled data to be manipulated by the DataFrame
    public API class

    Attributes
    ----------
    shape
    ndim
    axes
    values
    items

    Methods
    -------
    set_axis(axis, new_labels)
    copy(deep=True)

    get_dtypes

    apply(func, axes, block_filter_fn)

    get_bool_data
    get_numeric_data

    get_slice(slice_like, axis)
    get(label)
    iget(loc)

    take(indexer, axis)
    reindex_axis(new_labels, axis)
    reindex_indexer(new_labels, indexer, axis)

    delete(label)
    insert(loc, label, value)
    set(label, value)

    Parameters
    ----------
    blocks: Sequence of Block
    axes: Sequence of Index
    verify_integrity: bool, default True

    Notes
    -----
    This is *not* a public API class
    """

    __slots__ = ()

    # Lazily-built reverse maps from column position to block number /
    # location within that block; see the ``blknos`` property docstring.
    _blknos: npt.NDArray[np.intp]
    _blklocs: npt.NDArray[np.intp]
    # The data blocks and the axis labels (items axis first).
    blocks: tuple[Block, ...]
    axes: list[Index]

165 

    @property
    def ndim(self) -> int:
        # Abstract: concrete managers hard-code this (e.g. 2 for BlockManager).
        raise NotImplementedError

169 

    # Consolidation bookkeeping flags, maintained by the concrete managers.
    _known_consolidated: bool
    _is_consolidated: bool

    def __init__(self, blocks, axes, verify_integrity: bool = True) -> None:
        # Abstract: construction is implemented by the concrete subclasses.
        raise NotImplementedError

175 

    @classmethod
    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> Self:
        # Abstract alternate constructor; BlockManager and SingleBlockManager
        # implement it with the same signature.
        raise NotImplementedError

179 

    @property
    def blknos(self) -> npt.NDArray[np.intp]:
        """
        Suppose we want to find the array corresponding to our i'th column.

        blknos[i] identifies the block from self.blocks that contains this column.

        blklocs[i] identifies the column of interest within
        self.blocks[self.blknos[i]]
        """
        if self._blknos is None:
            # Note: these can be altered by other BlockManager methods.
            # Lazily (re)build both maps on first access.
            self._rebuild_blknos_and_blklocs()

        return self._blknos

195 

    @property
    def blklocs(self) -> npt.NDArray[np.intp]:
        """
        See blknos.__doc__
        """
        if self._blklocs is None:
            # Note: these can be altered by other BlockManager methods.
            # Lazily (re)build both maps on first access.
            self._rebuild_blknos_and_blklocs()

        return self._blklocs

206 

    def make_empty(self, axes=None) -> Self:
        """return an empty BlockManager with the items axis of len 0"""
        if axes is None:
            # keep the non-items axes, empty out the items axis
            axes = [Index([])] + self.axes[1:]

        # preserve dtype if possible
        if self.ndim == 1:
            assert isinstance(self, SingleBlockManager)  # for mypy
            blk = self.blocks[0]
            # zero-length slice of the existing values keeps their dtype
            arr = blk.values[:0]
            bp = BlockPlacement(slice(0, 0))
            nb = blk.make_block_same_class(arr, placement=bp)
            blocks = [nb]
        else:
            blocks = []
        return type(self).from_blocks(blocks, axes)

223 

    def __nonzero__(self) -> bool:
        # A manager is always truthy, even when it holds no blocks.
        return True

    # Python3 compat
    __bool__ = __nonzero__

229 

230 def _normalize_axis(self, axis: AxisInt) -> int: 

231 # switch axis to follow BlockManager logic 

232 if self.ndim == 2: 

233 axis = 1 if axis == 0 else 0 

234 return axis 

235 

    def set_axis(self, axis: AxisInt, new_labels: Index) -> None:
        # Caller is responsible for ensuring we have an Index object.
        # _validate_set_axis presumably length-checks the new labels
        # against the axis — see DataManager (TODO confirm).
        self._validate_set_axis(axis, new_labels)
        self.axes[axis] = new_labels

240 

    @property
    def is_single_block(self) -> bool:
        """True if all of the data is stored in exactly one Block."""
        # Assumes we are 2D; overridden by SingleBlockManager
        return len(self.blocks) == 1

245 

    @property
    def items(self) -> Index:
        """Labels of the first manager axis (the columns for a 2D manager)."""
        return self.axes[0]

249 

    def _has_no_reference(self, i: int) -> bool:
        """
        Check for column `i` if it has references.
        (whether it references another array or is itself being referenced)
        Returns True if the column has no references.
        """
        # Map the column position to its containing block, then test the block.
        blkno = self.blknos[i]
        return self._has_no_reference_block(blkno)

258 

    def _has_no_reference_block(self, blkno: int) -> bool:
        """
        Check for block `blkno` if it has references.
        (whether it references another array or is itself being referenced)
        Returns True if the block has no references.
        """
        return not self.blocks[blkno].refs.has_reference()

266 

    def add_references(self, mgr: BaseBlockManager) -> None:
        """
        Adds the references from one manager to another. We assume that both
        managers have the same block structure.
        """
        if len(self.blocks) != len(mgr.blocks):
            # If block structure changes, then we made a copy
            return
        for i, blk in enumerate(self.blocks):
            # Share the ref-tracker of the corresponding block in ``mgr``
            # and register our block in it.
            blk.refs = mgr.blocks[i].refs
            blk.refs.add_reference(blk)

278 

    def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
        """
        Checks if two blocks from two different block managers reference the
        same underlying values.
        """
        blk = self.blocks[blkno]
        # referenced_blocks holds callables (weakrefs) — compare by identity.
        return any(blk is ref() for ref in mgr.blocks[blkno].refs.referenced_blocks)

286 

287 def get_dtypes(self) -> npt.NDArray[np.object_]: 

288 dtypes = np.array([blk.dtype for blk in self.blocks], dtype=object) 

289 return dtypes.take(self.blknos) 

290 

    @property
    def arrays(self) -> list[ArrayLike]:
        """
        Quick access to the backing arrays of the Blocks.

        Only for compatibility with ArrayManager for testing convenience.
        Not to be used in actual code, and return value is not the same as the
        ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs).

        Warning! The returned arrays don't handle Copy-on-Write, so this should
        be used with caution (only in read-mode).
        """
        # One entry per block, in block order (not column order).
        return [blk.values for blk in self.blocks]

304 

305 def __repr__(self) -> str: 

306 output = type(self).__name__ 

307 for i, ax in enumerate(self.axes): 

308 if i == 0: 

309 output += f"\nItems: {ax}" 

310 else: 

311 output += f"\nAxis {i}: {ax}" 

312 

313 for block in self.blocks: 

314 output += f"\n{block}" 

315 return output 

316 

    def apply(
        self,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> Self:
        """
        Iterate over the blocks, collect and create a new BlockManager.

        Parameters
        ----------
        f : str or callable
            Name of the Block method to apply.
        align_keys: List[str] or None, default None
            Names of kwargs that must be re-indexed to each block's row
            placement before the call.
        **kwargs
            Keywords to pass to `f`

        Returns
        -------
        BlockManager
        """
        assert "filter" not in kwargs

        align_keys = align_keys or []
        result_blocks: list[Block] = []
        # fillna: Series/DataFrame is responsible for making sure value is aligned

        aligned_args = {k: kwargs[k] for k in align_keys}

        for b in self.blocks:
            if aligned_args:
                # Narrow each aligned argument down to this block's rows.
                for k, obj in aligned_args.items():
                    if isinstance(obj, (ABCSeries, ABCDataFrame)):
                        # The caller is responsible for ensuring that
                        # obj.axes[-1].equals(self.items)
                        if obj.ndim == 1:
                            kwargs[k] = obj.iloc[b.mgr_locs.indexer]._values
                        else:
                            kwargs[k] = obj.iloc[:, b.mgr_locs.indexer]._values
                    else:
                        # otherwise we have an ndarray
                        kwargs[k] = obj[b.mgr_locs.indexer]

            if callable(f):
                applied = b.apply(f, **kwargs)
            else:
                applied = getattr(b, f)(**kwargs)
            # a Block method may return a single block or a list of blocks
            result_blocks = extend_blocks(applied, result_blocks)

        out = type(self).from_blocks(result_blocks, self.axes)
        return out

    # Alias so we can share code with ArrayManager
    apply_with_block = apply

371 

    def setitem(self, indexer, value, warn: bool = True) -> Self:
        """
        Set values with indexer.

        For SingleBlockManager, this backs s[indexer] = value
        """
        if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
            raise ValueError(f"Cannot set values with ndim > {self.ndim}")

        if warn and warn_copy_on_write() and not self._has_no_reference(0):
            # CoW warning mode: values are shared, emit the deprecation
            # warning instead of copying.
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        elif using_copy_on_write() and not self._has_no_reference(0):
            # this method is only called if there is a single block -> hardcoded 0
            # Split blocks to only copy the columns we want to modify
            if self.ndim == 2 and isinstance(indexer, tuple):
                blk_loc = self.blklocs[indexer[1]]
                if is_list_like(blk_loc) and blk_loc.ndim == 2:
                    blk_loc = np.squeeze(blk_loc, axis=0)
                elif not is_list_like(blk_loc):
                    # Keep dimension and copy data later
                    blk_loc = [blk_loc]  # type: ignore[assignment]
                if len(blk_loc) == 0:
                    # nothing to set; return a shallow copy
                    return self.copy(deep=False)

                values = self.blocks[0].values
                if values.ndim == 2:
                    values = values[blk_loc]
                # "T" has no attribute "_iset_split_block"
                self._iset_split_block(  # type: ignore[attr-defined]
                    0, blk_loc, values
                )
                # first block equals values
                self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)
                return self
            # No need to split if we either set all columns or on a single block
            # manager
            self = self.copy()

        return self.apply("setitem", indexer=indexer, value=value)

416 

    def diff(self, n: int) -> Self:
        # only reached with self.ndim == 2
        # Delegate the n-period difference to each block.
        return self.apply("diff", n=n)

420 

421 def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: 

422 if copy is None: 

423 if using_copy_on_write(): 

424 copy = False 

425 else: 

426 copy = True 

427 elif using_copy_on_write(): 

428 copy = False 

429 

430 return self.apply( 

431 "astype", 

432 dtype=dtype, 

433 copy=copy, 

434 errors=errors, 

435 using_cow=using_copy_on_write(), 

436 ) 

437 

438 def convert(self, copy: bool | None) -> Self: 

439 if copy is None: 

440 if using_copy_on_write(): 

441 copy = False 

442 else: 

443 copy = True 

444 elif using_copy_on_write(): 

445 copy = False 

446 

447 return self.apply("convert", copy=copy, using_cow=using_copy_on_write()) 

448 

449 def convert_dtypes(self, **kwargs): 

450 if using_copy_on_write(): 

451 copy = False 

452 else: 

453 copy = True 

454 

455 return self.apply( 

456 "convert_dtypes", copy=copy, using_cow=using_copy_on_write(), **kwargs 

457 ) 

458 

    def get_values_for_csv(
        self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
    ) -> Self:
        """
        Convert values to native types (strings / python objects) that are used
        in formatting (repr / csv).

        All keyword arguments are forwarded to Block.get_values_for_csv.
        """
        return self.apply(
            "get_values_for_csv",
            na_rep=na_rep,
            quoting=quoting,
            float_format=float_format,
            date_format=date_format,
            decimal=decimal,
        )

474 

    @property
    def any_extension_types(self) -> bool:
        """Whether any of the blocks in this manager are extension blocks"""
        # Delegates entirely to Block.is_extension.
        return any(block.is_extension for block in self.blocks)

479 

    @property
    def is_view(self) -> bool:
        """return a boolean if we are a single block and are a view"""
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated

        # conservative answer for multi-block managers
        return False

494 

    def _get_data_subset(self, predicate: Callable) -> Self:
        # Keep only the blocks whose values satisfy ``predicate``.
        blocks = [blk for blk in self.blocks if predicate(blk.values)]
        return self._combine(blocks)

498 

    def get_bool_data(self) -> Self:
        """
        Select blocks that are bool-dtype and columns from object-dtype blocks
        that are all-bool.
        """

        new_blocks = []

        for blk in self.blocks:
            if blk.dtype == bool:
                new_blocks.append(blk)

            elif blk.is_object:
                # split the object block into single-column blocks and keep
                # only the columns that turn out to be all-bool
                nbs = blk._split()
                new_blocks.extend(nb for nb in nbs if nb.is_bool)

        return self._combine(new_blocks)

516 

517 def get_numeric_data(self) -> Self: 

518 numeric_blocks = [blk for blk in self.blocks if blk.is_numeric] 

519 if len(numeric_blocks) == len(self.blocks): 

520 # Avoid somewhat expensive _combine 

521 return self 

522 return self._combine(numeric_blocks) 

523 

    def _combine(self, blocks: list[Block], index: Index | None = None) -> Self:
        """return a new manager with the blocks"""
        if len(blocks) == 0:
            if self.ndim == 2:
                # retain our own Index dtype
                if index is not None:
                    axes = [self.items[:0], index]
                else:
                    axes = [self.items[:0]] + self.axes[1:]
                return self.make_empty(axes)
            return self.make_empty()

        # FIXME: optimization potential
        # Re-number manager locations so the kept columns are 0..n-1.
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks: list[Block] = []
        for b in blocks:
            # shallow copy so we can rewrite mgr_locs without touching
            # the original blocks
            nb = b.copy(deep=False)
            nb.mgr_locs = BlockPlacement(inv_indexer[nb.mgr_locs.indexer])
            new_blocks.append(nb)

        axes = list(self.axes)
        if index is not None:
            axes[-1] = index
        axes[0] = self.items.take(indexer)

        return type(self).from_blocks(new_blocks, axes)

552 

    @property
    def nblocks(self) -> int:
        """Number of Blocks held by this manager."""
        return len(self.blocks)

556 

    def copy(self, deep: bool | None | Literal["all"] = True) -> Self:
        """
        Make deep or shallow copy of BlockManager

        Parameters
        ----------
        deep : bool, string or None, default True
            If False or None, return a shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        if deep is None:
            if using_copy_on_write():
                # use shallow copy
                deep = False
            else:
                # preserve deep copy for BlockManager with copy=None
                deep = True

        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                # only deep='all' deep-copies the axes themselves
                return ax.copy(deep=True) if deep == "all" else ax.view()

            new_axes = [copy_func(ax) for ax in self.axes]
        else:
            if using_copy_on_write():
                new_axes = [ax.view() for ax in self.axes]
            else:
                new_axes = list(self.axes)

        res = self.apply("copy", deep=deep)
        res.axes = new_axes

        if self.ndim > 1:
            # Avoid needing to re-compute these
            blknos = self._blknos
            if blknos is not None:
                res._blknos = blknos.copy()
                res._blklocs = self._blklocs.copy()

        if deep:
            res._consolidate_inplace()
        return res

606 

    def consolidate(self) -> Self:
        """
        Join together blocks having same dtype

        Returns
        -------
        y : BlockManager
        """
        if self.is_consolidated():
            return self

        # shallow re-wrap, then consolidate the new manager in place so
        # ``self`` is left untouched
        bm = type(self)(self.blocks, self.axes, verify_integrity=False)
        bm._is_consolidated = False
        bm._consolidate_inplace()
        return bm

622 

    def reindex_indexer(
        self,
        new_axis: Index,
        indexer: npt.NDArray[np.intp] | None,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool | None = True,
        only_slice: bool = False,
        *,
        use_na_proxy: bool = False,
    ) -> Self:
        """
        Parameters
        ----------
        new_axis : Index
        indexer : ndarray[intp] or None
        axis : int
        fill_value : object, default None
        allow_dups : bool, default False
        copy : bool or None, default True
            If None, regard as False to get shallow copy.
        only_slice : bool, default False
            Whether to take views, not copies, along columns.
        use_na_proxy : bool, default False
            Whether to use a np.void ndarray for newly introduced columns.

        pandas-indexer with -1's only.
        """
        if copy is None:
            if using_copy_on_write():
                # use shallow copy
                copy = False
            else:
                # preserve deep copy for BlockManager with copy=None
                copy = True

        if indexer is None:
            # Pure relabeling: no data movement, at most a copy.
            if new_axis is self.axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result.axes = list(self.axes)
            result.axes[axis] = new_axis
            return result

        # Should be intp, but in some cases we get int64 on 32bit builds
        assert isinstance(indexer, np.ndarray)

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self.axes[axis]._validate_can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            # Taking along the items axis may split/slice blocks.
            new_blocks = self._slice_take_blocks_ax0(
                indexer,
                fill_value=fill_value,
                only_slice=only_slice,
                use_na_proxy=use_na_proxy,
            )
        else:
            new_blocks = [
                blk.take_nd(
                    indexer,
                    axis=1,
                    fill_value=(
                        fill_value if fill_value is not None else blk.fill_value
                    ),
                )
                for blk in self.blocks
            ]

        new_axes = list(self.axes)
        new_axes[axis] = new_axis

        new_mgr = type(self).from_blocks(new_blocks, new_axes)
        if axis == 1:
            # We can avoid the need to rebuild these
            new_mgr._blknos = self.blknos.copy()
            new_mgr._blklocs = self.blklocs.copy()
        return new_mgr

707 

    def _slice_take_blocks_ax0(
        self,
        slice_or_indexer: slice | np.ndarray,
        fill_value=lib.no_default,
        only_slice: bool = False,
        *,
        use_na_proxy: bool = False,
        ref_inplace_op: bool = False,
    ) -> list[Block]:
        """
        Slice/take blocks along axis=0.

        Overloaded for SingleBlock

        Parameters
        ----------
        slice_or_indexer : slice or np.ndarray[int64]
        fill_value : scalar, default lib.no_default
        only_slice : bool, default False
            If True, we always return views on existing arrays, never copies.
            This is used when called from ops.blockwise.operate_blockwise.
        use_na_proxy : bool, default False
            Whether to use a np.void ndarray for newly introduced columns.
        ref_inplace_op: bool, default False
            Don't track refs if True because we operate inplace

        Returns
        -------
        new_blocks : list of Block
        """
        allow_fill = fill_value is not lib.no_default

        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
            slice_or_indexer, self.shape[0], allow_fill=allow_fill
        )

        # Fast paths when all of the data lives in a single block.
        if self.is_single_block:
            blk = self.blocks[0]

            if sl_type == "slice":
                # GH#32959 EABlock would fail since we can't make 0-width
                # TODO(EA2D): special casing unnecessary with 2D EAs
                if sllen == 0:
                    return []
                bp = BlockPlacement(slice(0, sllen))
                return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
            elif not allow_fill or self.ndim == 1:
                if allow_fill and fill_value is None:
                    fill_value = blk.fill_value

                if not allow_fill and only_slice:
                    # GH#33597 slice instead of take, so we get
                    # views instead of copies
                    blocks = [
                        blk.getitem_block_columns(
                            slice(ml, ml + 1),
                            new_mgr_locs=BlockPlacement(i),
                            ref_inplace_op=ref_inplace_op,
                        )
                        for i, ml in enumerate(slobj)
                    ]
                    return blocks
                else:
                    bp = BlockPlacement(slice(0, sllen))
                    return [
                        blk.take_nd(
                            slobj,
                            axis=0,
                            new_mgr_locs=bp,
                            fill_value=fill_value,
                        )
                    ]

        if sl_type == "slice":
            blknos = self.blknos[slobj]
            blklocs = self.blklocs[slobj]
        else:
            blknos = algos.take_nd(
                self.blknos, slobj, fill_value=-1, allow_fill=allow_fill
            )
            blklocs = algos.take_nd(
                self.blklocs, slobj, fill_value=-1, allow_fill=allow_fill
            )

        # When filling blknos, make sure blknos is updated before appending to
        # blocks list, that way new blkno is exactly len(blocks).
        blocks = []
        group = not only_slice
        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
            if blkno == -1:
                # If we've got here, fill_value was not lib.no_default

                blocks.append(
                    self._make_na_block(
                        placement=mgr_locs,
                        fill_value=fill_value,
                        use_na_proxy=use_na_proxy,
                    )
                )
            else:
                blk = self.blocks[blkno]

                # Otherwise, slicing along items axis is necessary.
                if not blk._can_consolidate and not blk._validate_ndim:
                    # i.e. we dont go through here for DatetimeTZBlock
                    # A non-consolidatable block, it's easy, because there's
                    # only one item and each mgr loc is a copy of that single
                    # item.
                    deep = not (only_slice or using_copy_on_write())
                    for mgr_loc in mgr_locs:
                        newblk = blk.copy(deep=deep)
                        newblk.mgr_locs = BlockPlacement(slice(mgr_loc, mgr_loc + 1))
                        blocks.append(newblk)

                else:
                    # GH#32779 to avoid the performance penalty of copying,
                    # we may try to only slice
                    taker = blklocs[mgr_locs.indexer]
                    max_len = max(len(mgr_locs), taker.max() + 1)
                    if only_slice or using_copy_on_write():
                        taker = lib.maybe_indices_to_slice(taker, max_len)

                    if isinstance(taker, slice):
                        nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
                        blocks.append(nb)
                    elif only_slice:
                        # GH#33597 slice instead of take, so we get
                        # views instead of copies
                        for i, ml in zip(taker, mgr_locs):
                            slc = slice(i, i + 1)
                            bp = BlockPlacement(ml)
                            nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
                            # We have np.shares_memory(nb.values, blk.values)
                            blocks.append(nb)
                    else:
                        nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
                        blocks.append(nb)

        return blocks

847 

    def _make_na_block(
        self, placement: BlockPlacement, fill_value=None, use_na_proxy: bool = False
    ) -> Block:
        # Note: we only get here with self.ndim == 2

        if use_na_proxy:
            # Placeholder block of np.void marking columns not yet set.
            assert fill_value is None
            shape = (len(placement), self.shape[1])
            vals = np.empty(shape, dtype=np.void)
            nb = NumpyBlock(vals, placement, ndim=2)
            return nb

        if fill_value is None:
            fill_value = np.nan

        shape = (len(placement), self.shape[1])

        # pick a dtype that can actually hold fill_value
        dtype, fill_value = infer_dtype_from_scalar(fill_value)
        block_values = make_na_array(dtype, shape, fill_value)
        return new_block_2d(block_values, placement=placement)

868 

    def take(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt = 1,
        verify: bool = True,
    ) -> Self:
        """
        Take items along any axis.

        indexer : np.ndarray[np.intp]
        axis : int, default 1
        verify : bool, default True
            Check that all entries are between 0 and len(self) - 1, inclusive.
            Pass verify=False if this check has been done by the caller.

        Returns
        -------
        BlockManager
        """
        # Caller is responsible for ensuring indexer annotation is accurate

        n = self.shape[axis]
        # presumably normalizes negative indices / bounds-checks —
        # see pandas.core.indexers.maybe_convert_indices
        indexer = maybe_convert_indices(indexer, n, verify=verify)

        new_labels = self.axes[axis].take(indexer)
        return self.reindex_indexer(
            new_axis=new_labels,
            indexer=indexer,
            axis=axis,
            allow_dups=True,
            copy=None,
        )

901 

902 

903class BlockManager(libinternals.BlockManager, BaseBlockManager): 

904 """ 

905 BaseBlockManager that holds 2D blocks. 

906 """ 

907 

908 ndim = 2 

909 

910 # ---------------------------------------------------------------- 

911 # Constructors 

912 

    def __init__(
        self,
        blocks: Sequence[Block],
        axes: Sequence[Index],
        verify_integrity: bool = True,
    ) -> None:
        # NOTE(review): no explicit attribute assignment here — storage of
        # blocks/axes presumably happens in the libinternals.BlockManager
        # (cython) base class; this __init__ only validates.
        if verify_integrity:
            # Assertion disabled for performance
            # assert all(isinstance(x, Index) for x in axes)

            for block in blocks:
                if self.ndim != block.ndim:
                    raise AssertionError(
                        f"Number of Block dimensions ({block.ndim}) must equal "
                        f"number of axes ({self.ndim})"
                    )
                # As of 2.0, the caller is responsible for ensuring that
                # DatetimeTZBlock with block.ndim == 2 has block.values.ndim ==2;
                # previously there was a special check for fastparquet compat.

            self._verify_integrity()

934 

    def _verify_integrity(self) -> None:
        """Sanity-check that blocks and axes describe a consistent shape."""
        mgr_shape = self.shape
        tot_items = sum(len(x.mgr_locs) for x in self.blocks)
        for block in self.blocks:
            # every block must agree with the manager on all non-items axes
            if block.shape[1:] != mgr_shape[1:]:
                raise_construction_error(tot_items, block.shape[1:], self.axes)
        if len(self.items) != tot_items:
            raise AssertionError(
                "Number of manager items must equal union of "
                f"block items\n# manager items: {len(self.items)}, # "
                f"tot_items: {tot_items}"
            )

947 

    @classmethod
    def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> Self:
        """
        Constructor for BlockManager and SingleBlockManager with same signature.
        """
        # Integrity checks are skipped: callers guarantee consistency.
        return cls(blocks, axes, verify_integrity=False)

954 

955 # ---------------------------------------------------------------- 

956 # Indexing 

957 

    def fast_xs(self, loc: int) -> SingleBlockManager:
        """
        Return the array corresponding to `frame.iloc[loc]`.

        Parameters
        ----------
        loc : int

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        if len(self.blocks) == 1:
            # TODO: this could be wrong if blk.mgr_locs is not slice(None)-like;
            # is this ruled out in the general case?
            result = self.blocks[0].iget((slice(None), loc))
            # in the case of a single block, the new block is a view
            bp = BlockPlacement(slice(0, len(result)))
            block = new_block(
                result,
                placement=bp,
                ndim=1,
                refs=self.blocks[0].refs,
            )
            return SingleBlockManager(block, self.axes[0])

        # Multiple blocks: interleave the row into one 1D result array.
        dtype = interleaved_dtype([blk.dtype for blk in self.blocks])

        n = len(self)

        if isinstance(dtype, ExtensionDtype):
            # TODO: use object dtype as workaround for non-performant
            # EA.__setitem__ methods. (primarily ArrowExtensionArray.__setitem__
            # when iteratively setting individual values)
            # https://github.com/pandas-dev/pandas/pull/54508#issuecomment-1675827918
            result = np.empty(n, dtype=object)
        else:
            result = np.empty(n, dtype=dtype)
            result = ensure_wrapped_if_datetimelike(result)

        for blk in self.blocks:
            # Such assignment may incorrectly coerce NaT to None
            # result[blk.mgr_locs] = blk._slice((slice(None), loc))
            for i, rl in enumerate(blk.mgr_locs):
                result[rl] = blk.iget((i, loc))

        if isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            result = cls._from_sequence(result, dtype=dtype)

        bp = BlockPlacement(slice(0, len(result)))
        block = new_block(result, placement=bp, ndim=1)
        return SingleBlockManager(block, self.axes[0])

1011 

    def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager:
        """
        Return the data as a SingleBlockManager.

        Parameters
        ----------
        i : int
            Column position.
        track_ref : bool, default True
            Whether the new block shares refs with the source block
            (needed for Copy-on-Write tracking).
        """
        block = self.blocks[self.blknos[i]]
        values = block.iget(self.blklocs[i])

        # shortcut for select a single-dim from a 2-dim BM
        bp = BlockPlacement(slice(0, len(values)))
        nb = type(block)(
            values, placement=bp, ndim=1, refs=block.refs if track_ref else None
        )
        return SingleBlockManager(nb, self.axes[1])

1025 

    def iget_values(self, i: int) -> ArrayLike:
        """
        Return the data for column i as the values (ndarray or ExtensionArray).

        Warning! The returned array is a view but doesn't handle Copy-on-Write,
        so this should be used with caution.
        """
        # TODO(CoW) making the arrays read-only might make this safer to use?
        # blknos/blklocs map the column position into the owning block.
        block = self.blocks[self.blknos[i]]
        values = block.iget(self.blklocs[i])
        return values

1037 

    @property
    def column_arrays(self) -> list[np.ndarray]:
        """
        Used in the JSON C code to access column arrays.
        This optimizes compared to using `iget_values` by converting each
        block only once.

        Warning! This doesn't handle Copy-on-Write, so should be used with
        caution (current use case of consuming this in the JSON code is fine).
        """
        # This is an optimized equivalent to
        # result = [self.iget_values(i) for i in range(len(self.items))]
        result: list[np.ndarray | None] = [None] * len(self.items)

        for blk in self.blocks:
            mgr_locs = blk._mgr_locs
            values = blk.array_values._values_for_json()
            if values.ndim == 1:
                # TODO(EA2D): special casing not needed with 2D EAs
                result[mgr_locs[0]] = values

            else:
                # scatter each row of the 2D block to its column slot
                for i, loc in enumerate(mgr_locs):
                    result[loc] = values[i]

        # error: Incompatible return value type (got "List[None]",
        # expected "List[ndarray[Any, Any]]")
        return result  # type: ignore[return-value]

1065 

1066 def iset( 

1067 self, 

1068 loc: int | slice | np.ndarray, 

1069 value: ArrayLike, 

1070 inplace: bool = False, 

1071 refs: BlockValuesRefs | None = None, 

1072 ) -> None: 

1073 """ 

1074 Set new item in-place. Does not consolidate. Adds new Block if not 

1075 contained in the current set of items 

1076 """ 

1077 

1078 # FIXME: refactor, clearly separate broadcasting & zip-like assignment 

1079 # can prob also fix the various if tests for sparse/categorical 

1080 if self._blklocs is None and self.ndim > 1: 

1081 self._rebuild_blknos_and_blklocs() 

1082 

1083 # Note: we exclude DTA/TDA here 

1084 value_is_extension_type = is_1d_only_ea_dtype(value.dtype) 

1085 if not value_is_extension_type: 

1086 if value.ndim == 2: 

1087 value = value.T 

1088 else: 

1089 value = ensure_block_shape(value, ndim=2) 

1090 

1091 if value.shape[1:] != self.shape[1:]: 

1092 raise AssertionError( 

1093 "Shape of new values must be compatible with manager shape" 

1094 ) 

1095 

1096 if lib.is_integer(loc): 

1097 # We have 6 tests where loc is _not_ an int. 

1098 # In this case, get_blkno_placements will yield only one tuple, 

1099 # containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1))) 

1100 

1101 # Check if we can use _iset_single fastpath 

1102 loc = cast(int, loc) 

1103 blkno = self.blknos[loc] 

1104 blk = self.blocks[blkno] 

1105 if len(blk._mgr_locs) == 1: # TODO: fastest way to check this? 

1106 return self._iset_single( 

1107 loc, 

1108 value, 

1109 inplace=inplace, 

1110 blkno=blkno, 

1111 blk=blk, 

1112 refs=refs, 

1113 ) 

1114 

1115 # error: Incompatible types in assignment (expression has type 

1116 # "List[Union[int, slice, ndarray]]", variable has type "Union[int, 

1117 # slice, ndarray]") 

1118 loc = [loc] # type: ignore[assignment] 

1119 

1120 # categorical/sparse/datetimetz 

1121 if value_is_extension_type: 

1122 

1123 def value_getitem(placement): 

1124 return value 

1125 

1126 else: 

1127 

1128 def value_getitem(placement): 

1129 return value[placement.indexer] 

1130 

1131 # Accessing public blknos ensures the public versions are initialized 

1132 blknos = self.blknos[loc] 

1133 blklocs = self.blklocs[loc].copy() 

1134 

1135 unfit_mgr_locs = [] 

1136 unfit_val_locs = [] 

1137 removed_blknos = [] 

1138 for blkno_l, val_locs in libinternals.get_blkno_placements(blknos, group=True): 

1139 blk = self.blocks[blkno_l] 

1140 blk_locs = blklocs[val_locs.indexer] 

1141 if inplace and blk.should_store(value): 

1142 # Updating inplace -> check if we need to do Copy-on-Write 

1143 if using_copy_on_write() and not self._has_no_reference_block(blkno_l): 

1144 self._iset_split_block( 

1145 blkno_l, blk_locs, value_getitem(val_locs), refs=refs 

1146 ) 

1147 else: 

1148 blk.set_inplace(blk_locs, value_getitem(val_locs)) 

1149 continue 

1150 else: 

1151 unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) 

1152 unfit_val_locs.append(val_locs) 

1153 

1154 # If all block items are unfit, schedule the block for removal. 

1155 if len(val_locs) == len(blk.mgr_locs): 

1156 removed_blknos.append(blkno_l) 

1157 continue 

1158 else: 

1159 # Defer setting the new values to enable consolidation 

1160 self._iset_split_block(blkno_l, blk_locs, refs=refs) 

1161 

1162 if len(removed_blknos): 

1163 # Remove blocks & update blknos accordingly 

1164 is_deleted = np.zeros(self.nblocks, dtype=np.bool_) 

1165 is_deleted[removed_blknos] = True 

1166 

1167 new_blknos = np.empty(self.nblocks, dtype=np.intp) 

1168 new_blknos.fill(-1) 

1169 new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) 

1170 self._blknos = new_blknos[self._blknos] 

1171 self.blocks = tuple( 

1172 blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos) 

1173 ) 

1174 

1175 if unfit_val_locs: 

1176 unfit_idxr = np.concatenate(unfit_mgr_locs) 

1177 unfit_count = len(unfit_idxr) 

1178 

1179 new_blocks: list[Block] = [] 

1180 if value_is_extension_type: 

1181 # This code (ab-)uses the fact that EA blocks contain only 

1182 # one item. 

1183 # TODO(EA2D): special casing unnecessary with 2D EAs 

1184 new_blocks.extend( 

1185 new_block_2d( 

1186 values=value, 

1187 placement=BlockPlacement(slice(mgr_loc, mgr_loc + 1)), 

1188 refs=refs, 

1189 ) 

1190 for mgr_loc in unfit_idxr 

1191 ) 

1192 

1193 self._blknos[unfit_idxr] = np.arange(unfit_count) + len(self.blocks) 

1194 self._blklocs[unfit_idxr] = 0 

1195 

1196 else: 

1197 # unfit_val_locs contains BlockPlacement objects 

1198 unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:]) 

1199 

1200 new_blocks.append( 

1201 new_block_2d( 

1202 values=value_getitem(unfit_val_items), 

1203 placement=BlockPlacement(unfit_idxr), 

1204 refs=refs, 

1205 ) 

1206 ) 

1207 

1208 self._blknos[unfit_idxr] = len(self.blocks) 

1209 self._blklocs[unfit_idxr] = np.arange(unfit_count) 

1210 

1211 self.blocks += tuple(new_blocks) 

1212 

1213 # Newly created block's dtype may already be present. 

1214 self._known_consolidated = False 

1215 

1216 def _iset_split_block( 

1217 self, 

1218 blkno_l: int, 

1219 blk_locs: np.ndarray | list[int], 

1220 value: ArrayLike | None = None, 

1221 refs: BlockValuesRefs | None = None, 

1222 ) -> None: 

1223 """Removes columns from a block by splitting the block. 

1224 

1225 Avoids copying the whole block through slicing and updates the manager 

1226 after determinint the new block structure. Optionally adds a new block, 

1227 otherwise has to be done by the caller. 

1228 

1229 Parameters 

1230 ---------- 

1231 blkno_l: The block number to operate on, relevant for updating the manager 

1232 blk_locs: The locations of our block that should be deleted. 

1233 value: The value to set as a replacement. 

1234 refs: The reference tracking object of the value to set. 

1235 """ 

1236 blk = self.blocks[blkno_l] 

1237 

1238 if self._blklocs is None: 

1239 self._rebuild_blknos_and_blklocs() 

1240 

1241 nbs_tup = tuple(blk.delete(blk_locs)) 

1242 if value is not None: 

1243 locs = blk.mgr_locs.as_array[blk_locs] 

1244 first_nb = new_block_2d(value, BlockPlacement(locs), refs=refs) 

1245 else: 

1246 first_nb = nbs_tup[0] 

1247 nbs_tup = tuple(nbs_tup[1:]) 

1248 

1249 nr_blocks = len(self.blocks) 

1250 blocks_tup = ( 

1251 self.blocks[:blkno_l] + (first_nb,) + self.blocks[blkno_l + 1 :] + nbs_tup 

1252 ) 

1253 self.blocks = blocks_tup 

1254 

1255 if not nbs_tup and value is not None: 

1256 # No need to update anything if split did not happen 

1257 return 

1258 

1259 self._blklocs[first_nb.mgr_locs.indexer] = np.arange(len(first_nb)) 

1260 

1261 for i, nb in enumerate(nbs_tup): 

1262 self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb)) 

1263 self._blknos[nb.mgr_locs.indexer] = i + nr_blocks 

1264 

1265 def _iset_single( 

1266 self, 

1267 loc: int, 

1268 value: ArrayLike, 

1269 inplace: bool, 

1270 blkno: int, 

1271 blk: Block, 

1272 refs: BlockValuesRefs | None = None, 

1273 ) -> None: 

1274 """ 

1275 Fastpath for iset when we are only setting a single position and 

1276 the Block currently in that position is itself single-column. 

1277 

1278 In this case we can swap out the entire Block and blklocs and blknos 

1279 are unaffected. 

1280 """ 

1281 # Caller is responsible for verifying value.shape 

1282 

1283 if inplace and blk.should_store(value): 

1284 copy = False 

1285 if using_copy_on_write() and not self._has_no_reference_block(blkno): 

1286 # perform Copy-on-Write and clear the reference 

1287 copy = True 

1288 iloc = self.blklocs[loc] 

1289 blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) 

1290 return 

1291 

1292 nb = new_block_2d(value, placement=blk._mgr_locs, refs=refs) 

1293 old_blocks = self.blocks 

1294 new_blocks = old_blocks[:blkno] + (nb,) + old_blocks[blkno + 1 :] 

1295 self.blocks = new_blocks 

1296 return 

1297 

1298 def column_setitem( 

1299 self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False 

1300 ) -> None: 

1301 """ 

1302 Set values ("setitem") into a single column (not setting the full column). 

1303 

1304 This is a method on the BlockManager level, to avoid creating an 

1305 intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) 

1306 """ 

1307 needs_to_warn = False 

1308 if warn_copy_on_write() and not self._has_no_reference(loc): 

1309 if not isinstance( 

1310 self.blocks[self.blknos[loc]].values, 

1311 (ArrowExtensionArray, ArrowStringArray), 

1312 ): 

1313 # We might raise if we are in an expansion case, so defer 

1314 # warning till we actually updated 

1315 needs_to_warn = True 

1316 

1317 elif using_copy_on_write() and not self._has_no_reference(loc): 

1318 blkno = self.blknos[loc] 

1319 # Split blocks to only copy the column we want to modify 

1320 blk_loc = self.blklocs[loc] 

1321 # Copy our values 

1322 values = self.blocks[blkno].values 

1323 if values.ndim == 1: 

1324 values = values.copy() 

1325 else: 

1326 # Use [blk_loc] as indexer to keep ndim=2, this already results in a 

1327 # copy 

1328 values = values[[blk_loc]] 

1329 self._iset_split_block(blkno, [blk_loc], values) 

1330 

1331 # this manager is only created temporarily to mutate the values in place 

1332 # so don't track references, otherwise the `setitem` would perform CoW again 

1333 col_mgr = self.iget(loc, track_ref=False) 

1334 if inplace_only: 

1335 col_mgr.setitem_inplace(idx, value) 

1336 else: 

1337 new_mgr = col_mgr.setitem((idx,), value) 

1338 self.iset(loc, new_mgr._block.values, inplace=True) 

1339 

1340 if needs_to_warn: 

1341 warnings.warn( 

1342 COW_WARNING_GENERAL_MSG, 

1343 FutureWarning, 

1344 stacklevel=find_stack_level(), 

1345 ) 

1346 

1347 def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: 

1348 """ 

1349 Insert item at selected position. 

1350 

1351 Parameters 

1352 ---------- 

1353 loc : int 

1354 item : hashable 

1355 value : np.ndarray or ExtensionArray 

1356 refs : The reference tracking object of the value to set. 

1357 """ 

1358 with warnings.catch_warnings(): 

1359 # TODO: re-issue this with setitem-specific message? 

1360 warnings.filterwarnings( 

1361 "ignore", 

1362 "The behavior of Index.insert with object-dtype is deprecated", 

1363 category=FutureWarning, 

1364 ) 

1365 new_axis = self.items.insert(loc, item) 

1366 

1367 if value.ndim == 2: 

1368 value = value.T 

1369 if len(value) > 1: 

1370 raise ValueError( 

1371 f"Expected a 1D array, got an array with shape {value.T.shape}" 

1372 ) 

1373 else: 

1374 value = ensure_block_shape(value, ndim=self.ndim) 

1375 

1376 bp = BlockPlacement(slice(loc, loc + 1)) 

1377 block = new_block_2d(values=value, placement=bp, refs=refs) 

1378 

1379 if not len(self.blocks): 

1380 # Fastpath 

1381 self._blklocs = np.array([0], dtype=np.intp) 

1382 self._blknos = np.array([0], dtype=np.intp) 

1383 else: 

1384 self._insert_update_mgr_locs(loc) 

1385 self._insert_update_blklocs_and_blknos(loc) 

1386 

1387 self.axes[0] = new_axis 

1388 self.blocks += (block,) 

1389 

1390 self._known_consolidated = False 

1391 

1392 if sum(not block.is_extension for block in self.blocks) > 100: 

1393 warnings.warn( 

1394 "DataFrame is highly fragmented. This is usually the result " 

1395 "of calling `frame.insert` many times, which has poor performance. " 

1396 "Consider joining all columns at once using pd.concat(axis=1) " 

1397 "instead. To get a de-fragmented frame, use `newframe = frame.copy()`", 

1398 PerformanceWarning, 

1399 stacklevel=find_stack_level(), 

1400 ) 

1401 

1402 def _insert_update_mgr_locs(self, loc) -> None: 

1403 """ 

1404 When inserting a new Block at location 'loc', we increment 

1405 all of the mgr_locs of blocks above that by one. 

1406 """ 

1407 for blkno, count in _fast_count_smallints(self.blknos[loc:]): 

1408 # .620 this way, .326 of which is in increment_above 

1409 blk = self.blocks[blkno] 

1410 blk._mgr_locs = blk._mgr_locs.increment_above(loc) 

1411 

1412 def _insert_update_blklocs_and_blknos(self, loc) -> None: 

1413 """ 

1414 When inserting a new Block at location 'loc', we update our 

1415 _blklocs and _blknos. 

1416 """ 

1417 

1418 # Accessing public blklocs ensures the public versions are initialized 

1419 if loc == self.blklocs.shape[0]: 

1420 # np.append is a lot faster, let's use it if we can. 

1421 self._blklocs = np.append(self._blklocs, 0) 

1422 self._blknos = np.append(self._blknos, len(self.blocks)) 

1423 elif loc == 0: 

1424 # np.append is a lot faster, let's use it if we can. 

1425 self._blklocs = np.append(self._blklocs[::-1], 0)[::-1] 

1426 self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1] 

1427 else: 

1428 new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos( 

1429 self.blklocs, self.blknos, loc, len(self.blocks) 

1430 ) 

1431 self._blklocs = new_blklocs 

1432 self._blknos = new_blknos 

1433 

1434 def idelete(self, indexer) -> BlockManager: 

1435 """ 

1436 Delete selected locations, returning a new BlockManager. 

1437 """ 

1438 is_deleted = np.zeros(self.shape[0], dtype=np.bool_) 

1439 is_deleted[indexer] = True 

1440 taker = (~is_deleted).nonzero()[0] 

1441 

1442 nbs = self._slice_take_blocks_ax0(taker, only_slice=True, ref_inplace_op=True) 

1443 new_columns = self.items[~is_deleted] 

1444 axes = [new_columns, self.axes[1]] 

1445 return type(self)(tuple(nbs), axes, verify_integrity=False) 

1446 

1447 # ---------------------------------------------------------------- 

1448 # Block-wise Operation 

1449 

1450 def grouped_reduce(self, func: Callable) -> Self: 

1451 """ 

1452 Apply grouped reduction function blockwise, returning a new BlockManager. 

1453 

1454 Parameters 

1455 ---------- 

1456 func : grouped reduction function 

1457 

1458 Returns 

1459 ------- 

1460 BlockManager 

1461 """ 

1462 result_blocks: list[Block] = [] 

1463 

1464 for blk in self.blocks: 

1465 if blk.is_object: 

1466 # split on object-dtype blocks bc some columns may raise 

1467 # while others do not. 

1468 for sb in blk._split(): 

1469 applied = sb.apply(func) 

1470 result_blocks = extend_blocks(applied, result_blocks) 

1471 else: 

1472 applied = blk.apply(func) 

1473 result_blocks = extend_blocks(applied, result_blocks) 

1474 

1475 if len(result_blocks) == 0: 

1476 nrows = 0 

1477 else: 

1478 nrows = result_blocks[0].values.shape[-1] 

1479 index = Index(range(nrows)) 

1480 

1481 return type(self).from_blocks(result_blocks, [self.axes[0], index]) 

1482 

1483 def reduce(self, func: Callable) -> Self: 

1484 """ 

1485 Apply reduction function blockwise, returning a single-row BlockManager. 

1486 

1487 Parameters 

1488 ---------- 

1489 func : reduction function 

1490 

1491 Returns 

1492 ------- 

1493 BlockManager 

1494 """ 

1495 # If 2D, we assume that we're operating column-wise 

1496 assert self.ndim == 2 

1497 

1498 res_blocks: list[Block] = [] 

1499 for blk in self.blocks: 

1500 nbs = blk.reduce(func) 

1501 res_blocks.extend(nbs) 

1502 

1503 index = Index([None]) # placeholder 

1504 new_mgr = type(self).from_blocks(res_blocks, [self.items, index]) 

1505 return new_mgr 

1506 

1507 def operate_blockwise(self, other: BlockManager, array_op) -> BlockManager: 

1508 """ 

1509 Apply array_op blockwise with another (aligned) BlockManager. 

1510 """ 

1511 return operate_blockwise(self, other, array_op) 

1512 

1513 def _equal_values(self: BlockManager, other: BlockManager) -> bool: 

1514 """ 

1515 Used in .equals defined in base class. Only check the column values 

1516 assuming shape and indexes have already been checked. 

1517 """ 

1518 return blockwise_all(self, other, array_equals) 

1519 

1520 def quantile( 

1521 self, 

1522 *, 

1523 qs: Index, # with dtype float 64 

1524 interpolation: QuantileInterpolation = "linear", 

1525 ) -> Self: 

1526 """ 

1527 Iterate over blocks applying quantile reduction. 

1528 This routine is intended for reduction type operations and 

1529 will do inference on the generated blocks. 

1530 

1531 Parameters 

1532 ---------- 

1533 interpolation : type of interpolation, default 'linear' 

1534 qs : list of the quantiles to be computed 

1535 

1536 Returns 

1537 ------- 

1538 BlockManager 

1539 """ 

1540 # Series dispatches to DataFrame for quantile, which allows us to 

1541 # simplify some of the code here and in the blocks 

1542 assert self.ndim >= 2 

1543 assert is_list_like(qs) # caller is responsible for this 

1544 

1545 new_axes = list(self.axes) 

1546 new_axes[1] = Index(qs, dtype=np.float64) 

1547 

1548 blocks = [ 

1549 blk.quantile(qs=qs, interpolation=interpolation) for blk in self.blocks 

1550 ] 

1551 

1552 return type(self)(blocks, new_axes) 

1553 

1554 # ---------------------------------------------------------------- 

1555 

1556 def unstack(self, unstacker, fill_value) -> BlockManager: 

1557 """ 

1558 Return a BlockManager with all blocks unstacked. 

1559 

1560 Parameters 

1561 ---------- 

1562 unstacker : reshape._Unstacker 

1563 fill_value : Any 

1564 fill_value for newly introduced missing values. 

1565 

1566 Returns 

1567 ------- 

1568 unstacked : BlockManager 

1569 """ 

1570 new_columns = unstacker.get_new_columns(self.items) 

1571 new_index = unstacker.new_index 

1572 

1573 allow_fill = not unstacker.mask_all 

1574 if allow_fill: 

1575 # calculating the full mask once and passing it to Block._unstack is 

1576 # faster than letting calculating it in each repeated call 

1577 new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) 

1578 needs_masking = new_mask2D.any(axis=0) 

1579 else: 

1580 needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool) 

1581 

1582 new_blocks: list[Block] = [] 

1583 columns_mask: list[np.ndarray] = [] 

1584 

1585 if len(self.items) == 0: 

1586 factor = 1 

1587 else: 

1588 fac = len(new_columns) / len(self.items) 

1589 assert fac == int(fac) 

1590 factor = int(fac) 

1591 

1592 for blk in self.blocks: 

1593 mgr_locs = blk.mgr_locs 

1594 new_placement = mgr_locs.tile_for_unstack(factor) 

1595 

1596 blocks, mask = blk._unstack( 

1597 unstacker, 

1598 fill_value, 

1599 new_placement=new_placement, 

1600 needs_masking=needs_masking, 

1601 ) 

1602 

1603 new_blocks.extend(blocks) 

1604 columns_mask.extend(mask) 

1605 

1606 # Block._unstack should ensure this holds, 

1607 assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks) 

1608 # In turn this ensures that in the BlockManager call below 

1609 # we have len(new_columns) == sum(x.shape[0] for x in new_blocks) 

1610 # which suffices to allow us to pass verify_inegrity=False 

1611 

1612 new_columns = new_columns[columns_mask] 

1613 

1614 bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False) 

1615 return bm 

1616 

1617 def to_dict(self) -> dict[str, Self]: 

1618 """ 

1619 Return a dict of str(dtype) -> BlockManager 

1620 

1621 Returns 

1622 ------- 

1623 values : a dict of dtype -> BlockManager 

1624 """ 

1625 

1626 bd: dict[str, list[Block]] = {} 

1627 for b in self.blocks: 

1628 bd.setdefault(str(b.dtype), []).append(b) 

1629 

1630 # TODO(EA2D): the combine will be unnecessary with 2D EAs 

1631 return {dtype: self._combine(blocks) for dtype, blocks in bd.items()} 

1632 

1633 def as_array( 

1634 self, 

1635 dtype: np.dtype | None = None, 

1636 copy: bool = False, 

1637 na_value: object = lib.no_default, 

1638 ) -> np.ndarray: 

1639 """ 

1640 Convert the blockmanager data into an numpy array. 

1641 

1642 Parameters 

1643 ---------- 

1644 dtype : np.dtype or None, default None 

1645 Data type of the return array. 

1646 copy : bool, default False 

1647 If True then guarantee that a copy is returned. A value of 

1648 False does not guarantee that the underlying data is not 

1649 copied. 

1650 na_value : object, default lib.no_default 

1651 Value to be used as the missing value sentinel. 

1652 

1653 Returns 

1654 ------- 

1655 arr : ndarray 

1656 """ 

1657 passed_nan = lib.is_float(na_value) and isna(na_value) 

1658 

1659 if len(self.blocks) == 0: 

1660 arr = np.empty(self.shape, dtype=float) 

1661 return arr.transpose() 

1662 

1663 if self.is_single_block: 

1664 blk = self.blocks[0] 

1665 

1666 if na_value is not lib.no_default: 

1667 # We want to copy when na_value is provided to avoid 

1668 # mutating the original object 

1669 if lib.is_np_dtype(blk.dtype, "f") and passed_nan: 

1670 # We are already numpy-float and na_value=np.nan 

1671 pass 

1672 else: 

1673 copy = True 

1674 

1675 if blk.is_extension: 

1676 # Avoid implicit conversion of extension blocks to object 

1677 

1678 # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no 

1679 # attribute "to_numpy" 

1680 arr = blk.values.to_numpy( # type: ignore[union-attr] 

1681 dtype=dtype, 

1682 na_value=na_value, 

1683 copy=copy, 

1684 ).reshape(blk.shape) 

1685 elif not copy: 

1686 arr = np.asarray(blk.values, dtype=dtype) 

1687 else: 

1688 arr = np.array(blk.values, dtype=dtype, copy=copy) 

1689 

1690 if using_copy_on_write() and not copy: 

1691 arr = arr.view() 

1692 arr.flags.writeable = False 

1693 else: 

1694 arr = self._interleave(dtype=dtype, na_value=na_value) 

1695 # The underlying data was copied within _interleave, so no need 

1696 # to further copy if copy=True or setting na_value 

1697 

1698 if na_value is lib.no_default: 

1699 pass 

1700 elif arr.dtype.kind == "f" and passed_nan: 

1701 pass 

1702 else: 

1703 arr[isna(arr)] = na_value 

1704 

1705 return arr.transpose() 

1706 

1707 def _interleave( 

1708 self, 

1709 dtype: np.dtype | None = None, 

1710 na_value: object = lib.no_default, 

1711 ) -> np.ndarray: 

1712 """ 

1713 Return ndarray from blocks with specified item order 

1714 Items must be contained in the blocks 

1715 """ 

1716 if not dtype: 

1717 # Incompatible types in assignment (expression has type 

1718 # "Optional[Union[dtype[Any], ExtensionDtype]]", variable has 

1719 # type "Optional[dtype[Any]]") 

1720 dtype = interleaved_dtype( # type: ignore[assignment] 

1721 [blk.dtype for blk in self.blocks] 

1722 ) 

1723 

1724 # error: Argument 1 to "ensure_np_dtype" has incompatible type 

1725 # "Optional[dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]" 

1726 dtype = ensure_np_dtype(dtype) # type: ignore[arg-type] 

1727 result = np.empty(self.shape, dtype=dtype) 

1728 

1729 itemmask = np.zeros(self.shape[0]) 

1730 

1731 if dtype == np.dtype("object") and na_value is lib.no_default: 

1732 # much more performant than using to_numpy below 

1733 for blk in self.blocks: 

1734 rl = blk.mgr_locs 

1735 arr = blk.get_values(dtype) 

1736 result[rl.indexer] = arr 

1737 itemmask[rl.indexer] = 1 

1738 return result 

1739 

1740 for blk in self.blocks: 

1741 rl = blk.mgr_locs 

1742 if blk.is_extension: 

1743 # Avoid implicit conversion of extension blocks to object 

1744 

1745 # error: Item "ndarray" of "Union[ndarray, ExtensionArray]" has no 

1746 # attribute "to_numpy" 

1747 arr = blk.values.to_numpy( # type: ignore[union-attr] 

1748 dtype=dtype, 

1749 na_value=na_value, 

1750 ) 

1751 else: 

1752 arr = blk.get_values(dtype) 

1753 result[rl.indexer] = arr 

1754 itemmask[rl.indexer] = 1 

1755 

1756 if not itemmask.all(): 

1757 raise AssertionError("Some items were not contained in blocks") 

1758 

1759 return result 

1760 

1761 # ---------------------------------------------------------------- 

1762 # Consolidation 

1763 

1764 def is_consolidated(self) -> bool: 

1765 """ 

1766 Return True if more than one block with the same dtype 

1767 """ 

1768 if not self._known_consolidated: 

1769 self._consolidate_check() 

1770 return self._is_consolidated 

1771 

1772 def _consolidate_check(self) -> None: 

1773 if len(self.blocks) == 1: 

1774 # fastpath 

1775 self._is_consolidated = True 

1776 self._known_consolidated = True 

1777 return 

1778 dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] 

1779 self._is_consolidated = len(dtypes) == len(set(dtypes)) 

1780 self._known_consolidated = True 

1781 

1782 def _consolidate_inplace(self) -> None: 

1783 # In general, _consolidate_inplace should only be called via 

1784 # DataFrame._consolidate_inplace, otherwise we will fail to invalidate 

1785 # the DataFrame's _item_cache. The exception is for newly-created 

1786 # BlockManager objects not yet attached to a DataFrame. 

1787 if not self.is_consolidated(): 

1788 self.blocks = _consolidate(self.blocks) 

1789 self._is_consolidated = True 

1790 self._known_consolidated = True 

1791 self._rebuild_blknos_and_blklocs() 

1792 

1793 # ---------------------------------------------------------------- 

1794 # Concatenation 

1795 

1796 @classmethod 

1797 def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self: 

1798 """ 

1799 Concatenate uniformly-indexed BlockManagers horizontally. 

1800 """ 

1801 offset = 0 

1802 blocks: list[Block] = [] 

1803 for mgr in mgrs: 

1804 for blk in mgr.blocks: 

1805 # We need to do getitem_block here otherwise we would be altering 

1806 # blk.mgr_locs in place, which would render it invalid. This is only 

1807 # relevant in the copy=False case. 

1808 nb = blk.slice_block_columns(slice(None)) 

1809 nb._mgr_locs = nb._mgr_locs.add(offset) 

1810 blocks.append(nb) 

1811 

1812 offset += len(mgr.items) 

1813 

1814 new_mgr = cls(tuple(blocks), axes) 

1815 return new_mgr 

1816 

1817 @classmethod 

1818 def concat_vertical(cls, mgrs: list[Self], axes: list[Index]) -> Self: 

1819 """ 

1820 Concatenate uniformly-indexed BlockManagers vertically. 

1821 """ 

1822 raise NotImplementedError("This logic lives (for now) in internals.concat") 

1823 

1824 

1825class SingleBlockManager(BaseBlockManager, SingleDataManager): 

1826 """manage a single block with""" 

1827 

1828 @property 

1829 def ndim(self) -> Literal[1]: 

1830 return 1 

1831 

1832 _is_consolidated = True 

1833 _known_consolidated = True 

1834 __slots__ = () 

1835 is_single_block = True 

1836 

1837 def __init__( 

1838 self, 

1839 block: Block, 

1840 axis: Index, 

1841 verify_integrity: bool = False, 

1842 ) -> None: 

1843 # Assertions disabled for performance 

1844 # assert isinstance(block, Block), type(block) 

1845 # assert isinstance(axis, Index), type(axis) 

1846 

1847 self.axes = [axis] 

1848 self.blocks = (block,) 

1849 

1850 @classmethod 

1851 def from_blocks( 

1852 cls, 

1853 blocks: list[Block], 

1854 axes: list[Index], 

1855 ) -> Self: 

1856 """ 

1857 Constructor for BlockManager and SingleBlockManager with same signature. 

1858 """ 

1859 assert len(blocks) == 1 

1860 assert len(axes) == 1 

1861 return cls(blocks[0], axes[0], verify_integrity=False) 

1862 

1863 @classmethod 

1864 def from_array( 

1865 cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None 

1866 ) -> SingleBlockManager: 

1867 """ 

1868 Constructor for if we have an array that is not yet a Block. 

1869 """ 

1870 array = maybe_coerce_values(array) 

1871 bp = BlockPlacement(slice(0, len(index))) 

1872 block = new_block(array, placement=bp, ndim=1, refs=refs) 

1873 return cls(block, index) 

1874 

1875 def to_2d_mgr(self, columns: Index) -> BlockManager: 

1876 """ 

1877 Manager analogue of Series.to_frame 

1878 """ 

1879 blk = self.blocks[0] 

1880 arr = ensure_block_shape(blk.values, ndim=2) 

1881 bp = BlockPlacement(0) 

1882 new_blk = type(blk)(arr, placement=bp, ndim=2, refs=blk.refs) 

1883 axes = [columns, self.axes[0]] 

1884 return BlockManager([new_blk], axes=axes, verify_integrity=False) 

1885 

1886 def _has_no_reference(self, i: int = 0) -> bool: 

1887 """ 

1888 Check for column `i` if it has references. 

1889 (whether it references another array or is itself being referenced) 

1890 Returns True if the column has no references. 

1891 """ 

1892 return not self.blocks[0].refs.has_reference() 

1893 

1894 def __getstate__(self): 

1895 block_values = [b.values for b in self.blocks] 

1896 block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks] 

1897 axes_array = list(self.axes) 

1898 

1899 extra_state = { 

1900 "0.14.1": { 

1901 "axes": axes_array, 

1902 "blocks": [ 

1903 {"values": b.values, "mgr_locs": b.mgr_locs.indexer} 

1904 for b in self.blocks 

1905 ], 

1906 } 

1907 } 

1908 

1909 # First three elements of the state are to maintain forward 

1910 # compatibility with 0.13.1. 

1911 return axes_array, block_values, block_items, extra_state 

1912 

1913 def __setstate__(self, state) -> None: 

1914 def unpickle_block(values, mgr_locs, ndim: int) -> Block: 

1915 # TODO(EA2D): ndim would be unnecessary with 2D EAs 

1916 # older pickles may store e.g. DatetimeIndex instead of DatetimeArray 

1917 values = extract_array(values, extract_numpy=True) 

1918 if not isinstance(mgr_locs, BlockPlacement): 

1919 mgr_locs = BlockPlacement(mgr_locs) 

1920 

1921 values = maybe_coerce_values(values) 

1922 return new_block(values, placement=mgr_locs, ndim=ndim) 

1923 

1924 if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: 

1925 state = state[3]["0.14.1"] 

1926 self.axes = [ensure_index(ax) for ax in state["axes"]] 

1927 ndim = len(self.axes) 

1928 self.blocks = tuple( 

1929 unpickle_block(b["values"], b["mgr_locs"], ndim=ndim) 

1930 for b in state["blocks"] 

1931 ) 

1932 else: 

1933 raise NotImplementedError("pre-0.14.1 pickles are no longer supported") 

1934 

1935 self._post_setstate() 

1936 

1937 def _post_setstate(self) -> None: 

1938 pass 

1939 

1940 @cache_readonly 

1941 def _block(self) -> Block: 

1942 return self.blocks[0] 

1943 

1944 @property 

1945 def _blknos(self): 

1946 """compat with BlockManager""" 

1947 return None 

1948 

1949 @property 

1950 def _blklocs(self): 

1951 """compat with BlockManager""" 

1952 return None 

1953 

1954 def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self: 

1955 # similar to get_slice, but not restricted to slice indexer 

1956 blk = self._block 

1957 if using_copy_on_write() and len(indexer) > 0 and indexer.all(): 

1958 return type(self)(blk.copy(deep=False), self.index) 

1959 array = blk.values[indexer] 

1960 

1961 if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "b": 

1962 # boolean indexing always gives a copy with numpy 

1963 refs = None 

1964 else: 

1965 # TODO(CoW) in theory only need to track reference if new_array is a view 

1966 refs = blk.refs 

1967 

1968 bp = BlockPlacement(slice(0, len(array))) 

1969 block = type(blk)(array, placement=bp, ndim=1, refs=refs) 

1970 

1971 new_idx = self.index[indexer] 

1972 return type(self)(block, new_idx) 

1973 

1974 def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: 

1975 # Assertion disabled for performance 

1976 # assert isinstance(slobj, slice), type(slobj) 

1977 if axis >= self.ndim: 

1978 raise IndexError("Requested axis not found in manager") 

1979 

1980 blk = self._block 

1981 array = blk.values[slobj] 

1982 bp = BlockPlacement(slice(0, len(array))) 

1983 # TODO this method is only used in groupby SeriesSplitter at the moment, 

1984 # so passing refs is not yet covered by the tests 

1985 block = type(blk)(array, placement=bp, ndim=1, refs=blk.refs) 

1986 new_index = self.index._getitem_slice(slobj) 

1987 return type(self)(block, new_index) 

1988 

    @property
    def index(self) -> Index:
        # The single axis of a 1-dim manager is the Series' index.
        return self.axes[0]

1992 

    @property
    def dtype(self) -> DtypeObj:
        # With exactly one block, the manager's dtype is the block's dtype.
        return self._block.dtype

1996 

1997 def get_dtypes(self) -> npt.NDArray[np.object_]: 

1998 return np.array([self._block.dtype], dtype=object) 

1999 

    def external_values(self):
        """The array that Series.values returns"""
        # Delegates directly to the single backing block.
        return self._block.external_values()

2003 

    def internal_values(self):
        """The array that Series._values returns"""
        # The block's raw values, without any external-facing conversion.
        return self._block.values

2007 

    def array_values(self) -> ExtensionArray:
        """The array that Series.array returns"""
        return self._block.array_values

2011 

2012 def get_numeric_data(self) -> Self: 

2013 if self._block.is_numeric: 

2014 return self.copy(deep=False) 

2015 return self.make_empty() 

2016 

    @property
    def _can_hold_na(self) -> bool:
        # Whether the backing block's dtype can represent missing values.
        return self._block._can_hold_na

2020 

2021 def setitem_inplace(self, indexer, value, warn: bool = True) -> None: 

2022 """ 

2023 Set values with indexer. 

2024 

2025 For Single[Block/Array]Manager, this backs s[indexer] = value 

2026 

2027 This is an inplace version of `setitem()`, mutating the manager/values 

2028 in place, not returning a new Manager (and Block), and thus never changing 

2029 the dtype. 

2030 """ 

2031 using_cow = using_copy_on_write() 

2032 warn_cow = warn_copy_on_write() 

2033 if (using_cow or warn_cow) and not self._has_no_reference(0): 

2034 if using_cow: 

2035 self.blocks = (self._block.copy(),) 

2036 self._cache.clear() 

2037 elif warn_cow and warn: 

2038 warnings.warn( 

2039 COW_WARNING_SETITEM_MSG, 

2040 FutureWarning, 

2041 stacklevel=find_stack_level(), 

2042 ) 

2043 

2044 super().setitem_inplace(indexer, value) 

2045 

2046 def idelete(self, indexer) -> SingleBlockManager: 

2047 """ 

2048 Delete single location from SingleBlockManager. 

2049 

2050 Ensures that self.blocks doesn't become empty. 

2051 """ 

2052 nb = self._block.delete(indexer)[0] 

2053 self.blocks = (nb,) 

2054 self.axes[0] = self.axes[0].delete(indexer) 

2055 self._cache.clear() 

2056 return self 

2057 

    def fast_xs(self, loc):
        """
        fast path for getting a cross-section
        return a view of the data
        """
        # Not meaningful for a 1-dim manager; callers should index the
        # Series' values directly instead.
        raise NotImplementedError("Use series._values[loc] instead")

2064 

2065 def set_values(self, values: ArrayLike) -> None: 

2066 """ 

2067 Set the values of the single block in place. 

2068 

2069 Use at your own risk! This does not check if the passed values are 

2070 valid for the current Block/SingleBlockManager (length, dtype, etc), 

2071 and this does not properly keep track of references. 

2072 """ 

2073 # NOTE(CoW) Currently this is only used for FrameColumnApply.series_generator 

2074 # which handles CoW by setting the refs manually if necessary 

2075 self.blocks[0].values = values 

2076 self.blocks[0]._mgr_locs = BlockPlacement(slice(len(values))) 

2077 

2078 def _equal_values(self, other: Self) -> bool: 

2079 """ 

2080 Used in .equals defined in base class. Only check the column values 

2081 assuming shape and indexes have already been checked. 

2082 """ 

2083 # For SingleBlockManager (i.e.Series) 

2084 if other.ndim != 1: 

2085 return False 

2086 left = self.blocks[0].values 

2087 right = other.blocks[0].values 

2088 return array_equals(left, right) 

2089 

2090 

2091# -------------------------------------------------------------------- 

2092# Constructor Helpers 

2093 

2094 

def create_block_manager_from_blocks(
    blocks: list[Block],
    axes: list[Index],
    consolidate: bool = True,
    verify_integrity: bool = True,
) -> BlockManager:
    """Construct a BlockManager from already-formed 2D blocks."""
    # If verify_integrity=False, then caller is responsible for checking
    #   all(x.shape[-1] == len(axes[1]) for x in blocks)
    #   sum(x.shape[0] for x in blocks) == len(axes[0])
    #   set(x for blk in blocks for x in blk.mgr_locs) == set(range(len(axes[0])))
    #   all(blk.ndim == 2 for blk in blocks)
    # This allows us to safely pass verify_integrity=False

    try:
        mgr = BlockManager(blocks, axes, verify_integrity=verify_integrity)
    except ValueError as err:
        # Translate into a user-friendly shape-mismatch error (always raises).
        arrays = [blk.values for blk in blocks]
        tot_items = sum(arr.shape[0] for arr in arrays)
        raise_construction_error(tot_items, arrays[0].shape[1:], axes, err)

    if consolidate:
        mgr._consolidate_inplace()
    return mgr

2119 

2120 

def create_block_manager_from_column_arrays(
    arrays: list[ArrayLike],
    axes: list[Index],
    consolidate: bool,
    refs: list,
) -> BlockManager:
    """Build a BlockManager from per-column 1D arrays."""
    # Assertions disabled for performance (caller is responsible for verifying)
    # assert isinstance(axes, list)
    # assert all(isinstance(x, Index) for x in axes)
    # assert all(isinstance(x, (np.ndarray, ExtensionArray)) for x in arrays)
    # assert all(type(x) is not NumpyExtensionArray for x in arrays)
    # assert all(x.ndim == 1 for x in arrays)
    # assert all(len(x) == len(axes[1]) for x in arrays)
    # assert len(arrays) == len(axes[0])
    # These last three are sufficient to allow us to safely pass
    # verify_integrity=False below.

    try:
        blocks = _form_blocks(arrays, consolidate, refs)
        mgr = BlockManager(blocks, axes, verify_integrity=False)
    except ValueError as e:
        # Translate into a user-friendly shape-mismatch error (always raises).
        raise_construction_error(len(arrays), arrays[0].shape, axes, e)
    if consolidate:
        mgr._consolidate_inplace()
    return mgr

2146 

2147 

def raise_construction_error(
    tot_items: int,
    block_shape: Shape,
    axes: list[Index],
    e: ValueError | None = None,
):
    """raise a helpful message about our construction"""
    passed = tuple(int(x) for x in (tot_items, *block_shape))
    # Present (nrows, ncols) order to the user for the 1D/2D cases
    if len(passed) <= 2:
        passed = tuple(reversed(passed))

    implied = tuple(len(ax) for ax in axes)
    # Present (nrows, ncols) order to the user for the 1D/2D cases
    if len(implied) <= 2:
        implied = tuple(reversed(implied))

    # If the shapes actually agree, the original error was about something
    # else entirely -- propagate it rather than a misleading shape message.
    if passed == implied and e is not None:
        raise e
    if block_shape[0] == 0:
        raise ValueError("Empty data passed with indices specified.")
    raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")

2172 

2173 

2174# ----------------------------------------------------------------------- 

2175 

2176 

def _grouping_func(tup: tuple[int, ArrayLike]) -> tuple[int, DtypeObj]:
    """Key function for grouping (position, array) tuples by dtype."""
    dtype = tup[1].dtype

    if is_1d_only_ea_dtype(dtype):
        # These never get consolidated, so put each in its own group keyed by
        # id(); this avoids expensive comparisons of CategoricalDtype objects.
        return id(dtype), dtype

    return 0, dtype

2188 

2189 

def _form_blocks(arrays: list[ArrayLike], consolidate: bool, refs: list) -> list[Block]:
    """Assemble 1D column arrays into 2D Blocks, grouping by dtype when consolidating."""
    tuples = list(enumerate(arrays))

    if not consolidate:
        return _tuples_to_blocks_no_consolidate(tuples, refs)

    # when consolidating, we can ignore refs (either stacking always copies,
    # or the EA is already copied in the calling dict_to_mgr)

    nbs: list[Block] = []
    # group by dtype (1D-only EA dtypes are keyed by id, see _grouping_func)
    for (_, dtype), tup_block in itertools.groupby(tuples, _grouping_func):
        block_type = get_block_type(dtype)

        if isinstance(dtype, np.dtype):
            is_dtlike = dtype.kind in "mM"

            if issubclass(dtype.type, (str, bytes)):
                # string/bytes columns are stored as object dtype
                dtype = np.dtype(object)

            values, placement = _stack_arrays(list(tup_block), dtype)
            if is_dtlike:
                values = ensure_wrapped_if_datetimelike(values)
            nbs.append(block_type(values, placement=BlockPlacement(placement), ndim=2))

        elif is_1d_only_ea_dtype(dtype):
            # cannot be stacked into a single 2D block; one block per column
            nbs.extend(
                block_type(arr, placement=BlockPlacement(i), ndim=2)
                for i, arr in tup_block
            )

        else:
            # 2D-capable EA: reshape each 1D array to shape (1, N)
            nbs.extend(
                block_type(
                    ensure_block_shape(arr, 2), placement=BlockPlacement(i), ndim=2
                )
                for i, arr in tup_block
            )
    return nbs

2234 

2235 

def _tuples_to_blocks_no_consolidate(tuples, refs) -> list[Block]:
    # tuples produced within _form_blocks are of the form (placement, array)
    blocks = []
    for (i, arr), ref in zip(tuples, refs):
        nb = new_block_2d(
            ensure_block_shape(arr, ndim=2), placement=BlockPlacement(i), refs=ref
        )
        blocks.append(nb)
    return blocks

2244 

2245 

2246def _stack_arrays(tuples, dtype: np.dtype): 

2247 placement, arrays = zip(*tuples) 

2248 

2249 first = arrays[0] 

2250 shape = (len(arrays),) + first.shape 

2251 

2252 stacked = np.empty(shape, dtype=dtype) 

2253 for i, arr in enumerate(arrays): 

2254 stacked[i] = arr 

2255 

2256 return stacked, placement 

2257 

2258 

def _consolidate(blocks: tuple[Block, ...]) -> tuple[Block, ...]:
    """
    Merge blocks having same dtype, exclude non-consolidating blocks
    """

    # sort by (_can_consolidate, dtype) so groupby sees each key contiguously
    def gkey(blk):
        return blk._consolidate_key

    new_blocks: list[Block] = []
    for (can_consolidate, dtype), group in itertools.groupby(
        sorted(blocks, key=gkey), gkey
    ):
        merged, _ = _merge_blocks(
            list(group), dtype=dtype, can_consolidate=can_consolidate
        )
        new_blocks = extend_blocks(merged, new_blocks)
    return tuple(new_blocks)

2274 

2275 

def _merge_blocks(
    blocks: list[Block], dtype: DtypeObj, can_consolidate: bool
) -> tuple[list[Block], bool]:
    """Merge a group of same-dtype blocks; returns (blocks, whether_merged)."""
    if len(blocks) == 1:
        # nothing to merge
        return blocks, False

    if not can_consolidate:
        # can't consolidate --> no merge
        return blocks, False

    # TODO: optimization potential in case all mgrs contain slices and
    # combination of those slices is a slice, too.
    new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])

    new_values: ArrayLike

    if isinstance(blocks[0].dtype, np.dtype):
        # error: List comprehension has incompatible type; expected
        # List[Union[complex, generic, Sequence[...], SupportsArray]]
        new_values = np.vstack([b.values for b in blocks])  # type: ignore[misc]
    else:
        bvals = cast(
            Sequence[NDArrayBackedExtensionArray], [blk.values for blk in blocks]
        )
        new_values = bvals[0]._concat_same_type(bvals, axis=0)

    # restore the original column order
    order = np.argsort(new_mgr_locs)
    new_values = new_values[order]
    new_mgr_locs = new_mgr_locs[order]

    return [new_block_2d(new_values, placement=BlockPlacement(new_mgr_locs))], True

2309 

2310 

2311def _fast_count_smallints(arr: npt.NDArray[np.intp]): 

2312 """Faster version of set(arr) for sequences of small numbers.""" 

2313 counts = np.bincount(arr) 

2314 nz = counts.nonzero()[0] 

2315 # Note: list(zip(...) outperforms list(np.c_[nz, counts[nz]]) here, 

2316 # in one benchmark by a factor of 11 

2317 return zip(nz, counts[nz]) 

2318 

2319 

def _preprocess_slice_or_indexer(
    slice_or_indexer: slice | np.ndarray, length: int, allow_fill: bool
):
    """Normalize a slice or integer ndarray indexer to (kind, indexer, length)."""
    if isinstance(slice_or_indexer, slice):
        return (
            "slice",
            slice_or_indexer,
            libinternals.slice_len(slice_or_indexer, length),
        )

    if (
        not isinstance(slice_or_indexer, np.ndarray)
        or slice_or_indexer.dtype.kind != "i"
    ):
        # only slices and signed-integer ndarrays are supported here
        dtype = getattr(slice_or_indexer, "dtype", None)
        raise TypeError(type(slice_or_indexer), dtype)

    indexer = ensure_platform_int(slice_or_indexer)
    if not allow_fill:
        # validate/convert the integer indexer against the given length
        indexer = maybe_convert_indices(indexer, length)
    return "fancy", indexer, len(indexer)

2341 

2342 

def make_na_array(dtype: DtypeObj, shape: Shape, fill_value) -> ArrayLike:
    """Construct an array of the given dtype and shape filled with fill_value."""
    if isinstance(dtype, DatetimeTZDtype):
        # NB: exclude e.g. pyarrow[dt64tz] dtypes
        ts = Timestamp(fill_value).as_unit(dtype.unit)
        i8values = np.full(shape, ts._value)
        dt64values = i8values.view(f"M8[{dtype.unit}]")
        return DatetimeArray._simple_new(dt64values, dtype=dtype)

    if is_1d_only_ea_dtype(dtype):
        dtype = cast(ExtensionDtype, dtype)
        cls = dtype.construct_array_type()

        missing_arr = cls._from_sequence([], dtype=dtype)
        ncols, nrows = shape
        assert ncols == 1, ncols
        # take with an all -1 indexer broadcasts fill_value across nrows
        filler = np.full((nrows,), -1, dtype=np.intp)
        return missing_arr.take(filler, allow_fill=True, fill_value=fill_value)

    if isinstance(dtype, ExtensionDtype):
        # TODO: no tests get here, a handful would if we disabled
        # the dt64tz special-case above (which is faster)
        cls = dtype.construct_array_type()
        missing_arr = cls._empty(shape=shape, dtype=dtype)
        missing_arr[:] = fill_value
        return missing_arr

    # NB: we should never get here with dtype integer or bool;
    # if we did, the missing_arr.fill would cast to gibberish
    missing_np = np.empty(shape, dtype=dtype)
    missing_np.fill(fill_value)

    if dtype.kind in "mM":
        missing_np = ensure_wrapped_if_datetimelike(missing_np)
    return missing_np