# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/internals/blocks.py: 37% (1084 statements)


from __future__ import annotations

from functools import wraps
import inspect
import re
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    cast,
    final,
)
import warnings
import weakref

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
    warn_copy_on_write,
)

from pandas._libs import (
    NaT,
    internals as libinternals,
    lib,
)
from pandas._libs.internals import (
    BlockPlacement,
    BlockValuesRefs,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeBackend,
    DtypeObj,
    F,
    FillnaOptions,
    IgnoreRaise,
    InterpolateOptions,
    QuantileInterpolation,
    Self,
    Shape,
    npt,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.astype import (
    astype_array_safe,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    convert_dtypes,
    find_result_type,
    maybe_downcast_to_dtype,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_float_dtype,
    is_integer_dtype,
    is_list_like,
    is_scalar,
    is_string_dtype,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    NumpyEADtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCIndex,
    ABCNumpyExtensionArray,
    ABCSeries,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    na_value_for_dtype,
)

from pandas.core import missing
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    extract_bool_array,
    putmask_inplace,
    putmask_without_repeat,
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.replace import (
    compare_or_regex_search,
    replace_regex,
    should_use_regex,
)
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
    Categorical,
    DatetimeArray,
    ExtensionArray,
    IntervalArray,
    NumpyExtensionArray,
    PeriodArray,
    TimedeltaArray,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation import expressions
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import check_setitem_lengths
from pandas.core.indexes.base import get_values_for_csv

if TYPE_CHECKING:
    from collections.abc import (
        Iterable,
        Sequence,
    )

    from pandas.core.api import Index
    from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")


COW_WARNING_GENERAL_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
You are mutating a Series or DataFrame object, and currently this mutation will
also have effect on other Series or DataFrame objects that share data with this
object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
will never modify another.
"""


COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:

    ser = df["col"]
    ser[0] = 0    <--- in pandas 2, this also updates `df`

In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""


def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)
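
# A minimal sketch of what ``maybe_split`` buys us; ``_lower`` and
# ``my_transform`` below are hypothetical names, not part of this module.
# Decorating a Block method lets the method body be written for the
# single-column case only:
#
#     class SomeBlock(Block):
#         @maybe_split
#         def _lower(self) -> list[Block]:
#             # only ever sees ndim == 1 values or a single column here
#             return [self.make_block(my_transform(self.values))]
#
# Calling ``_lower`` on a 2D block holding several columns routes through
# ``split_and_operate`` and returns one block per column.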



class Block(PandasObject, libinternals.Block):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    values: np.ndarray | ExtensionArray
    ndim: int
    refs: BlockValuesRefs
    __init__: Callable

    __slots__ = ()
    is_numeric = False

    @final
    @cache_readonly
    def _validate_ndim(self) -> bool:
        """
        We validate dimension for blocks that can hold 2D values, which for now
        means numpy dtypes or DatetimeTZDtype.
        """
        dtype = self.dtype
        return not isinstance(dtype, ExtensionDtype) or isinstance(
            dtype, DatetimeTZDtype
        )

    @final
    @cache_readonly
    def is_object(self) -> bool:
        return self.values.dtype == _dtype_obj

    @final
    @cache_readonly
    def is_extension(self) -> bool:
        return not lib.is_np_dtype(self.values.dtype)

    @final
    @cache_readonly
    def _can_consolidate(self) -> bool:
        # We _could_ consolidate for DatetimeTZDtype but don't for now.
        return not self.is_extension

    @final
    @cache_readonly
    def _consolidate_key(self):
        return self._can_consolidate, self.dtype.name

    @final
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Can we store NA values in this Block?
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype):
            return dtype.kind not in "iub"
        return dtype._can_hold_na
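
    # For example (a sketch, not exercised here): np.dtype("int64").kind is
    # "i" and np.dtype("bool").kind is "b", so integer, unsigned and bool
    # blocks report _can_hold_na == False, while float64 ("f") and object
    # ("O") blocks do hold NA, as does any ExtensionDtype whose
    # _can_hold_na is True.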


    @final
    @property
    def is_bool(self) -> bool:
        """
        We can be bool if a) we are bool dtype or b) object dtype with bool objects.
        """
        return self.values.dtype == np.dtype(bool)

    @final
    def external_values(self):
        return external_values(self.values)

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        return na_value_for_dtype(self.dtype, compat=False)

    @final
    def _standardize_fill_value(self, value):
        # if we are passed a scalar None, convert it here
        if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype):
            value = self.fill_value
        return value

    @property
    def mgr_locs(self) -> BlockPlacement:
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        self._mgr_locs = new_mgr_locs

    @final
    def make_block(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Block:
        """
        Create a new block, with type inference, propagating any values that
        are not specified.
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def make_block_same_class(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Self:
        """Wrap given values in a block of same type as self."""
        # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
        # relied on it, as of 2.0 the caller is responsible for this.
        if placement is None:
            placement = self._mgr_locs

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def __repr__(self) -> str:
        # don't want to print out all of the items here
        name = type(self).__name__
        if self.ndim == 1:
            result = f"{name}: {len(self)} dtype: {self.dtype}"
        else:
            shape = " x ".join([str(s) for s in self.shape])
            result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"

        return result

    @final
    def __len__(self) -> int:
        return len(self.values)

    @final
    def slice_block_columns(self, slc: slice) -> Self:
        """
        Perform __getitem__-like, return result as block.
        """
        new_mgr_locs = self._mgr_locs[slc]

        new_values = self._slice(slc)
        refs = self.refs
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: this is only called from internals.concat, and we can verify
        # that it never happens with 1-column blocks, i.e. never for ExtensionBlock.

        new_mgr_locs = self._mgr_locs[indices]

        new_values = self._slice(indices)
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
    ) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        new_values = self._slice(slicer)
        refs = self.refs if not ref_inplace_op or self.refs.has_reference() else None
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def _can_hold_element(self, element: Any) -> bool:
        """require the same dtype as ourselves"""
        element = extract_array(element, extract_numpy=True)
        return can_hold_element(self.values, element)

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        return value.dtype == self.dtype

    # ---------------------------------------------------------------------
    # Apply/Reduce and Helpers

    @final
    def apply(self, func, **kwargs) -> list[Block]:
        """
        apply the function to my values; return a block if we are not
        one
        """
        result = func(self.values, **kwargs)

        result = maybe_coerce_values(result)
        return self._split_op_result(result)

    @final
    def reduce(self, func) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        result = func(self.values)

        if self.values.ndim == 1:
            res_values = result
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_dtype(result.dtype):
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                bp = BlockPlacement(loc)
                block = self.make_block(values=vals, placement=bp)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]

    @final
    def _split(self) -> list[Block]:
        """
        Split a block into a list of single-column blocks.
        """
        assert self.ndim == 2

        new_blocks = []
        for i, ref_loc in enumerate(self._mgr_locs):
            vals = self.values[slice(i, i + 1)]

            bp = BlockPlacement(ref_loc)
            nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs)
            new_blocks.append(nb)
        return new_blocks
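
    # Shape sketch (illustrative only): a 2D block with values.shape == (3, 5)
    # -- three columns of five rows in the manager's layout -- splits into
    # three blocks of shape (1, 5), each with its own BlockPlacement and all
    # sharing ``self.refs``, so no data is copied by _split itself.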


    @final
    def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
        """
        Split the block and apply func column-by-column.

        Parameters
        ----------
        func : Block method
        *args
        **kwargs

        Returns
        -------
        List[Block]
        """
        assert self.ndim == 2 and self.shape[0] != 1

        res_blocks = []
        for nb in self._split():
            rbs = func(nb, *args, **kwargs)
            res_blocks.extend(rbs)
        return res_blocks

    # ---------------------------------------------------------------------
    # Up/Down-casting

    @final
    def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
        """
        coerce the current block to a dtype compat for other
        we will return a block, possibly object, and not raise

        we can also safely try to coerce to the same dtype
        and will receive the same block
        """
        new_dtype = find_result_type(self.values.dtype, other)
        if new_dtype == self.dtype:
            # GH#52927 avoid RecursionError
            raise AssertionError(
                "Something has gone wrong, please report a bug at "
                "https://github.com/pandas-dev/pandas/issues"
            )

        # In a future version of pandas, the default will be that
        # setting `nan` into an integer series won't raise.
        if (
            is_scalar(other)
            and is_integer_dtype(self.values.dtype)
            and isna(other)
            and other is not NaT
            and not (
                isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
            )
        ):
            warn_on_upcast = False
        elif (
            isinstance(other, np.ndarray)
            and other.ndim == 1
            and is_integer_dtype(self.values.dtype)
            and is_float_dtype(other.dtype)
            and lib.has_only_ints_or_nan(other)
        ):
            warn_on_upcast = False

        if warn_on_upcast:
            warnings.warn(
                f"Setting an item of incompatible dtype is deprecated "
                "and will raise an error in a future version of pandas. "
                f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
                "please explicitly cast to a compatible dtype first.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        if self.values.dtype == new_dtype:
            raise AssertionError(
                f"Did not expect new dtype {new_dtype} to equal self.dtype "
                f"{self.values.dtype}. Please report a bug at "
                "https://github.com/pandas-dev/pandas/issues."
            )
        return self.astype(new_dtype, copy=False)
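
    # Behavior sketch (not executed here): for an int64 block,
    # coerce_to_target_dtype(3.5) finds float64 via find_result_type and
    # returns the astype'd block, while coerce_to_target_dtype("a") falls
    # back to object dtype; a value the block can already hold would instead
    # trip the GH#52927 AssertionError guard rather than recurse.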


    @final
    def _maybe_downcast(
        self,
        blocks: list[Block],
        downcast,
        using_cow: bool,
        caller: str,
    ) -> list[Block]:
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # TODO: does it matter that self.dtype might not match blocks[i].dtype?
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            if caller == "fillna" and get_option("future.no_silent_downcasting"):
                return blocks

            nbs = extend_blocks(
                [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
            )
            if caller == "fillna":
                if len(nbs) != len(blocks) or not all(
                    x.dtype == y.dtype for x, y in zip(nbs, blocks)
                ):
                    # GH#54261
                    warnings.warn(
                        "Downcasting object dtype arrays on .fillna, .ffill, .bfill "
                        "is deprecated and will change in a future version. "
                        "Call result.infer_objects(copy=False) instead. "
                        "To opt-in to the future "
                        "behavior, set "
                        "`pd.set_option('future.no_silent_downcasting', True)`",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

            return nbs

        elif downcast is None:
            return blocks
        elif caller == "where" and get_option("future.no_silent_downcasting") is True:
            return blocks
        else:
            nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

        # When _maybe_downcast is called with caller="where", it is either
        # a) with downcast=False, which is a no-op (the desired future behavior)
        # b) with downcast="infer", which is _not_ passed by the user.
        # In the latter case the future behavior is to stop doing inference,
        # so we issue a warning if and only if some inference occurred.
        if caller == "where":
            # GH#53656
            if len(blocks) != len(nbs) or any(
                left.dtype != right.dtype for left, right in zip(blocks, nbs)
            ):
                # In this case _maybe_downcast was _not_ a no-op, so the behavior
                # will change, so we issue a warning.
                warnings.warn(
                    "Downcasting behavior in Series and DataFrame methods 'where', "
                    "'mask', and 'clip' is deprecated. In a future "
                    "version this will not infer object dtypes or cast all-round "
                    "floats to integers. Instead call "
                    "result.infer_objects(copy=False) for object inference, "
                    "or cast round floats explicitly. To opt-in to the future "
                    "behavior, set "
                    "`pd.set_option('future.no_silent_downcasting', True)`",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        return nbs

    @final
    @maybe_split
    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        new_values = maybe_coerce_values(new_values)
        refs = self.refs if new_values is self.values else None
        return [self.make_block(new_values, refs=refs)]

    @final
    def convert(
        self,
        *,
        copy: bool = True,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Attempt to coerce any object types to better types. Return a copy
        of the block (if copy = True).
        """
        if not self.is_object:
            if not copy and using_cow:
                return [self.copy(deep=False)]
            return [self.copy()] if copy else [self]

        if self.ndim != 1 and self.shape[0] != 1:
            blocks = self.split_and_operate(
                Block.convert, copy=copy, using_cow=using_cow
            )
            if all(blk.dtype.kind == "O" for blk in blocks):
                # Avoid fragmenting the block if convert is a no-op
                if using_cow:
                    return [self.copy(deep=False)]
                return [self.copy()] if copy else [self]
            return blocks

        values = self.values
        if values.ndim == 2:
            # the check above ensures we only get here with values.shape[0] == 1,
            # avoid doing .ravel as that might make a copy
            values = values[0]

        res_values = lib.maybe_convert_objects(
            values,  # type: ignore[arg-type]
            convert_non_numeric=True,
        )
        refs = None
        if copy and res_values is values:
            res_values = values.copy()
        elif res_values is values:
            refs = self.refs

        res_values = ensure_block_shape(res_values, self.ndim)
        res_values = maybe_coerce_values(res_values)
        return [self.make_block(res_values, refs=refs)]

    def convert_dtypes(
        self,
        copy: bool,
        using_cow: bool,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: DtypeBackend = "numpy_nullable",
    ) -> list[Block]:
        if infer_objects and self.is_object:
            blks = self.convert(copy=False, using_cow=using_cow)
        else:
            blks = [self]

        if not any(
            [convert_floating, convert_integer, convert_boolean, convert_string]
        ):
            return [b.copy(deep=copy) for b in blks]

        rbs = []
        for blk in blks:
            # Determine dtype column by column
            sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split()
            dtypes = [
                convert_dtypes(
                    b.values,
                    convert_string,
                    convert_integer,
                    convert_boolean,
                    convert_floating,
                    infer_objects,
                    dtype_backend,
                )
                for b in sub_blks
            ]
            if all(dtype == self.dtype for dtype in dtypes):
                # Avoid block splitting if no dtype changes
                rbs.append(blk.copy(deep=copy))
                continue

            for dtype, b in zip(dtypes, sub_blks):
                rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1))
        return rbs

    # ---------------------------------------------------------------------
    # Array-Like Methods

    @final
    @cache_readonly
    def dtype(self) -> DtypeObj:
        return self.values.dtype

    @final
    def astype(
        self,
        dtype: DtypeObj,
        copy: bool = False,
        errors: IgnoreRaise = "raise",
        using_cow: bool = False,
        squeeze: bool = False,
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object
        using_cow : bool, default False
            Whether copy-on-write logic is being used.
        squeeze : bool, default False
            squeeze values to ndim=1 if only one column is given

        Returns
        -------
        Block
        """
        values = self.values
        if squeeze and values.ndim == 2 and is_1d_only_ea_dtype(dtype):
            if values.shape[0] != 1:
                raise ValueError("Can not squeeze with more than one column.")
            values = values[0, :]  # type: ignore[call-overload]

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)

        refs = None
        if (using_cow or not copy) and astype_is_view(values.dtype, new_values.dtype):
            refs = self.refs

        newb = self.make_block(new_values, refs=refs)
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb
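
    # Refs sketch (illustrative): astype to a dtype that is a view per
    # astype_is_view (e.g. int64 -> int64 with copy=False) keeps
    # ``self.refs`` so Copy-on-Write still sees the shared buffer, whereas
    # e.g. int64 -> float64 materializes new data and leaves ``refs`` None.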


    @final
    def get_values_for_csv(
        self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
    ) -> Block:
        """convert to our native types format"""
        result = get_values_for_csv(
            self.values,
            na_rep=na_rep,
            quoting=quoting,
            float_format=float_format,
            date_format=date_format,
            decimal=decimal,
        )
        return self.make_block(result)

    @final
    def copy(self, deep: bool = True) -> Self:
        """copy constructor"""
        values = self.values
        refs: BlockValuesRefs | None
        if deep:
            values = values.copy()
            refs = None
        else:
            refs = self.refs
        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs)

    # ---------------------------------------------------------------------
    # Copy-on-Write Helpers

    @final
    def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self:
        if using_cow and inplace:
            deep = self.refs.has_reference()
            blk = self.copy(deep=deep)
        else:
            blk = self if inplace else self.copy()
        return blk

    @final
    def _get_refs_and_copy(self, using_cow: bool, inplace: bool):
        refs = None
        copy = not inplace
        if inplace:
            if using_cow and self.refs.has_reference():
                copy = True
            else:
                refs = self.refs
        return copy, refs
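
    # Decision sketch for _get_refs_and_copy (illustrative): inplace=False
    # always copies; inplace=True keeps the data and propagates ``self.refs``
    # unless CoW is active and some other object still references the values,
    # in which case we must copy first to avoid mutating a shared buffer.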


    # ---------------------------------------------------------------------
    # Replace

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace the to_replace value with value; it is possible to create new
        blocks here, but this is just a call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=to_replace, value=value, inplace=True)
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # TODO(CoW): Maybe split here as well into columns where mask has True
            # and rest?
            blk = self._maybe_copy(using_cow, inplace)
            putmask_inplace(blk.values, mask, value)
            if (
                inplace
                and warn_copy_on_write()
                and already_warned is not None
                and not already_warned.warned_already
            ):
                if self.refs.has_reference():
                    warnings.warn(
                        COW_WARNING_GENERAL_MSG,
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                already_warned.warned_already = True

            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                if get_option("future.no_silent_downcasting") is True:
                    blocks = [blk]
                else:
                    blocks = blk.convert(copy=False, using_cow=using_cow)
                    if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
                        warnings.warn(
                            # GH#54710
                            "Downcasting behavior in `replace` is deprecated and "
                            "will be removed in a future version. To retain the old "
                            "behavior, explicitly call "
                            "`result.infer_objects(copy=False)`. "
                            "To opt-in to the future "
                            "behavior, set "
                            "`pd.set_option('future.no_silent_downcasting', True)`",
                            FutureWarning,
                            stacklevel=find_stack_level(),
                        )
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            if value is None or value is NA:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                        using_cow=using_cow,
                    )
                )
            return blocks
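
    # Flow sketch for replace (illustrative): on an int64 block,
    # replace("a", 0) is an immediate no-op because the block cannot hold
    # "a"; replace(1, 2) takes the putmask_inplace fast path; and
    # replace(1, "a") on a single-column block upcasts via
    # coerce_to_target_dtype (here to object) and retries on the new block.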


    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        mask=None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        mask : array-like of bool, optional
            True indicates the corresponding element is ignored.
        using_cow : bool, default False
            Whether copy-on-write is enabled.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only if self.is_object is True, but could in principle include a
            # String ExtensionBlock
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        block = self._maybe_copy(using_cow, inplace)

        replace_regex(block.values, rx, value, mask)

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        nbs = block.convert(copy=False, using_cow=using_cow)
        opt = get_option("future.no_silent_downcasting")
        if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
            warnings.warn(
                # GH#54710
                "Downcasting behavior in `replace` is deprecated and "
                "will be removed in a future version. To retain the old "
                "behavior, explicitly call `result.infer_objects(copy=False)`. "
                "To opt-in to the future "
                "behavior, set "
                "`pd.set_option('future.no_silent_downcasting', True)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return nbs

    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=src_list, value=dest_list, inplace=True)
            return [blk]

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            if using_cow:
                return [self.copy(deep=False)]
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            na_mask = ~isna(values)
            masks: Iterable[npt.NDArray[np.bool_]] = (
                extract_bool_array(
                    cast(
                        ArrayLike,
                        compare_or_regex_search(
                            values, s[0], regex=regex, mask=na_mask
                        ),
                    )
                )
                for s in pairs
            )
        else:
            # GH#38086 faster if we know we don't need to check for regex
            masks = (missing.mask_missing(values, s[0]) for s in pairs)
        # Materialize if inplace = True, since the masks can change
        # as we replace
        if inplace:
            masks = list(masks)

        if using_cow:
            # Don't set up refs here, otherwise we will think that we have
            # references when we check again later
            rb = [self]
        else:
            rb = [self if inplace else self.copy()]

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        opt = get_option("future.no_silent_downcasting")
        for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = mask
                else:
                    mib = mask
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,
                    inplace=inplace,
                    regex=regex,
                    using_cow=using_cow,
                )

                if using_cow and i != src_len:
                    # This is ugly, but we have to get rid of intermediate refs
                    # that did not go out of scope yet, otherwise we will trigger
                    # many unnecessary copies
                    for b in result:
                        ref = weakref.ref(b)
                        b.refs.referenced_blocks.pop(
                            b.refs.referenced_blocks.index(ref)
                        )

                if (
                    not opt
                    and convert
                    and blk.is_object
                    and not all(x is None for x in dest_list)
                ):
                    # GH#44498 avoid unwanted cast-back
                    nbs = []
                    for res_blk in result:
                        converted = res_blk.convert(
                            copy=True and not using_cow, using_cow=using_cow
                        )
                        if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
                            warnings.warn(
                                # GH#54710
                                "Downcasting behavior in `replace` is deprecated "
                                "and will be removed in a future version. To "
                                "retain the old behavior, explicitly call "
                                "`result.infer_objects(copy=False)`. "
                                "To opt-in to the future "
                                "behavior, set "
                                "`pd.set_option('future.no_silent_downcasting', True)`",
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                        nbs.extend(converted)
                    result = nbs
                new_rb.extend(result)
            rb = new_rb
        return rb

    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicates the corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    has_ref = self.refs.has_reference()
                    nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                    if (nb is self or using_cow) and not inplace:
                        nb = nb.copy()
                    elif inplace and has_ref and nb.refs.has_reference() and using_cow:
                        # no copy in astype and we had refs before
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                if using_cow:
                    return [self]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                mask=mask,
                using_cow=using_cow,
            )

    # ---------------------------------------------------------------------
    # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock
    # but not ExtensionBlock

    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return indexer

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        return self.values.shape

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""

        return self.values[slicer]

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

    @final
    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # these assertions
        if isinstance(self, ExtensionBlock):
            # NB: in this case, the 'axis' kwarg will be ignored in the
            # algos.take_nd call above.
            assert not (self.ndim == 1 and new_mgr_locs is None)
        assert not (axis == 0 and new_mgr_locs is None)

        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if new_values.dtype != self.dtype:
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

    # ---------------------------------------------------------------------

    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow : bool, default False
            Whether copy-on-write is in use.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        if self.dtype != _dtype_obj:
            # GH48933: extract_array would convert a pd.Series value to np.ndarray
            value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)

            self = self._maybe_copy(using_cow, inplace=True)
            values = cast(np.ndarray, self.values.T)
            if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                casted = casted[0, ...]
            try:
                values[indexer] = casted
            except (TypeError, ValueError) as err:
                if is_list_like(casted):
                    raise ValueError(
                        "setting an array element with a sequence."
                    ) from err
                raise
        return self
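
    # Upcast sketch (illustrative): on an int64 block, setitem(0, 2) passes
    # np_can_hold_element and writes in place, while setitem(0, 2.5) hits
    # LossySetitemError, so the block is coerced to float64 via
    # coerce_to_target_dtype (emitting the FutureWarning) and setitem is
    # retried on the new block.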

1437 

1438 def putmask( 

1439 self, mask, new, using_cow: bool = False, already_warned=None 

1440 ) -> list[Block]: 

1441 """ 

1442 putmask the data to the block; it is possible that we may create a 

1443 new dtype of block 

1444 

1445 Return the resulting block(s). 

1446 

1447 Parameters 

1448 ---------- 

1449 mask : np.ndarray[bool], SparseArray[bool], or BooleanArray 

1450 new : a ndarray/object 

1451 using_cow: bool, default False 

1452 

1453 Returns 

1454 ------- 

1455 List[Block] 

1456 """ 

1457 orig_mask = mask 

1458 values = cast(np.ndarray, self.values) 

1459 mask, noop = validate_putmask(values.T, mask) 

1460 assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame)) 

1461 

1462 if new is lib.no_default: 

1463 new = self.fill_value 

1464 

1465 new = self._standardize_fill_value(new) 

1466 new = extract_array(new, extract_numpy=True) 

1467 

1468 if noop: 

1469 if using_cow: 

1470 return [self.copy(deep=False)] 

1471 return [self] 

1472 

1473 if ( 

1474 warn_copy_on_write() 

1475 and already_warned is not None 

1476 and not already_warned.warned_already 

1477 ): 

1478 if self.refs.has_reference(): 

1479 warnings.warn( 

1480 COW_WARNING_GENERAL_MSG, 

1481 FutureWarning, 

1482 stacklevel=find_stack_level(), 

1483 ) 

1484 already_warned.warned_already = True 

1485 

1486 try: 

1487 casted = np_can_hold_element(values.dtype, new) 

1488 

1489 self = self._maybe_copy(using_cow, inplace=True) 

1490 values = cast(np.ndarray, self.values) 

1491 

1492 putmask_without_repeat(values.T, mask, casted) 

1493 return [self] 

1494 except LossySetitemError: 

1495 if self.ndim == 1 or self.shape[0] == 1: 

1496 # no need to split columns 

1497 

1498 if not is_list_like(new): 

1499 # using just new[indexer] can't save us the need to cast 

1500 return self.coerce_to_target_dtype( 

1501 new, warn_on_upcast=True 

1502 ).putmask(mask, new) 

1503 else: 

1504 indexer = mask.nonzero()[0] 

1505 nb = self.setitem(indexer, new[indexer], using_cow=using_cow) 

1506 return [nb] 

1507 

1508 else: 

1509 is_array = isinstance(new, np.ndarray) 

1510 

1511 res_blocks = [] 

1512 nbs = self._split() 

1513 for i, nb in enumerate(nbs): 

1514 n = new 

1515 if is_array: 

1516 # we have a different value per-column 

1517 n = new[:, i : i + 1] 

1518 

1519 submask = orig_mask[:, i : i + 1] 

1520 rbs = nb.putmask(submask, n, using_cow=using_cow) 

1521 res_blocks.extend(rbs) 

1522 return res_blocks 

1523 

1524 def where( 

1525 self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False 

1526 ) -> list[Block]: 

1527 """ 

1528 evaluate the block; return result block(s) from the result 

1529 

1530 Parameters 

1531 ---------- 

1532 other : a ndarray/object 

1533 cond : np.ndarray[bool], SparseArray[bool], or BooleanArray 

1534 _downcast : str or None, default "infer" 

1535 Private because we only specify it when calling from fillna. 

1536 

1537 Returns 

1538 ------- 

1539 List[Block] 

1540 """ 

1541 assert cond.ndim == self.ndim 

1542 assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame)) 

1543 

1544 transpose = self.ndim == 2 

1545 

1546 cond = extract_bool_array(cond) 

1547 

1548 # EABlocks override where 

1549 values = cast(np.ndarray, self.values) 

1550 orig_other = other 

1551 if transpose: 

1552 values = values.T 

1553 

1554 icond, noop = validate_putmask(values, ~cond) 

1555 if noop: 

1556 # GH-39595: Always return a copy; short-circuit up/downcasting 

1557 if using_cow: 

1558 return [self.copy(deep=False)] 

1559 return [self.copy()] 

1560 

1561 if other is lib.no_default: 

1562 other = self.fill_value 

1563 

1564 other = self._standardize_fill_value(other) 

1565 

1566 try: 

1567 # try/except here is equivalent to a self._can_hold_element check, 

1568 # but this gets us back 'casted' which we will reuse below; 

1569 # without using 'casted', expressions.where may do unwanted upcasts. 

1570 casted = np_can_hold_element(values.dtype, other) 

1571 except (ValueError, TypeError, LossySetitemError): 

1572 # we cannot coerce, return a compat dtype 

1573 

1574 if self.ndim == 1 or self.shape[0] == 1: 

1575 # no need to split columns 

1576 

1577 block = self.coerce_to_target_dtype(other) 

1578 blocks = block.where(orig_other, cond, using_cow=using_cow) 

1579 return self._maybe_downcast( 

1580 blocks, downcast=_downcast, using_cow=using_cow, caller="where" 

1581 ) 

1582 

1583 else: 

1584 # since _maybe_downcast would split blocks anyway, we 

1585 # can avoid some potential upcast/downcast by splitting 

1586 # on the front end. 

1587 is_array = isinstance(other, (np.ndarray, ExtensionArray)) 

1588 

1589 res_blocks = [] 

1590 nbs = self._split() 

1591 for i, nb in enumerate(nbs): 

1592 oth = other 

1593 if is_array: 

1594 # we have a different value per-column 

1595 oth = other[:, i : i + 1] 

1596 

1597 submask = cond[:, i : i + 1] 

1598 rbs = nb.where( 

1599 oth, submask, _downcast=_downcast, using_cow=using_cow 

1600 ) 

1601 res_blocks.extend(rbs) 

1602 return res_blocks 

1603 

1604 else: 

1605 other = casted 

1606 alt = setitem_datetimelike_compat(values, icond.sum(), other) 

1607 if alt is not other: 

1608 if is_list_like(other) and len(other) < len(values): 

1609 # call np.where with other to get the appropriate ValueError 

1610 np.where(~icond, values, other) 

1611 raise NotImplementedError( 

1612 "This should not be reached; call to np.where above is " 

1613 "expected to raise ValueError. Please report a bug at " 

1614 "github.com/pandas-dev/pandas" 

1615 ) 

1616 result = values.copy() 

1617 np.putmask(result, icond, alt) 

1618 else: 

1619 # By the time we get here, we should have all Series/Index 

1620 # args extracted to ndarray 

1621 if ( 

1622 is_list_like(other) 

1623 and not isinstance(other, np.ndarray) 

1624 and len(other) == self.shape[-1] 

1625 ): 

1626 # If we don't do this broadcasting here, then expressions.where 

1627 # will broadcast a 1D other to be row-like instead of 

1628 # column-like. 

1629 other = np.array(other).reshape(values.shape) 

1630 # If lengths don't match (or len(other)==1), we will raise 

1631 # inside expressions.where, see test_series_where 

1632 

1633 # Note: expressions.where may upcast. 

1634 result = expressions.where(~icond, values, other) 

1635 # The np_can_hold_element check _should_ ensure that we always 

1636 # have result.dtype == self.dtype here. 

1637 

1638 if transpose: 

1639 result = result.T 

1640 

1641 return [self.make_block(result)] 

1642 

1643 def fillna( 

1644 self, 

1645 value, 

1646 limit: int | None = None, 

1647 inplace: bool = False, 

1648 downcast=None, 

1649 using_cow: bool = False, 

1650 already_warned=None, 

1651 ) -> list[Block]: 

1652 """ 

1653 fillna on the block with the value. If we fail, then convert to 

1654 block to hold objects instead and try again 

1655 """ 

1656 # Caller is responsible for validating limit; if int it is strictly positive 

1657 inplace = validate_bool_kwarg(inplace, "inplace") 

1658 

1659 if not self._can_hold_na: 

1660 # can short-circuit the isna call 

1661 noop = True 

1662 else: 

1663 mask = isna(self.values) 

1664 mask, noop = validate_putmask(self.values, mask) 

1665 

1666 if noop: 

1667 # we can't process the value, but nothing to do 

1668 if inplace: 

1669 if using_cow: 

1670 return [self.copy(deep=False)] 

1671 # Arbitrarily imposing the convention that we ignore downcast 

1672 # on no-op when inplace=True 

1673 return [self] 

1674 else: 

1675 # GH#45423 consistent downcasting on no-ops. 

1676 nb = self.copy(deep=not using_cow) 

1677 nbs = nb._maybe_downcast( 

1678 [nb], downcast=downcast, using_cow=using_cow, caller="fillna" 

1679 ) 

1680 return nbs 

1681 

1682 if limit is not None: 

1683 mask[mask.cumsum(self.ndim - 1) > limit] = False 

1684 

1685 if inplace: 

1686 nbs = self.putmask( 

1687 mask.T, value, using_cow=using_cow, already_warned=already_warned 

1688 ) 

1689 else: 

1690 # without _downcast, we would break 

1691 # test_fillna_dtype_conversion_equiv_replace 

1692 nbs = self.where(value, ~mask.T, _downcast=False) 

1693 

1694 # Note: blk._maybe_downcast vs self._maybe_downcast(nbs) 

1695 # makes a difference bc blk may have object dtype, which has 

1696 # different behavior in _maybe_downcast. 

1697 return extend_blocks( 

1698 [ 

1699 blk._maybe_downcast( 

1700 [blk], downcast=downcast, using_cow=using_cow, caller="fillna" 

1701 ) 

1702 for blk in nbs 

1703 ] 

1704 ) 

1705 

1706 def pad_or_backfill( 

1707 self, 

1708 *, 

1709 method: FillnaOptions, 

1710 axis: AxisInt = 0, 

1711 inplace: bool = False, 

1712 limit: int | None = None, 

1713 limit_area: Literal["inside", "outside"] | None = None, 

1714 downcast: Literal["infer"] | None = None, 

1715 using_cow: bool = False, 

1716 already_warned=None, 

1717 ) -> list[Block]: 

1718 if not self._can_hold_na: 

1719 # If there are no NAs, then interpolate is a no-op 

1720 if using_cow: 

1721 return [self.copy(deep=False)] 

1722 return [self] if inplace else [self.copy()] 

1723 

1724 copy, refs = self._get_refs_and_copy(using_cow, inplace) 

1725 

1726 # Dispatch to the NumpyExtensionArray method. 

1727 # We know self.array_values is a NumpyExtensionArray bc EABlock overrides 

1728 vals = cast(NumpyExtensionArray, self.array_values) 

1729 if axis == 1: 

1730 vals = vals.T 

1731 new_values = vals._pad_or_backfill( 

1732 method=method, 

1733 limit=limit, 

1734 limit_area=limit_area, 

1735 copy=copy, 

1736 ) 

1737 if ( 

1738 not copy 

1739 and warn_copy_on_write() 

1740 and already_warned is not None 

1741 and not already_warned.warned_already 

1742 ): 

1743 if self.refs.has_reference(): 

1744 warnings.warn( 

1745 COW_WARNING_GENERAL_MSG, 

1746 FutureWarning, 

1747 stacklevel=find_stack_level(), 

1748 ) 

1749 already_warned.warned_already = True 

1750 if axis == 1: 

1751 new_values = new_values.T 

1752 

1753 data = extract_array(new_values, extract_numpy=True) 

1754 

1755 nb = self.make_block_same_class(data, refs=refs) 

1756 return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna") 

1757 

1758 @final 

1759 def interpolate( 

1760 self, 

1761 *, 

1762 method: InterpolateOptions, 

1763 index: Index, 

1764 inplace: bool = False, 

1765 limit: int | None = None, 

1766 limit_direction: Literal["forward", "backward", "both"] = "forward", 

1767 limit_area: Literal["inside", "outside"] | None = None, 

1768 downcast: Literal["infer"] | None = None, 

1769 using_cow: bool = False, 

1770 already_warned=None, 

1771 **kwargs, 

1772 ) -> list[Block]: 

1773 inplace = validate_bool_kwarg(inplace, "inplace") 

1774 # error: Non-overlapping equality check [...] 

1775 if method == "asfreq": # type: ignore[comparison-overlap] 

1776 # clean_fill_method used to allow this 

1777 missing.clean_fill_method(method) 

1778 

1779 if not self._can_hold_na: 

1780 # If there are no NAs, then interpolate is a no-op 

1781 if using_cow: 

1782 return [self.copy(deep=False)] 

1783 return [self] if inplace else [self.copy()] 

1784 

1785 # TODO(3.0): this case will not be reachable once GH#53638 is enforced 

1786 if self.dtype == _dtype_obj: 

1787 # only deal with floats 

1788 # bc we already checked that can_hold_na, we don't have int dtype here 

1789 # test_interp_basic checks that we make a copy here 

1790 if using_cow: 

1791 return [self.copy(deep=False)] 

1792 return [self] if inplace else [self.copy()] 

1793 

1794 copy, refs = self._get_refs_and_copy(using_cow, inplace) 

1795 

1796 # Dispatch to the EA method. 

1797 new_values = self.array_values.interpolate( 

1798 method=method, 

1799 axis=self.ndim - 1, 

1800 index=index, 

1801 limit=limit, 

1802 limit_direction=limit_direction, 

1803 limit_area=limit_area, 

1804 copy=copy, 

1805 **kwargs, 

1806 ) 

1807 data = extract_array(new_values, extract_numpy=True) 

1808 

1809 if ( 

1810 not copy 

1811 and warn_copy_on_write() 

1812 and already_warned is not None 

1813 and not already_warned.warned_already 

1814 ): 

1815 if self.refs.has_reference(): 

1816 warnings.warn( 

1817 COW_WARNING_GENERAL_MSG, 

1818 FutureWarning, 

1819 stacklevel=find_stack_level(), 

1820 ) 

1821 already_warned.warned_already = True 

1822 

1823 nb = self.make_block_same_class(data, refs=refs) 

1824 return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") 

1825 
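# Usage sketch (not part of the source): Block.interpolate backs
# Series/DataFrame.interpolate, which dispatches to the EA method above.
import numpy as np
import pandas as pd

s = pd.Series([0.0, np.nan, np.nan, 3.0])
s.interpolate()                                     # -> [0.0, 1.0, 2.0, 3.0]
s.interpolate(limit=1, limit_direction="backward")  # fill one NaN per gap, backwards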

1826 @final 

1827 def diff(self, n: int) -> list[Block]: 

1828 """return block for the diff of the values""" 

1829 # only reached with ndim == 2 

1830 # TODO(EA2D): transpose will be unnecessary with 2D EAs 

1831 new_values = algos.diff(self.values.T, n, axis=0).T 

1832 return [self.make_block(values=new_values)] 

1833 
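# Usage sketch (not part of the source): algos.diff computes discrete
# differences along rows, which DataFrame.diff exposes; integer input
# upcasts to float to make room for the leading NaN.
import pandas as pd

df = pd.DataFrame({"a": [1, 3, 6]})
df.diff(1)  # a: [NaN, 2.0, 3.0]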

1834 def shift(self, periods: int, fill_value: Any = None) -> list[Block]: 

1835 """shift the block by periods, possibly upcast""" 

1836 # Convert integer to float if necessary. We need to do a lot more than 

1837 # that, e.g. handle boolean dtypes as well. 

1838 axis = self.ndim - 1 

1839 

1840 # Note: periods is never 0 here, as that is handled at the top of 

1841 # NDFrame.shift. If that ever changes, we can do a check for periods=0 

1842 # and possibly avoid coercing. 

1843 

1844 if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj: 

1845 # with object dtype there is nothing to promote, and the user can 

1846 # pass pretty much any weird fill_value they like 

1847 # see test_shift_object_non_scalar_fill 

1848 raise ValueError("fill_value must be a scalar") 

1849 

1850 fill_value = self._standardize_fill_value(fill_value) 

1851 

1852 try: 

1853 # error: Argument 1 to "np_can_hold_element" has incompatible type 

1854 # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" 

1855 casted = np_can_hold_element( 

1856 self.dtype, fill_value # type: ignore[arg-type] 

1857 ) 

1858 except LossySetitemError: 

1859 nb = self.coerce_to_target_dtype(fill_value) 

1860 return nb.shift(periods, fill_value=fill_value) 

1861 

1862 else: 

1863 values = cast(np.ndarray, self.values) 

1864 new_values = shift(values, periods, axis, casted) 

1865 return [self.make_block_same_class(new_values)] 

1866 
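# Usage sketch (not part of the source): the coercion path above is why an
# int64 Series upcasts to float64 when shifted with the default fill_value,
# while a compatible explicit fill_value preserves the dtype.
import pandas as pd

s = pd.Series([1, 2, 3], dtype="int64")
s.shift(1)                # float64: [NaN, 1.0, 2.0]
s.shift(1, fill_value=0)  # int64:   [0, 1, 2]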

1867 @final 

1868 def quantile( 

1869 self, 

1870 qs: Index, # with dtype float64 

1871 interpolation: QuantileInterpolation = "linear", 

1872 ) -> Block: 

1873 """ 

1874 compute the quantiles of the block. 

1875 

1876 Parameters 

1877 ---------- 

1878 qs : Index 

1879 The quantiles to be computed in float64. 

1880 interpolation : str, default 'linear' 

1881 Type of interpolation. 

1882 

1883 Returns 

1884 ------- 

1885 Block 

1886 """ 

1887 # We should always have ndim == 2 because Series dispatches to DataFrame 

1888 assert self.ndim == 2 

1889 assert is_list_like(qs) # caller is responsible for this 

1890 

1891 result = quantile_compat(self.values, np.asarray(qs._values), interpolation) 

1892 # ensure_block_shape needed for cases where we start with EA and result 

1893 # is ndarray, e.g. IntegerArray, SparseArray 

1894 result = ensure_block_shape(result, ndim=2) 

1895 return new_block_2d(result, placement=self._mgr_locs) 

1896 
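# Usage sketch (not part of the source): Series.quantile dispatches through
# DataFrame, which is why Block.quantile can assert ndim == 2.
import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
df.quantile([0.25, 0.5], interpolation="linear")  # one row per requested q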

1897 @final 

1898 def round(self, decimals: int, using_cow: bool = False) -> Self: 

1899 """ 

1900 Rounds the values. 

1901 If the block is not of an integer or float dtype, nothing happens. 

1902 This is consistent with DataFrame.round behavior. 

1903 (Note: Series.round would raise) 

1904 

1905 Parameters 

1906 ---------- 

1907 decimals : int 

1908 Number of decimal places to round to. 

1909 Caller is responsible for validating this 

1910 using_cow : bool 

1911 Whether Copy on Write is enabled right now 

1912 """ 

1913 if not self.is_numeric or self.is_bool: 

1914 return self.copy(deep=not using_cow) 

1915 refs = None 

1916 # TODO: round only defined on BaseMaskedArray 

1917 # Series also does this, so would need to fix both places 

1918 # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]" 

1919 # has no attribute "round" 

1920 values = self.values.round(decimals) # type: ignore[union-attr] 

1921 if values is self.values: 

1922 if not using_cow: 

1923 # Normally we would need to copy beforehand, but numpy 

1924 # only returns the same array when the round operation 

1925 # is a no-op 

1926 # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 

1927 values = values.copy() 

1928 else: 

1929 refs = self.refs 

1930 return self.make_block_same_class(values, refs=refs) 

1931 
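# Usage sketch (not part of the source): round is a no-op on non-numeric
# blocks, which is why DataFrame.round passes string columns through while
# Series.round on the same data would raise.
import pandas as pd

df = pd.DataFrame({"x": [1.234, 5.678], "y": ["a", "b"]})
df.round(1)  # x -> [1.2, 5.7]; y unchanged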

1932 # --------------------------------------------------------------------- 

1933 # Abstract Methods Overridden By EABackedBlock and NumpyBlock 

1934 

1935 def delete(self, loc) -> list[Block]: 

1936 """Deletes the locs from the block. 

1937 

1938 We split the block to avoid copying the underlying data. We create new 

1939 blocks for every connected segment of the initial block that is not deleted. 

1940 The new blocks point to the initial array. 

1941 """ 

1942 if not is_list_like(loc): 

1943 loc = [loc] 

1944 

1945 if self.ndim == 1: 

1946 values = cast(np.ndarray, self.values) 

1947 values = np.delete(values, loc) 

1948 mgr_locs = self._mgr_locs.delete(loc) 

1949 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)] 

1950 

1951 if np.max(loc) >= self.values.shape[0]: 

1952 raise IndexError 

1953 

1954 # Append one out-of-bounds indexer as a sentinel so we also collect 

1955 # any columns after our last indexer 

1956 loc = np.concatenate([loc, [self.values.shape[0]]]) 

1957 mgr_locs_arr = self._mgr_locs.as_array 

1958 new_blocks: list[Block] = [] 

1959 

1960 previous_loc = -1 

1961 # TODO(CoW): This is tricky, if parent block goes out of scope 

1962 # all split blocks are referencing each other even though they 

1963 # don't share data 

1964 refs = self.refs if self.refs.has_reference() else None 

1965 for idx in loc: 

1966 if idx == previous_loc + 1: 

1967 # There is no column between current and last idx 

1968 pass 

1969 else: 

1970 # No overload variant of "__getitem__" of "ExtensionArray" matches 

1971 # argument type "Tuple[slice, slice]" 

1972 values = self.values[previous_loc + 1 : idx, :] # type: ignore[call-overload] 

1973 locs = mgr_locs_arr[previous_loc + 1 : idx] 

1974 nb = type(self)( 

1975 values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs 

1976 ) 

1977 new_blocks.append(nb) 

1978 

1979 previous_loc = idx 

1980 

1981 return new_blocks 

1982 
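# Illustrative sketch (not part of the source) of the splitting strategy in
# delete: each contiguous run of surviving rows between deleted locs becomes
# one slice of the original array, so no data is copied.
import numpy as np

values = np.arange(12).reshape(6, 2)
loc = np.array([1, 4])
bounds = np.concatenate([loc, [values.shape[0]]])  # out-of-bounds sentinel
segments, previous = [], -1
for idx in bounds:
    if idx != previous + 1:  # non-empty run between deletions
        segments.append(values[previous + 1 : idx])  # a view, not a copy
    previous = idx
# segments now holds views of rows [0:1], [2:4] and [5:6]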

1983 @property 

1984 def is_view(self) -> bool: 

1985 """return a boolean if I am possibly a view""" 

1986 raise AbstractMethodError(self) 

1987 

1988 @property 

1989 def array_values(self) -> ExtensionArray: 

1990 """ 

1991 The array that Series.array returns. Always an ExtensionArray. 

1992 """ 

1993 raise AbstractMethodError(self) 

1994 

1995 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: 

1996 """ 

1997 return an internal format, currently just the ndarray; 

1998 this is often overridden to handle to_dense-like operations 

1999 """ 

2000 raise AbstractMethodError(self) 

2001 

2002 

2003class EABackedBlock(Block): 

2004 """ 

2005 Mixin for Block subclasses backed by ExtensionArray. 

2006 """ 

2007 

2008 values: ExtensionArray 

2009 

2010 @final 

2011 def shift(self, periods: int, fill_value: Any = None) -> list[Block]: 

2012 """ 

2013 Shift the block by `periods`. 

2014 

2015 Dispatches to underlying ExtensionArray and re-boxes in an 

2016 ExtensionBlock. 

2017 """ 

2018 # Transpose since EA.shift is always along axis=0, while we want to shift 

2019 # along rows. 

2020 new_values = self.values.T.shift(periods=periods, fill_value=fill_value).T 

2021 return [self.make_block_same_class(new_values)] 

2022 

2023 @final 

2024 def setitem(self, indexer, value, using_cow: bool = False): 

2025 """ 

2026 Attempt self.values[indexer] = value, possibly creating a new array. 

2027 

2028 This differs from Block.setitem by not allowing setitem to change 

2029 the dtype of the Block. 

2030 

2031 Parameters 

2032 ---------- 

2033 indexer : tuple, list-like, array-like, slice, int 

2034 The subset of self.values to set 

2035 value : object 

2036 The value being set 

2037 using_cow: bool, default False 

2038 Signaling if CoW is used. 

2039 

2040 Returns 

2041 ------- 

2042 Block 

2043 

2044 Notes 

2045 ----- 

2046 `indexer` is a direct slice/positional indexer. `value` must 

2047 be a compatible shape. 

2048 """ 

2049 orig_indexer = indexer 

2050 orig_value = value 

2051 

2052 indexer = self._unwrap_setitem_indexer(indexer) 

2053 value = self._maybe_squeeze_arg(value) 

2054 

2055 values = self.values 

2056 if values.ndim == 2: 

2057 # TODO(GH#45419): string[pyarrow] tests break if we transpose 

2058 # unconditionally 

2059 values = values.T 

2060 check_setitem_lengths(indexer, value, values) 

2061 

2062 try: 

2063 values[indexer] = value 

2064 except (ValueError, TypeError): 

2065 if isinstance(self.dtype, IntervalDtype): 

2066 # see TestSetitemFloatIntervalWithIntIntervalValues 

2067 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True) 

2068 return nb.setitem(orig_indexer, orig_value) 

2069 

2070 elif isinstance(self, NDArrayBackedExtensionBlock): 

2071 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True) 

2072 return nb.setitem(orig_indexer, orig_value) 

2073 

2074 else: 

2075 raise 

2076 

2077 else: 

2078 return self 

2079 
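# Usage sketch (not part of the source): when the EA cannot hold the
# assigned value, setitem coerces to a compatible dtype and retries, e.g.
# assigning a float-valued Interval into an int64-interval Series
# (pandas 2.x may emit a FutureWarning about the upcast).
import pandas as pd

ser = pd.Series(pd.IntervalIndex.from_breaks([0, 1, 2]))  # interval[int64, right]
ser[0] = pd.Interval(0.5, 1.5)
ser.dtype  # -> interval[float64, right] after coercion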

2080 @final 

2081 def where( 

2082 self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False 

2083 ) -> list[Block]: 

2084 # _downcast private bc we only specify it when calling from fillna 

2085 arr = self.values.T 

2086 

2087 cond = extract_bool_array(cond) 

2088 

2089 orig_other = other 

2090 orig_cond = cond 

2091 other = self._maybe_squeeze_arg(other) 

2092 cond = self._maybe_squeeze_arg(cond) 

2093 

2094 if other is lib.no_default: 

2095 other = self.fill_value 

2096 

2097 icond, noop = validate_putmask(arr, ~cond) 

2098 if noop: 

2099 # GH#44181, GH#45135 

2100 # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast 

2101 if using_cow: 

2102 return [self.copy(deep=False)] 

2103 return [self.copy()] 

2104 

2105 try: 

2106 res_values = arr._where(cond, other).T 

2107 except (ValueError, TypeError): 

2108 if self.ndim == 1 or self.shape[0] == 1: 

2109 if isinstance(self.dtype, IntervalDtype): 

2110 # TestSetitemFloatIntervalWithIntIntervalValues 

2111 blk = self.coerce_to_target_dtype(orig_other) 

2112 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) 

2113 return self._maybe_downcast( 

2114 nbs, downcast=_downcast, using_cow=using_cow, caller="where" 

2115 ) 

2116 

2117 elif isinstance(self, NDArrayBackedExtensionBlock): 

2118 # NB: not (yet) the same as 

2119 # isinstance(values, NDArrayBackedExtensionArray) 

2120 blk = self.coerce_to_target_dtype(orig_other) 

2121 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) 

2122 return self._maybe_downcast( 

2123 nbs, downcast=_downcast, using_cow=using_cow, caller="where" 

2124 ) 

2125 

2126 else: 

2127 raise 

2128 

2129 else: 

2130 # Same pattern we use in Block.putmask 

2131 is_array = isinstance(orig_other, (np.ndarray, ExtensionArray)) 

2132 

2133 res_blocks = [] 

2134 nbs = self._split() 

2135 for i, nb in enumerate(nbs): 

2136 n = orig_other 

2137 if is_array: 

2138 # we have a different value per-column 

2139 n = orig_other[:, i : i + 1] 

2140 

2141 submask = orig_cond[:, i : i + 1] 

2142 rbs = nb.where(n, submask, using_cow=using_cow) 

2143 res_blocks.extend(rbs) 

2144 return res_blocks 

2145 

2146 nb = self.make_block_same_class(res_values) 

2147 return [nb] 

2148 
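# Usage sketch (not part of the source): Series.where routes through
# Block.where; an all-True condition takes the no-op path above, while a
# masked position is filled with the block's fill value (NaT here).
import pandas as pd

s = pd.Series(pd.to_datetime(["2021-01-01", "2021-01-02"]))
s.where([True, True])   # no-op: values and dtype unchanged
s.where([True, False])  # second position becomes NaT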

2149 @final 

2150 def putmask( 

2151 self, mask, new, using_cow: bool = False, already_warned=None 

2152 ) -> list[Block]: 

2153 """ 

2154 See Block.putmask.__doc__ 

2155 """ 

2156 mask = extract_bool_array(mask) 

2157 if new is lib.no_default: 

2158 new = self.fill_value 

2159 

2160 orig_new = new 

2161 orig_mask = mask 

2162 new = self._maybe_squeeze_arg(new) 

2163 mask = self._maybe_squeeze_arg(mask) 

2164 

2165 if not mask.any(): 

2166 if using_cow: 

2167 return [self.copy(deep=False)] 

2168 return [self] 

2169 

2170 if ( 

2171 warn_copy_on_write() 

2172 and already_warned is not None 

2173 and not already_warned.warned_already 

2174 ): 

2175 if self.refs.has_reference(): 

2176 warnings.warn( 

2177 COW_WARNING_GENERAL_MSG, 

2178 FutureWarning, 

2179 stacklevel=find_stack_level(), 

2180 ) 

2181 already_warned.warned_already = True 

2182 

2183 self = self._maybe_copy(using_cow, inplace=True) 

2184 values = self.values 

2185 if values.ndim == 2: 

2186 values = values.T 

2187 

2188 try: 

2189 # Caller is responsible for ensuring matching lengths 

2190 values._putmask(mask, new) 

2191 except (TypeError, ValueError): 

2192 if self.ndim == 1 or self.shape[0] == 1: 

2193 if isinstance(self.dtype, IntervalDtype): 

2194 # Discussion about what we want to support in the general 

2195 # case GH#39584 

2196 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True) 

2197 return blk.putmask(orig_mask, orig_new) 

2198 

2199 elif isinstance(self, NDArrayBackedExtensionBlock): 

2200 # NB: not (yet) the same as 

2201 # isinstance(values, NDArrayBackedExtensionArray) 

2202 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True) 

2203 return blk.putmask(orig_mask, orig_new) 

2204 

2205 else: 

2206 raise 

2207 

2208 else: 

2209 # Same pattern we use in Block.putmask 

2210 is_array = isinstance(orig_new, (np.ndarray, ExtensionArray)) 

2211 

2212 res_blocks = [] 

2213 nbs = self._split() 

2214 for i, nb in enumerate(nbs): 

2215 n = orig_new 

2216 if is_array: 

2217 # we have a different value per-column 

2218 n = orig_new[:, i : i + 1] 

2219 

2220 submask = orig_mask[:, i : i + 1] 

2221 rbs = nb.putmask(submask, n) 

2222 res_blocks.extend(rbs) 

2223 return res_blocks 

2224 

2225 return [self] 

2226 
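# Usage sketch (not part of the source): putmask backs boolean-mask
# assignment; a value the EA can hold keeps the dtype.
import numpy as np
import pandas as pd

s = pd.Series(pd.array([1, 2, 3], dtype="Int64"))
s[np.array([True, False, True])] = 0  # stays Int64: [0, 2, 0]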

2227 @final 

2228 def delete(self, loc) -> list[Block]: 

2229 # This will be unnecessary if/when __array_function__ is implemented 

2230 if self.ndim == 1: 

2231 values = self.values.delete(loc) 

2232 mgr_locs = self._mgr_locs.delete(loc) 

2233 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)] 

2234 elif self.values.ndim == 1: 

2235 # We get here through to_stata 

2236 return [] 

2237 return super().delete(loc) 

2238 

2239 @final 

2240 @cache_readonly 

2241 def array_values(self) -> ExtensionArray: 

2242 return self.values 

2243 

2244 @final 

2245 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: 

2246 """ 

2247 return object dtype as boxed values, such as Timestamps/Timedelta 

2248 """ 

2249 values: ArrayLike = self.values 

2250 if dtype == _dtype_obj: 

2251 values = values.astype(object) 

2252 # TODO(EA2D): reshape not needed with 2D EAs 

2253 return np.asarray(values).reshape(self.shape) 

2254 

2255 @final 

2256 def pad_or_backfill( 

2257 self, 

2258 *, 

2259 method: FillnaOptions, 

2260 axis: AxisInt = 0, 

2261 inplace: bool = False, 

2262 limit: int | None = None, 

2263 limit_area: Literal["inside", "outside"] | None = None, 

2264 downcast: Literal["infer"] | None = None, 

2265 using_cow: bool = False, 

2266 already_warned=None, 

2267 ) -> list[Block]: 

2268 values = self.values 

2269 

2270 kwargs: dict[str, Any] = {"method": method, "limit": limit} 

2271 if "limit_area" in inspect.signature(values._pad_or_backfill).parameters: 

2272 kwargs["limit_area"] = limit_area 

2273 elif limit_area is not None: 

2274 raise NotImplementedError( 

2275 f"{type(values).__name__} does not implement limit_area " 

2276 "(added in pandas 2.2). 3rd-party ExtnsionArray authors " 

2277 "need to add this argument to _pad_or_backfill." 

2278 ) 

2279 

2280 if values.ndim == 2 and axis == 1: 

2281 # NDArrayBackedExtensionArray.fillna assumes axis=0 

2282 new_values = values.T._pad_or_backfill(**kwargs).T 

2283 else: 

2284 new_values = values._pad_or_backfill(**kwargs) 

2285 return [self.make_block_same_class(new_values)] 

2286 
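# Sketch (not part of the source; MyEA is a hypothetical stand-in) of the
# _pad_or_backfill signature a 3rd-party ExtensionArray needs so that the
# limit_area keyword added in pandas 2.2 is forwarded instead of raising
# NotImplementedError above.
class MyEA:
    def _pad_or_backfill(
        self, *, method, limit=None, limit_area=None, copy=True
    ):
        # method: "pad" | "backfill"; limit: int | None;
        # limit_area: "inside" | "outside" | None
        ...  # fill logic elided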

2287 

2288class ExtensionBlock(EABackedBlock): 

2289 """ 

2290 Block for holding extension types. 

2291 

2292 Notes 

2293 ----- 

2294 This holds all 3rd-party extension array types. It's also the immediate 

2295 parent class for our internal extension types' blocks. 

2296 

2297 ExtensionArrays are limited to 1-D. 

2298 """ 

2299 

2300 values: ExtensionArray 

2301 

2302 def fillna( 

2303 self, 

2304 value, 

2305 limit: int | None = None, 

2306 inplace: bool = False, 

2307 downcast=None, 

2308 using_cow: bool = False, 

2309 already_warned=None, 

2310 ) -> list[Block]: 

2311 if isinstance(self.dtype, IntervalDtype): 

2312 # Block.fillna handles coercion (test_fillna_interval) 

2313 return super().fillna( 

2314 value=value, 

2315 limit=limit, 

2316 inplace=inplace, 

2317 downcast=downcast, 

2318 using_cow=using_cow, 

2319 already_warned=already_warned, 

2320 ) 

2321 if using_cow and self._can_hold_na and not self.values._hasna: 

2322 refs = self.refs 

2323 new_values = self.values 

2324 else: 

2325 copy, refs = self._get_refs_and_copy(using_cow, inplace) 

2326 

2327 try: 

2328 new_values = self.values.fillna( 

2329 value=value, method=None, limit=limit, copy=copy 

2330 ) 

2331 except TypeError: 

2332 # 3rd party EA that has not implemented copy keyword yet 

2333 refs = None 

2334 new_values = self.values.fillna(value=value, method=None, limit=limit) 

2335 # issue the warning *after* retrying, in case the TypeError 

2336 # was caused by an invalid fill_value 

2337 warnings.warn( 

2338 # GH#53278 

2339 "ExtensionArray.fillna added a 'copy' keyword in pandas " 

2340 "2.1.0. In a future version, ExtensionArray subclasses will " 

2341 "need to implement this keyword or an exception will be " 

2342 "raised. In the interim, the keyword is ignored by " 

2343 f"{type(self.values).__name__}.", 

2344 DeprecationWarning, 

2345 stacklevel=find_stack_level(), 

2346 ) 

2347 else: 

2348 if ( 

2349 not copy 

2350 and warn_copy_on_write() 

2351 and already_warned is not None 

2352 and not already_warned.warned_already 

2353 ): 

2354 if self.refs.has_reference(): 

2355 warnings.warn( 

2356 COW_WARNING_GENERAL_MSG, 

2357 FutureWarning, 

2358 stacklevel=find_stack_level(), 

2359 ) 

2360 already_warned.warned_already = True 

2361 

2362 nb = self.make_block_same_class(new_values, refs=refs) 

2363 return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") 

2364 
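# Usage sketch (not part of the source): ExtensionBlock.fillna backs fillna
# on EA-backed columns, e.g. a nullable integer Series keeps its dtype.
import pandas as pd

s = pd.Series(pd.array([1, None, 3], dtype="Int64"))
s.fillna(0)  # -> Int64: [1, 0, 3]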

2365 @cache_readonly 

2366 def shape(self) -> Shape: 

2367 # TODO(EA2D): override unnecessary with 2D EAs 

2368 if self.ndim == 1: 

2369 return (len(self.values),) 

2370 return len(self._mgr_locs), len(self.values) 

2371 

2372 def iget(self, i: int | tuple[int, int] | tuple[slice, int]): 

2373 # In the case where we have a tuple[slice, int], the slice will always 

2374 # be slice(None) 

2375 # We _could_ make the annotation more specific, but mypy would 

2376 # complain about override mismatch: 

2377 # Literal[0] | tuple[Literal[0], int] | tuple[slice, int] 

2378 

2379 # Note: only reached with self.ndim == 2 

2380 

2381 if isinstance(i, tuple): 

2382 # TODO(EA2D): unnecessary with 2D EAs 

2383 col, loc = i 

2384 if not com.is_null_slice(col) and col != 0: 

2385 raise IndexError(f"{self} only contains one item") 

2386 if isinstance(col, slice): 

2387 # the is_null_slice check above assures that col is slice(None) 

2388 # so what we want is a view on all our columns and row loc 

2389 if loc < 0: 

2390 loc += len(self.values) 

2391 # Note: loc:loc+1 vs [[loc]] makes a difference when called 

2392 # from fast_xs because we want to get a view back. 

2393 return self.values[loc : loc + 1] 

2394 return self.values[loc] 

2395 else: 

2396 if i != 0: 

2397 raise IndexError(f"{self} only contains one item") 

2398 return self.values 

2399 

2400 def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: 

2401 # When an ndarray, we should have locs.tolist() == [0] 

2402 # When a BlockPlacement we should have list(locs) == [0] 

2403 if copy: 

2404 self.values = self.values.copy() 

2405 self.values[:] = values 

2406 

2407 def _maybe_squeeze_arg(self, arg): 

2408 """ 

2409 If necessary, squeeze a (N, 1) ndarray to (N,) 

2410 """ 

2411 # e.g. if we are passed a 2D mask for putmask 

2412 if ( 

2413 isinstance(arg, (np.ndarray, ExtensionArray)) 

2414 and arg.ndim == self.values.ndim + 1 

2415 ): 

2416 # TODO(EA2D): unnecessary with 2D EAs 

2417 assert arg.shape[1] == 1 

2418 # error: No overload variant of "__getitem__" of "ExtensionArray" 

2419 # matches argument type "Tuple[slice, int]" 

2420 arg = arg[:, 0] # type: ignore[call-overload] 

2421 elif isinstance(arg, ABCDataFrame): 

2422 # 2022-01-06 only reached for setitem 

2423 # TODO: should we avoid getting here with DataFrame? 

2424 assert arg.shape[1] == 1 

2425 arg = arg._ixs(0, axis=1)._values 

2426 

2427 return arg 

2428 

2429 def _unwrap_setitem_indexer(self, indexer): 

2430 """ 

2431 Adapt a 2D-indexer to our 1D values. 

2432 

2433 This is intended for 'setitem', not 'iget' or '_slice'. 

2434 """ 

2435 # TODO: ATM this doesn't work for iget/_slice, can we change that? 

2436 

2437 if isinstance(indexer, tuple) and len(indexer) == 2: 

2438 # TODO(EA2D): not needed with 2D EAs 

2439 # Should never have length > 2. Caller is responsible for checking. 

2440 # Length 1 is reached via setitem_single_block and setitem_single_column 

2441 # each of which pass indexer=(pi,) 

2442 if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer): 

2443 # GH#44703 went through indexing.maybe_convert_ix 

2444 first, second = indexer 

2445 if not ( 

2446 second.size == 1 and (second == 0).all() and first.shape[1] == 1 

2447 ): 

2448 raise NotImplementedError( 

2449 "This should not be reached. Please report a bug at " 

2450 "github.com/pandas-dev/pandas/" 

2451 ) 

2452 indexer = first[:, 0] 

2453 

2454 elif lib.is_integer(indexer[1]) and indexer[1] == 0: 

2455 # reached via setitem_single_block passing the whole indexer 

2456 indexer = indexer[0] 

2457 

2458 elif com.is_null_slice(indexer[1]): 

2459 indexer = indexer[0] 

2460 

2461 elif is_list_like(indexer[1]) and indexer[1][0] == 0: 

2462 indexer = indexer[0] 

2463 

2464 else: 

2465 raise NotImplementedError( 

2466 "This should not be reached. Please report a bug at " 

2467 "github.com/pandas-dev/pandas/" 

2468 ) 

2469 return indexer 

2470 

2471 @property 

2472 def is_view(self) -> bool: 

2473 """Extension arrays are never treated as views.""" 

2474 return False 

2475 

2476 # error: Cannot override writeable attribute with read-only property 

2477 @cache_readonly 

2478 def is_numeric(self) -> bool: # type: ignore[override] 

2479 return self.values.dtype._is_numeric 

2480 

2481 def _slice( 

2482 self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp] 

2483 ) -> ExtensionArray: 

2484 """ 

2485 Return a slice of my values. 

2486 

2487 Parameters 

2488 ---------- 

2489 slicer : slice, ndarray[int], or ndarray[bool] 

2490 Valid (non-reducing) indexer for self.values. 

2491 

2492 Returns 

2493 ------- 

2494 ExtensionArray 

2495 """ 

2496 # Notes: ndarray[bool] is only reachable when via get_rows_with_mask, which 

2497 # is only for Series, i.e. self.ndim == 1. 

2498 

2499 # return same dims as we currently have 

2500 if self.ndim == 2: 

2501 # reached via getitem_block via _slice_take_blocks_ax0 

2502 # TODO(EA2D): won't be necessary with 2D EAs 

2503 

2504 if not isinstance(slicer, slice): 

2505 raise AssertionError( 

2506 "invalid slicing for a 1-ndim ExtensionArray", slicer 

2507 ) 

2508 # GH#32959 only full-slicers along fake-dim0 are valid 

2509 # TODO(EA2D): won't be necessary with 2D EAs 

2510 # range(1) instead of self._mgr_locs to avoid exception on [::-1] 

2511 # see test_iloc_getitem_slice_negative_step_ea_block 

2512 new_locs = range(1)[slicer] 

2513 if not len(new_locs): 

2514 raise AssertionError( 

2515 "invalid slicing for a 1-ndim ExtensionArray", slicer 

2516 ) 

2517 slicer = slice(None) 

2518 

2519 return self.values[slicer] 

2520 

2521 @final 

2522 def slice_block_rows(self, slicer: slice) -> Self: 

2523 """ 

2524 Perform __getitem__-like specialized to slicing along index. 

2525 """ 

2526 # GH#42787 in principle this is equivalent to values[..., slicer], but we don't 

2527 # require subclasses of ExtensionArray to support that form (for now). 

2528 new_values = self.values[slicer] 

2529 return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs) 

2530 

2531 def _unstack( 

2532 self, 

2533 unstacker, 

2534 fill_value, 

2535 new_placement: npt.NDArray[np.intp], 

2536 needs_masking: npt.NDArray[np.bool_], 

2537 ): 

2538 # ExtensionArray-safe unstack. 

2539 # We override Block._unstack, which unstacks directly on the 

2540 # values of the array. For EA-backed blocks, this would require 

2541 # converting to a 2-D ndarray of objects. 

2542 # Instead, we unstack an ndarray of integer positions, followed by 

2543 # a `take` on the actual values. 

2544 

2545 # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index) 

2546 new_values, mask = unstacker.arange_result 

2547 

2548 # Note: these next two lines ensure that 

2549 # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks) 

2550 # which the calling function needs in order to pass verify_integrity=False 

2551 # to the BlockManager constructor 

2552 new_values = new_values.T[mask] 

2553 new_placement = new_placement[mask] 

2554 

2555 # needs_masking[i] calculated once in BlockManager.unstack tells 

2556 # us if there are any -1s in the relevant indices. When False, 

2557 # that allows us to go through a faster path in 'take', among 

2558 # other things avoiding e.g. Categorical._validate_scalar. 

2559 blocks = [ 

2560 # TODO: could cast to object depending on fill_value? 

2561 type(self)( 

2562 self.values.take( 

2563 indices, allow_fill=needs_masking[i], fill_value=fill_value 

2564 ), 

2565 BlockPlacement(place), 

2566 ndim=2, 

2567 ) 

2568 for i, (indices, place) in enumerate(zip(new_values, new_placement)) 

2569 ] 

2570 return blocks, mask 

2571 
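# Illustrative sketch (not part of the source) of the EA-safe unstack trick:
# integer positions are unstacked with numpy, then `take` pulls the actual
# EA values column by column, avoiding a 2-D object ndarray round trip.
import numpy as np
import pandas as pd

ea = pd.array([10, 20, 30, 40], dtype="Int64")
positions = np.array([[0, 1], [2, 3]])  # as an unstacker might produce
columns = [ea.take(row, allow_fill=False) for row in positions]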

2572 

2573class NumpyBlock(Block): 

2574 values: np.ndarray 

2575 __slots__ = () 

2576 

2577 @property 

2578 def is_view(self) -> bool: 

2579 """return a boolean if I am possibly a view""" 

2580 return self.values.base is not None 

2581 

2582 @property 

2583 def array_values(self) -> ExtensionArray: 

2584 return NumpyExtensionArray(self.values) 

2585 

2586 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray: 

2587 if dtype == _dtype_obj: 

2588 return self.values.astype(_dtype_obj) 

2589 return self.values 

2590 

2591 @cache_readonly 

2592 def is_numeric(self) -> bool: # type: ignore[override] 

2593 dtype = self.values.dtype 

2594 kind = dtype.kind 

2595 

2596 return kind in "fciub" 

2597 

2598 

2599class NumericBlock(NumpyBlock): 

2600 # this Block type is kept for backwards-compatibility 

2601 # TODO(3.0): delete and remove deprecation in __init__.py. 

2602 __slots__ = () 

2603 

2604 

2605class ObjectBlock(NumpyBlock): 

2606 # this Block type is kept for backwards-compatibility 

2607 # TODO(3.0): delete and remove deprecation in __init__.py. 

2608 __slots__ = () 

2609 

2610 

2611class NDArrayBackedExtensionBlock(EABackedBlock): 

2612 """ 

2613 Block backed by an NDArrayBackedExtensionArray 

2614 """ 

2615 

2616 values: NDArrayBackedExtensionArray 

2617 

2618 @property 

2619 def is_view(self) -> bool: 

2620 """return a boolean if I am possibly a view""" 

2621 # check the ndarray values of the DatetimeIndex values 

2622 return self.values._ndarray.base is not None 

2623 

2624 

2625class DatetimeLikeBlock(NDArrayBackedExtensionBlock): 

2626 """Block for datetime64[ns], timedelta64[ns].""" 

2627 

2628 __slots__ = () 

2629 is_numeric = False 

2630 values: DatetimeArray | TimedeltaArray 

2631 

2632 

2633class DatetimeTZBlock(DatetimeLikeBlock): 

2634 """implement a datetime64 block with a tz attribute""" 

2635 

2636 values: DatetimeArray 

2637 

2638 __slots__ = () 

2639 

2640 

2641# ----------------------------------------------------------------- 

2642# Constructor Helpers 

2643 

2644 

2645def maybe_coerce_values(values: ArrayLike) -> ArrayLike: 

2646 """ 

2647 Input validation for values passed to __init__. Ensure that 

2648 any datetime64/timedelta64 dtypes are in nanoseconds. Ensure 

2649 that we do not have string dtypes. 

2650 

2651 Parameters 

2652 ---------- 

2653 values : np.ndarray or ExtensionArray 

2654 

2655 Returns 

2656 ------- 

2657 values : np.ndarray or ExtensionArray 

2658 """ 

2659 # Caller is responsible for ensuring NumpyExtensionArray is already extracted. 

2660 

2661 if isinstance(values, np.ndarray): 

2662 values = ensure_wrapped_if_datetimelike(values) 

2663 

2664 if issubclass(values.dtype.type, str): 

2665 values = np.array(values, dtype=object) 

2666 

2667 if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None: 

2668 # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame 

2669 values = values._with_freq(None) 

2670 

2671 return values 

2672 
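# Usage sketch (not part of the source): maybe_coerce_values normalizes
# inputs, e.g. numpy string dtypes become object and datetime64 ndarrays
# are wrapped in DatetimeArray.
import numpy as np

maybe_coerce_values(np.array(["a", "b"])).dtype           # -> object
type(maybe_coerce_values(np.array([0], dtype="M8[ns]")))  # -> DatetimeArray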

2673 

2674def get_block_type(dtype: DtypeObj) -> type[Block]: 

2675 """ 

2676 Find the appropriate Block subclass to use for the given values and dtype. 

2677 

2678 Parameters 

2679 ---------- 

2680 dtype : numpy or pandas dtype 

2681 

2682 Returns 

2683 ------- 

2684 cls : class, subclass of Block 

2685 """ 

2686 if isinstance(dtype, DatetimeTZDtype): 

2687 return DatetimeTZBlock 

2688 elif isinstance(dtype, PeriodDtype): 

2689 return NDArrayBackedExtensionBlock 

2690 elif isinstance(dtype, ExtensionDtype): 

2691 # Note: need to be sure NumpyExtensionArray is unwrapped before we get here 

2692 return ExtensionBlock 

2693 

2694 # We use kind checks because it is much more performant 

2695 # than is_foo_dtype 

2696 kind = dtype.kind 

2697 if kind in "Mm": 

2698 return DatetimeLikeBlock 

2699 

2700 return NumpyBlock 

2701 
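# Usage sketch (not part of the source): the dtype -> Block mapping above.
import numpy as np
from pandas import CategoricalDtype

get_block_type(np.dtype("float64"))         # -> NumpyBlock
get_block_type(np.dtype("datetime64[ns]"))  # -> DatetimeLikeBlock
get_block_type(CategoricalDtype())          # -> ExtensionBlock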

2702 

2703def new_block_2d( 

2704 values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None 

2705): 

2706 # new_block specialized to case with 

2707 # ndim=2 

2708 # isinstance(placement, BlockPlacement) 

2709 # check_ndim/ensure_block_shape already checked 

2710 klass = get_block_type(values.dtype) 

2711 

2712 values = maybe_coerce_values(values) 

2713 return klass(values, ndim=2, placement=placement, refs=refs) 

2714 

2715 

2716def new_block( 

2717 values, 

2718 placement: BlockPlacement, 

2719 *, 

2720 ndim: int, 

2721 refs: BlockValuesRefs | None = None, 

2722) -> Block: 

2723 # caller is responsible for ensuring: 

2724 # - values is NOT a NumpyExtensionArray 

2725 # - check_ndim/ensure_block_shape already checked 

2726 # - maybe_coerce_values already called/unnecessary 

2727 klass = get_block_type(values.dtype) 

2728 return klass(values, ndim=ndim, placement=placement, refs=refs) 

2729 

2730 

2731def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: 

2732 """ 

2733 ndim inference and validation. 

2734 

2735 Validates that values.ndim and ndim are consistent. 

2736 Validates that len(values) and len(placement) are consistent. 

2737 

2738 Parameters 

2739 ---------- 

2740 values : array-like 

2741 placement : BlockPlacement 

2742 ndim : int 

2743 

2744 Raises 

2745 ------ 

2746 ValueError : the number of dimensions do not match 

2747 """ 

2748 

2749 if values.ndim > ndim: 

2750 # Check for both np.ndarray and ExtensionArray 

2751 raise ValueError( 

2752 "Wrong number of dimensions. " 

2753 f"values.ndim > ndim [{values.ndim} > {ndim}]" 

2754 ) 

2755 

2756 if not is_1d_only_ea_dtype(values.dtype): 

2757 # TODO(EA2D): special case not needed with 2D EAs 

2758 if values.ndim != ndim: 

2759 raise ValueError( 

2760 "Wrong number of dimensions. " 

2761 f"values.ndim != ndim [{values.ndim} != {ndim}]" 

2762 ) 

2763 if len(placement) != len(values): 

2764 raise ValueError( 

2765 f"Wrong number of items passed {len(values)}, " 

2766 f"placement implies {len(placement)}" 

2767 ) 

2768 elif ndim == 2 and len(placement) != 1: 

2769 # TODO(EA2D): special case unnecessary with 2D EAs 

2770 raise ValueError("need to split") 

2771 

2772 

2773def extract_pandas_array( 

2774 values: ArrayLike, dtype: DtypeObj | None, ndim: int 

2775) -> tuple[ArrayLike, DtypeObj | None]: 

2776 """ 

2777 Ensure that we don't allow NumpyExtensionArray / NumpyEADtype in internals. 

2778 """ 

2779 # For now, blocks should be backed by ndarrays when possible. 

2780 if isinstance(values, ABCNumpyExtensionArray): 

2781 values = values.to_numpy() 

2782 if ndim and ndim > 1: 

2783 # TODO(EA2D): special case not needed with 2D EAs 

2784 values = np.atleast_2d(values) 

2785 

2786 if isinstance(dtype, NumpyEADtype): 

2787 dtype = dtype.numpy_dtype 

2788 

2789 return values, dtype 

2790 

2791 

2792# ----------------------------------------------------------------- 

2793 

2794 

2795def extend_blocks(result, blocks=None) -> list[Block]: 

2796 """return a new extended blocks, given the result""" 

2797 if blocks is None: 

2798 blocks = [] 

2799 if isinstance(result, list): 

2800 for r in result: 

2801 if isinstance(r, list): 

2802 blocks.extend(r) 

2803 else: 

2804 blocks.append(r) 

2805 else: 

2806 assert isinstance(result, Block), type(result) 

2807 blocks.append(result) 

2808 return blocks 

2809 

2810 

2811def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: 

2812 """ 

2813 Reshape if possible to have values.ndim == ndim. 

2814 """ 

2815 

2816 if values.ndim < ndim: 

2817 if not is_1d_only_ea_dtype(values.dtype): 

2818 # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 

2819 # block.shape is incorrect for "2D" ExtensionArrays 

2820 # We can't, and don't need to, reshape. 

2821 values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) 

2822 values = values.reshape(1, -1) 

2823 

2824 return values 

2825 
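# Usage sketch (not part of the source): ensure_block_shape reshapes a 1-D
# ndarray to the (1, N) layout 2-D blocks expect; 1d-only EA dtypes are
# left as-is.
import numpy as np

ensure_block_shape(np.array([1, 2, 3]), ndim=2).shape  # -> (1, 3)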

2826 

2827def external_values(values: ArrayLike) -> ArrayLike: 

2828 """ 

2829 The array that Series.values returns (public attribute). 

2830 

2831 This has some historical constraints, and is overridden in block 

2832 subclasses to return the correct array (e.g. period returns 

2833 object ndarray and datetimetz a datetime64[ns] ndarray instead of 

2834 proper extension array). 

2835 """ 

2836 if isinstance(values, (PeriodArray, IntervalArray)): 

2837 return values.astype(object) 

2838 elif isinstance(values, (DatetimeArray, TimedeltaArray)): 

2839 # NB: for datetime64tz this is different from np.asarray(values), since 

2840 # that returns an object-dtype ndarray of Timestamps. 

2841 # Avoid raising in .astype in casting from dt64tz to dt64 

2842 values = values._ndarray 

2843 

2844 if isinstance(values, np.ndarray) and using_copy_on_write(): 

2845 values = values.view() 

2846 values.flags.writeable = False 

2847 

2848 # TODO(CoW) we should also mark our ExtensionArrays as read-only 

2849 

2850 return values
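# Usage sketch (not part of the source): under Copy-on-Write, Series.values
# goes through external_values and returns a read-only view.
import pandas as pd

with pd.option_context("mode.copy_on_write", True):
    arr = pd.Series([1, 2, 3]).values
    arr.flags.writeable  # -> False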