Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/internals/blocks.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1041 statements  

1from __future__ import annotations 

2 

3from functools import wraps 

4import re 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Iterable, 

10 Sequence, 

11 cast, 

12 final, 

13) 

14 

15import numpy as np 

16 

17from pandas._config import using_copy_on_write 

18 

19from pandas._libs import ( 

20 internals as libinternals, 

21 lib, 

22 writers, 

23) 

24from pandas._libs.internals import ( 

25 BlockPlacement, 

26 BlockValuesRefs, 

27) 

28from pandas._libs.missing import NA 

29from pandas._libs.tslibs import IncompatibleFrequency 

30from pandas._typing import ( 

31 ArrayLike, 

32 AxisInt, 

33 DtypeObj, 

34 F, 

35 FillnaOptions, 

36 IgnoreRaise, 

37 QuantileInterpolation, 

38 Shape, 

39 npt, 

40) 

41from pandas.errors import AbstractMethodError 

42from pandas.util._decorators import cache_readonly 

43from pandas.util._validators import validate_bool_kwarg 

44 

45from pandas.core.dtypes.astype import ( 

46 astype_array_safe, 

47 astype_is_view, 

48) 

49from pandas.core.dtypes.cast import ( 

50 LossySetitemError, 

51 can_hold_element, 

52 find_result_type, 

53 maybe_downcast_to_dtype, 

54 np_can_hold_element, 

55) 

56from pandas.core.dtypes.common import ( 

57 ensure_platform_int, 

58 is_1d_only_ea_dtype, 

59 is_1d_only_ea_obj, 

60 is_dtype_equal, 

61 is_interval_dtype, 

62 is_list_like, 

63 is_sparse, 

64 is_string_dtype, 

65) 

66from pandas.core.dtypes.dtypes import ( 

67 DatetimeTZDtype, 

68 ExtensionDtype, 

69 PandasDtype, 

70 PeriodDtype, 

71) 

72from pandas.core.dtypes.generic import ( 

73 ABCDataFrame, 

74 ABCIndex, 

75 ABCPandasArray, 

76 ABCSeries, 

77) 

78from pandas.core.dtypes.missing import ( 

79 is_valid_na_for_dtype, 

80 isna, 

81 na_value_for_dtype, 

82) 

83 

84from pandas.core import missing 

85import pandas.core.algorithms as algos 

86from pandas.core.array_algos.putmask import ( 

87 extract_bool_array, 

88 putmask_inplace, 

89 putmask_without_repeat, 

90 setitem_datetimelike_compat, 

91 validate_putmask, 

92) 

93from pandas.core.array_algos.quantile import quantile_compat 

94from pandas.core.array_algos.replace import ( 

95 compare_or_regex_search, 

96 replace_regex, 

97 should_use_regex, 

98) 

99from pandas.core.array_algos.transforms import shift 

100from pandas.core.arrays import ( 

101 Categorical, 

102 DatetimeArray, 

103 ExtensionArray, 

104 IntervalArray, 

105 PandasArray, 

106 PeriodArray, 

107 TimedeltaArray, 

108) 

109from pandas.core.arrays.sparse import SparseDtype 

110from pandas.core.base import PandasObject 

111import pandas.core.common as com 

112from pandas.core.computation import expressions 

113from pandas.core.construction import ( 

114 ensure_wrapped_if_datetimelike, 

115 extract_array, 

116) 

117from pandas.core.indexers import check_setitem_lengths 

118 

119if TYPE_CHECKING: 

120 from pandas.core.api import Index 

121 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray 

122 

123# comparison is faster than is_object_dtype 

124_dtype_obj = np.dtype("object") 

125 

126 

def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.

    Parameters
    ----------
    meth : Block method returning list[Block]

    Returns
    -------
    Wrapped method with the same signature as ``meth``.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        if self.ndim == 1 or self.shape[0] == 1:
            # Single column (or 1D): apply directly, no splitting needed.
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

142 

143 

class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    # Underlying data; always of a single (homogeneous) dtype.
    values: np.ndarray | ExtensionArray
    # Dimensionality of the owning manager (1 for Series, 2 for DataFrame).
    ndim: int
    # Copy-on-Write reference tracking for self.values.
    refs: BlockValuesRefs
    __init__: Callable

    __slots__ = ()
    is_numeric = False
    is_object = False
    is_extension = False
    # Whether same-dtype neighbors of this block type may be merged.
    _can_consolidate = True
    _validate_ndim = True

163 

    @final
    @cache_readonly
    def _consolidate_key(self):
        # Blocks sharing this key are candidates for consolidation.
        return self._can_consolidate, self.dtype.name

168 

    @final
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Can we store NA values in this Block?
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype):
            # bool/int/uint numpy dtypes have no NA representation
            return dtype.kind not in ["b", "i", "u"]
        # ExtensionDtype declares NA support itself
        return dtype._can_hold_na

179 

180 @final 

181 @property 

182 def is_bool(self) -> bool: 

183 """ 

184 We can be bool if a) we are bool dtype or b) object dtype with bool objects. 

185 """ 

186 return self.values.dtype == np.dtype(bool) 

187 

    @final
    def external_values(self):
        # Delegates to the module-level ``external_values`` helper (defined
        # elsewhere in this file).
        return external_values(self.values)

191 

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        # NA value appropriate for this block's dtype (compat=False —
        # semantics live in pandas.core.dtypes.missing.na_value_for_dtype).
        return na_value_for_dtype(self.dtype, compat=False)

197 

198 @final 

199 def _standardize_fill_value(self, value): 

200 # if we are passed a scalar None, convert it here 

201 if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype): 

202 value = self.fill_value 

203 return value 

204 

    @property
    def mgr_locs(self) -> BlockPlacement:
        # Positions of this block's rows within the owning BlockManager.
        return self._mgr_locs

208 

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        # No validation here; the caller is responsible for consistency.
        self._mgr_locs = new_mgr_locs

212 

    @final
    def make_block(
        self, values, placement=None, refs: BlockValuesRefs | None = None
    ) -> Block:
        """
        Create a new block, inferring the block type from ``values``;
        propagate any attributes that are not specified.

        Parameters
        ----------
        values : np.ndarray or ExtensionArray
        placement : BlockPlacement, optional
            Defaults to this block's placement.
        refs : BlockValuesRefs, optional
            Copy-on-Write references to attach to the new block.
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        # TODO: perf by not going through new_block
        # We assume maybe_coerce_values has already been called
        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)

229 

    @final
    def make_block_same_class(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Block:
        """Wrap given values in a block of same type as self.

        Unlike ``make_block``, no block-type inference is performed.
        """
        # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
        # relied on it, as of 2.0 the caller is responsible for this.
        if placement is None:
            placement = self._mgr_locs

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim, refs=refs)

245 

246 @final 

247 def __repr__(self) -> str: 

248 # don't want to print out all of the items here 

249 name = type(self).__name__ 

250 if self.ndim == 1: 

251 result = f"{name}: {len(self)} dtype: {self.dtype}" 

252 else: 

253 shape = " x ".join([str(s) for s in self.shape]) 

254 result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}" 

255 

256 return result 

257 

    @final
    def __len__(self) -> int:
        # Length along the first axis of the underlying values.
        return len(self.values)

261 

    @final
    def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: the only place where we are called with ndarray[intp]
        # is from internals.concat, and we can verify that never happens
        # with 1-column blocks, i.e. never for ExtensionBlock.

        new_mgr_locs = self._mgr_locs[slicer]

        new_values = self._slice(slicer)
        # A slice produces a view -> propagate CoW refs; fancy (intp)
        # indexing copies, so no refs are shared in that case.
        refs = self.refs if isinstance(slicer, slice) else None
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

278 

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement
    ) -> Block:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        new_values = self._slice(slicer)

        if new_values.ndim != self.values.ndim:
            raise ValueError("Only same dim slicing is allowed")

        # Slicing with a slice yields a view, so the new block shares refs.
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)

294 

295 @final 

296 def _can_hold_element(self, element: Any) -> bool: 

297 """require the same dtype as ourselves""" 

298 element = extract_array(element, extract_numpy=True) 

299 return can_hold_element(self.values, element) 

300 

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        # faster equivalent to is_dtype_equal(value.dtype, self.dtype)
        try:
            return value.dtype == self.dtype
        except TypeError:
            # some dtype comparisons raise; treat as "needs cast"
            return False

319 

320 # --------------------------------------------------------------------- 

321 # Apply/Reduce and Helpers 

322 

    @final
    def apply(self, func, **kwargs) -> list[Block]:
        """
        apply the function to my values; return a block if we are not
        one

        The result is rewrapped via ``_split_op_result`` (splitting 2D
        ExtensionArray results into 1D blocks where necessary).
        """
        result = func(self.values, **kwargs)

        return self._split_op_result(result)

332 

    @final
    def reduce(self, func) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        result = func(self.values)

        if self.values.ndim == 1:
            # TODO(EA2D): special case not needed with 2D EAs
            # scalar result -> wrap as a 1x1 array
            res_values = np.array([[result]])
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

349 

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        # Wrap an operation result in block(s), splitting a 2D EA result
        # into per-column 1D blocks when the EA type is 1D-only.
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_obj(result):
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                block = self.make_block(values=vals, placement=loc)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]

370 

371 @final 

372 def _split(self) -> list[Block]: 

373 """ 

374 Split a block into a list of single-column blocks. 

375 """ 

376 assert self.ndim == 2 

377 

378 new_blocks = [] 

379 for i, ref_loc in enumerate(self._mgr_locs): 

380 vals = self.values[slice(i, i + 1)] 

381 

382 bp = BlockPlacement(ref_loc) 

383 nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs) 

384 new_blocks.append(nb) 

385 return new_blocks 

386 

387 @final 

388 def split_and_operate(self, func, *args, **kwargs) -> list[Block]: 

389 """ 

390 Split the block and apply func column-by-column. 

391 

392 Parameters 

393 ---------- 

394 func : Block method 

395 *args 

396 **kwargs 

397 

398 Returns 

399 ------- 

400 List[Block] 

401 """ 

402 assert self.ndim == 2 and self.shape[0] != 1 

403 

404 res_blocks = [] 

405 for nb in self._split(): 

406 rbs = func(nb, *args, **kwargs) 

407 res_blocks.extend(rbs) 

408 return res_blocks 

409 

410 # --------------------------------------------------------------------- 

411 # Up/Down-casting 

412 

    @final
    def coerce_to_target_dtype(self, other) -> Block:
        """
        coerce the current block to a dtype compat for other
        we will return a block, possibly object, and not raise

        we can also safely try to coerce to the same dtype
        and will receive the same block
        """
        # find_result_type picks a common dtype that can hold both
        new_dtype = find_result_type(self.values, other)

        return self.astype(new_dtype, copy=False)

425 

    @final
    def _maybe_downcast(
        self, blocks: list[Block], downcast=None, using_cow: bool = False
    ) -> list[Block]:
        """
        Possibly downcast the given result blocks.

        downcast=False disables downcasting entirely; object-dtype blocks
        are always soft-converted regardless of ``downcast``.
        """
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # TODO: does it matter that self.dtype might not match blocks[i].dtype?
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            return extend_blocks(
                [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
            )

        if downcast is None:
            return blocks

        return extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

448 

    @final
    @maybe_split
    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        # If downcasting was a no-op (same array returned), keep CoW refs.
        refs = self.refs if using_cow and new_values is self.values else None
        return [self.make_block(new_values, refs=refs)]

460 

461 def convert( 

462 self, 

463 *, 

464 copy: bool = True, 

465 using_cow: bool = False, 

466 ) -> list[Block]: 

467 """ 

468 attempt to coerce any object types to better types return a copy 

469 of the block (if copy = True) by definition we are not an ObjectBlock 

470 here! 

471 """ 

472 if not copy and using_cow: 

473 return [self.copy(deep=False)] 

474 return [self.copy()] if copy else [self] 

475 

476 # --------------------------------------------------------------------- 

477 # Array-Like Methods 

478 

    @cache_readonly
    def dtype(self) -> DtypeObj:
        # Cached: a Block's dtype is not expected to change in place.
        return self.values.dtype

482 

    @final
    def astype(
        self,
        dtype: DtypeObj,
        copy: bool = False,
        errors: IgnoreRaise = "raise",
        using_cow: bool = False,
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object
        using_cow: bool, default False
            Signaling if copy on write copy logic is used.

        Returns
        -------
        Block

        Raises
        ------
        TypeError
            If the cast changed the block's shape.
        """
        values = self.values

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)

        refs = None
        # If the cast produced a view of the original data, the new block
        # must share CoW references with this one.
        if using_cow and astype_is_view(values.dtype, new_values.dtype):
            refs = self.refs

        newb = self.make_block(new_values, refs=refs)
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

527 

    @final
    def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block:
        """convert to our native types format"""
        # Delegates to the module-level to_native_types helper (defined
        # elsewhere in this file) and rewraps the result.
        result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs)
        return self.make_block(result)

533 

534 @final 

535 def copy(self, deep: bool = True) -> Block: 

536 """copy constructor""" 

537 values = self.values 

538 refs: BlockValuesRefs | None 

539 if deep: 

540 values = values.copy() 

541 refs = None 

542 else: 

543 refs = self.refs 

544 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs) 

545 

546 # --------------------------------------------------------------------- 

547 # Replace 

548 

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        replace the to_replace value with value, possible to create new
        blocks here this is just a call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            if using_cow and (self.refs.has_reference() or not inplace):
                blk = self.copy()
            elif using_cow:
                blk = self.copy(deep=False)
            else:
                blk = self if inplace else self.copy()
            values = cast(Categorical, blk.values)
            values._replace(to_replace=to_replace, value=value, inplace=True)
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Nothing to replace -> no-op.
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # Replacement value fits the current dtype: putmask in place
            # (on a copy where CoW / inplace semantics require one).
            # TODO(CoW): Maybe split here as well into columns where mask has True
            # and rest?
            if using_cow:
                if inplace:
                    blk = self.copy(deep=self.refs.has_reference())
                else:
                    blk = self.copy()
            else:
                blk = self if inplace else self.copy()
            putmask_inplace(blk.values, mask, value)
            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                blocks = blk.convert(copy=False, using_cow=using_cow)
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            # Single column: upcast to a dtype that can hold `value`, then
            # retry the replace on the upcast block.
            if value is None or value is NA:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                        using_cow=using_cow,
                    )
                )
            return blocks

648 

    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        mask=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        mask : array-like of bool, optional
            True indicate corresponding element is ignored.
        using_cow: bool, default False
            Specifying if copy on write is enabled.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only ObjectBlock, but could in principle include a
            # String ExtensionBlock
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        if using_cow:
            if inplace and not self.refs.has_reference():
                # sole owner of the data -> safe to mutate in place
                refs = self.refs
                new_values = self.values
            else:
                refs = None
                new_values = self.values.copy()
        else:
            refs = None
            new_values = self.values if inplace else self.values.copy()

        replace_regex(new_values, rx, value, mask)

        block = self.make_block(new_values, refs=refs)
        return block.convert(copy=False, using_cow=using_cow)

702 

    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            if using_cow and inplace:
                blk = self.copy(deep=self.refs.has_reference())
            else:
                blk = self if inplace else self.copy()
            values = cast(Categorical, blk.values)
            values._replace(to_replace=src_list, value=dest_list, inplace=True)
            return [blk]

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            if using_cow:
                return [self.copy(deep=False)]
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            na_mask = ~isna(values)
            masks: Iterable[npt.NDArray[np.bool_]] = (
                extract_bool_array(
                    cast(
                        ArrayLike,
                        compare_or_regex_search(
                            values, s[0], regex=regex, mask=na_mask
                        ),
                    )
                )
                for s in pairs
            )
        else:
            # GH#38086 faster if we know we dont need to check for regex
            masks = (missing.mask_missing(values, s[0]) for s in pairs)
        # Materialize if inplace = True, since the masks can change
        # as we replace
        if inplace:
            masks = list(masks)

        if using_cow and inplace:
            # Don't set up refs here, otherwise we will think that we have
            # references when we check again later
            rb = [self]
        else:
            rb = [self if inplace else self.copy()]

        for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = mask
                else:
                    mib = mask
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,
                    inplace=inplace,
                    regex=regex,
                    using_cow=using_cow,
                )
                if convert and blk.is_object and not all(x is None for x in dest_list):
                    # GH#44498 avoid unwanted cast-back
                    result = extend_blocks(
                        [
                            b.convert(copy=True and not using_cow, using_cow=using_cow)
                            for b in result
                        ]
                    )
                new_rb.extend(result)
            rb = new_rb
        return rb

807 

    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicate corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    # explicit None -> cast to object so None is preserved
                    has_ref = self.refs.has_reference()
                    nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                    if (nb is self or using_cow) and not inplace:
                        nb = nb.copy()
                    elif inplace and has_ref and nb.refs.has_reference():
                        # no copy in astype and we had refs before
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                if using_cow:
                    return [self.copy(deep=False)]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                mask=mask,
                using_cow=using_cow,
            )

869 

870 # --------------------------------------------------------------------- 

871 # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock 

872 # but not ExtensionBlock 

873 

874 def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray: 

875 """ 

876 For compatibility with 1D-only ExtensionArrays. 

877 """ 

878 return arg 

879 

880 def _unwrap_setitem_indexer(self, indexer): 

881 """ 

882 For compatibility with 1D-only ExtensionArrays. 

883 """ 

884 return indexer 

885 

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        # Shape of the underlying values array.
        return self.values.shape

891 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        """Positional getter into self.values."""
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

900 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""
        # May return a view (slice) or a copy (boolean/fancy indexing),
        # following numpy indexing semantics.
        return self.values[slicer]

907 

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Parameters
        ----------
        locs : positional indexer into self.values
        values : np.ndarray or ExtensionArray
        copy : bool, default False

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

925 

    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.

        If ``fill_value`` is not given, this block's own fill value is used
        and no fill is performed (allow_fill=False).
        """
        values = self.values

        if fill_value is lib.no_default:
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # these assertions
        if isinstance(self, ExtensionBlock):
            # NB: in this case, the 'axis' kwarg will be ignored in the
            # algos.take_nd call above.
            assert not (self.ndim == 1 and new_mgr_locs is None)
        assert not (axis == 0 and new_mgr_locs is None)

        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if not is_dtype_equal(new_values.dtype, self.dtype):
            # take may have upcast the dtype -> re-infer the block type
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

964 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

1008 

1009 # --------------------------------------------------------------------- 

1010 

    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow: bool, default False
            Signaling if CoW is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            # work row-major so `indexer` addresses rows
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)

            if using_cow and self.refs.has_reference():
                # shared data under CoW: mutate a private copy instead
                values = values.copy()
                self = self.make_block_same_class(
                    values.T if values.ndim == 2 else values
                )
            if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                casted = casted[0, ...]
            values[indexer] = casted
        return self

1069 

    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object
        using_cow: bool, default False

        Returns
        -------
        List[Block]
        """
        # Keep the unvalidated 2D mask around for the per-column split path.
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            # Mask selects nothing; under CoW return a shallow copy so the
            # caller still gets fresh reference tracking.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        try:
            # Raises LossySetitemError if `new` does not fit our dtype.
            casted = np_can_hold_element(values.dtype, new)

            if using_cow and self.refs.has_reference():
                # Do this here to avoid copying twice
                values = values.copy()
                self = self.make_block_same_class(values)

            putmask_without_repeat(values.T, mask, casted)
            if using_cow:
                return [self.copy(deep=False)]
            return [self]
        except LossySetitemError:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(new).putmask(mask, new)
                else:
                    # Delegate to setitem, which handles the upcast per-element.
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

1142 

    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        evaluate the block; return result block(s) from the result

        Keeps values where ``cond`` is True and replaces with ``other``
        where it is False.

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or None, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        orig_other = other
        if transpose:
            values = values.T

        # icond marks the positions to REPLACE (inverse of cond).
        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will re-use below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond, using_cow=using_cow)
                return self._maybe_downcast(
                    blocks, downcast=_downcast, using_cow=using_cow
                )

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(
                        oth, submask, _downcast=_downcast, using_cow=using_cow
                    )
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                    # If lengths don't match (or len(other)==1), we will raise
                    # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

            if transpose:
                result = result.T

            return [self.make_block(result)]

1261 

    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert to
        ObjectBlock and try again
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                if using_cow:
                    return [self.copy(deep=False)]
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy(deep=not using_cow)
                nbs = nb._maybe_downcast([nb], downcast=downcast, using_cow=using_cow)
                return nbs

        if limit is not None:
            # Un-mark NA positions beyond the first `limit` per row/column so
            # they are left unfilled.
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(mask.T, value, using_cow=using_cow)
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [
                blk._maybe_downcast([blk], downcast=downcast, using_cow=using_cow)
                for blk in nbs
            ]
        )

1317 

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        axis: AxisInt = 0,
        index: Index | None = None,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: str = "forward",
        limit_area: str | None = None,
        fill_value: Any | None = None,
        downcast: str | None = None,
        using_cow: bool = False,
        **kwargs,
    ) -> list[Block]:
        """
        Interpolate missing values in the block, delegating the actual
        array work to missing.interpolate_array_2d.  Returns the resulting
        block(s), possibly downcast.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        try:
            m = missing.clean_fill_method(method)
        except ValueError:
            # method is not a fill method (e.g. "linear"); m=None signals that
            m = None
        if m is None and self.dtype.kind != "f":
            # only deal with floats
            # bc we already checked that can_hold_na, we don't have int dtype here
            # test_interp_basic checks that we make a copy here
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        if self.is_object and self.ndim == 2 and self.shape[0] != 1 and axis == 0:
            # split improves performance in ndarray.copy()
            return self.split_and_operate(
                type(self).interpolate,
                method=method,
                axis=axis,
                index=index,
                inplace=inplace,
                limit=limit,
                limit_direction=limit_direction,
                limit_area=limit_area,
                fill_value=fill_value,
                downcast=downcast,
                **kwargs,
            )

        refs = None
        if inplace:
            if using_cow and self.refs.has_reference():
                # data is shared: must copy before mutating in place
                data = self.values.copy()
            else:
                data = self.values
                refs = self.refs
        else:
            data = self.values.copy()
        data = cast(np.ndarray, data)  # bc overridden by ExtensionBlock

        # Mutates `data` in place.
        missing.interpolate_array_2d(
            data,
            method=method,
            axis=axis,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            **kwargs,
        )

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow)

1394 

1395 def diff(self, n: int, axis: AxisInt = 1) -> list[Block]: 

1396 """return block for the diff of the values""" 

1397 # only reached with ndim == 2 and axis == 1 

1398 new_values = algos.diff(self.values, n, axis=axis) 

1399 return [self.make_block(values=new_values)] 

1400 

1401 def shift( 

1402 self, periods: int, axis: AxisInt = 0, fill_value: Any = None 

1403 ) -> list[Block]: 

1404 """shift the block by periods, possibly upcast""" 

1405 # convert integer to float if necessary. need to do a lot more than 

1406 # that, handle boolean etc also 

1407 

1408 # Note: periods is never 0 here, as that is handled at the top of 

1409 # NDFrame.shift. If that ever changes, we can do a check for periods=0 

1410 # and possibly avoid coercing. 

1411 

1412 if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj: 

1413 # with object dtype there is nothing to promote, and the user can 

1414 # pass pretty much any weird fill_value they like 

1415 # see test_shift_object_non_scalar_fill 

1416 raise ValueError("fill_value must be a scalar") 

1417 

1418 fill_value = self._standardize_fill_value(fill_value) 

1419 

1420 try: 

1421 # error: Argument 1 to "np_can_hold_element" has incompatible type 

1422 # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" 

1423 casted = np_can_hold_element( 

1424 self.dtype, fill_value # type: ignore[arg-type] 

1425 ) 

1426 except LossySetitemError: 

1427 nb = self.coerce_to_target_dtype(fill_value) 

1428 return nb.shift(periods, axis=axis, fill_value=fill_value) 

1429 

1430 else: 

1431 values = cast(np.ndarray, self.values) 

1432 new_values = shift(values, periods, axis, casted) 

1433 return [self.make_block(new_values)] 

1434 

1435 @final 

1436 def quantile( 

1437 self, 

1438 qs: Index, # with dtype float64 

1439 interpolation: QuantileInterpolation = "linear", 

1440 axis: AxisInt = 0, 

1441 ) -> Block: 

1442 """ 

1443 compute the quantiles of the 

1444 

1445 Parameters 

1446 ---------- 

1447 qs : Index 

1448 The quantiles to be computed in float64. 

1449 interpolation : str, default 'linear' 

1450 Type of interpolation. 

1451 axis : int, default 0 

1452 Axis to compute. 

1453 

1454 Returns 

1455 ------- 

1456 Block 

1457 """ 

1458 # We should always have ndim == 2 because Series dispatches to DataFrame 

1459 assert self.ndim == 2 

1460 assert axis == 1 # only ever called this way 

1461 assert is_list_like(qs) # caller is responsible for this 

1462 

1463 result = quantile_compat(self.values, np.asarray(qs._values), interpolation) 

1464 # ensure_block_shape needed for cases where we start with EA and result 

1465 # is ndarray, e.g. IntegerArray, SparseArray 

1466 result = ensure_block_shape(result, ndim=2) 

1467 return new_block_2d(result, placement=self._mgr_locs) 

1468 

1469 def round(self, decimals: int, using_cow: bool = False) -> Block: 

1470 """ 

1471 Rounds the values. 

1472 If the block is not of an integer or float dtype, nothing happens. 

1473 This is consistent with DataFrame.round behavivor. 

1474 (Note: Series.round would raise) 

1475 

1476 Parameters 

1477 ---------- 

1478 decimals: int, 

1479 Number of decimal places to round to. 

1480 Caller is responsible for validating this 

1481 using_cow: bool, 

1482 Whether Copy on Write is enabled right now 

1483 """ 

1484 if not self.is_numeric or self.is_bool: 

1485 return self.copy(deep=not using_cow) 

1486 refs = None 

1487 # TODO: round only defined on BaseMaskedArray 

1488 # Series also does this, so would need to fix both places 

1489 # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]" 

1490 # has no attribute "round" 

1491 values = self.values.round(decimals) # type: ignore[union-attr] 

1492 if values is self.values: 

1493 refs = self.refs 

1494 if not using_cow: 

1495 # Normally would need to do this before, but 

1496 # numpy only returns same array when round operation 

1497 # is no-op 

1498 # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 

1499 values = values.copy() 

1500 return self.make_block_same_class(values, refs=refs) 

1501 

1502 # --------------------------------------------------------------------- 

1503 # Abstract Methods Overridden By EABackedBlock and NumpyBlock 

1504 

    def delete(self, loc) -> list[Block]:
        """Deletes the locs from the block.

        We split the block to avoid copying the underlying data. We create new
        blocks for every connected segment of the initial block that is not deleted.
        The new blocks point to the initial array.
        """
        if not is_list_like(loc):
            loc = [loc]

        if self.ndim == 1:
            # 1D case: np.delete copies, no splitting needed.
            values = cast(np.ndarray, self.values)
            values = np.delete(values, loc)
            mgr_locs = self._mgr_locs.delete(loc)
            return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]

        if np.max(loc) >= self.values.shape[0]:
            raise IndexError

        # Add one out-of-bounds indexer as maximum to collect
        # all columns after our last indexer if any
        loc = np.concatenate([loc, [self.values.shape[0]]])
        mgr_locs_arr = self._mgr_locs.as_array
        new_blocks: list[Block] = []

        previous_loc = -1
        # TODO(CoW): This is tricky, if parent block goes out of scope
        # all split blocks are referencing each other even though they
        # don't share data
        refs = self.refs if self.refs.has_reference() else None
        for idx in loc:
            if idx == previous_loc + 1:
                # There is no column between current and last idx
                pass
            else:
                # Emit one block for the run of kept columns between the
                # previous deleted index and this one (views, not copies).
                # No overload variant of "__getitem__" of "ExtensionArray" matches
                # argument type "Tuple[slice, slice]"
                values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload] # noqa
                locs = mgr_locs_arr[previous_loc + 1 : idx]
                nb = type(self)(
                    values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
                )
                new_blocks.append(nb)

            previous_loc = idx

        return new_blocks

1552 

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # Abstract: concrete subclasses (see EABackedBlock below) decide
        # whether their values may share memory with another array.
        raise AbstractMethodError(self)

1557 

    @property
    def array_values(self) -> ExtensionArray:
        """
        The array that Series.array returns. Always an ExtensionArray.
        """
        # Abstract: EABackedBlock returns self.values directly; the
        # ndarray-backed subclass presumably wraps its ndarray — confirm
        # in the part of this file below this chunk.
        raise AbstractMethodError(self)

1564 

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return an internal format, currently just the ndarray
        this is often overridden to handle to_dense like operations
        """
        # Abstract; see EABackedBlock.get_values for the EA implementation.
        raise AbstractMethodError(self)

1571 

    def values_for_json(self) -> np.ndarray:
        """Return an ndarray representation suitable for JSON serialization."""
        # Abstract; see EABackedBlock.values_for_json for the EA implementation.
        raise AbstractMethodError(self)

1574 

1575 

class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.
    """

    values: ExtensionArray

    def setitem(self, indexer, value, using_cow: bool = False):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow: bool, default False
            Signaling if CoW is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # Keep originals so we can retry unchanged against an upcast block.
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            # unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            # EA setitem mutates in place; on success we return self.
            values[indexer] = value
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if is_interval_dtype(self.dtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                nb = self.coerce_to_target_dtype(orig_value)
                return nb.setitem(orig_indexer, orig_value)

            else:
                # Other EAs are not upcast implicitly; surface the error.
                raise

        else:
            return self

    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        Keep values where ``cond`` is True, replace with ``other`` elsewhere.
        Falls back to upcasting or per-column splitting when the EA cannot
        hold ``other``.
        """
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        # Keep pre-squeeze versions for the retry/split paths below.
        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:
                if is_interval_dtype(self.dtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow
                    )

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow
                    )

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)
        if new is lib.no_default:
            new = self.fill_value

        values = self.values
        if values.ndim == 2:
            values = values.T

        # Keep originals for the retry/split paths below.
        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # No-op; under CoW return a shallow copy with fresh refs.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        if using_cow and self.refs.has_reference():
            # Data is shared: copy before the in-place _putmask below.
            values = values.copy()
            self = self.make_block_same_class(  # type: ignore[assignment]
                values.T if values.ndim == 2 else values
            )

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError) as err:
            _catch_deprecated_value_error(err)

            if self.ndim == 1 or self.shape[0] == 1:
                if is_interval_dtype(self.dtype):
                    # Discussion about what we want to support in the general
                    #  case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    def delete(self, loc) -> list[Block]:
        """Delete ``loc`` positions; see Block.delete for the 2D split logic."""
        # This will be unnecessary if/when __array_function__ is implemented
        if self.ndim == 1:
            values = self.values.delete(loc)
            mgr_locs = self._mgr_locs.delete(loc)
            return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
        elif self.values.ndim == 1:
            # We get here through to_stata
            return []
        return super().delete(loc)

    @cache_readonly
    def array_values(self) -> ExtensionArray:
        # Already an ExtensionArray; no wrapping needed.
        return self.values

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    def values_for_json(self) -> np.ndarray:
        """Materialize the EA as an ndarray for JSON serialization."""
        return np.asarray(self.values)

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        axis: int = 0,
        inplace: bool = False,
        limit: int | None = None,
        fill_value=None,
        using_cow: bool = False,
        **kwargs,
    ):
        # EAs only support pad/backfill-style interpolation, implemented
        # via ExtensionArray.fillna.
        values = self.values
        if values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

1826 

1827 

1828class ExtensionBlock(libinternals.Block, EABackedBlock): 

1829 """ 

1830 Block for holding extension types. 

1831 

1832 Notes 

1833 ----- 

1834 This holds all 3rd-party extension array types. It's also the immediate 

1835 parent class for our internal extension types' blocks. 

1836 

1837 ExtensionArrays are limited to 1-D. 

1838 """ 

1839 

1840 _can_consolidate = False 

1841 _validate_ndim = False 

1842 is_extension = True 

1843 

1844 values: ExtensionArray 

1845 

    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Fill NA values via ExtensionArray.fillna; IntervalDtype defers to
        Block.fillna, which handles the required dtype coercion.
        """
        if is_interval_dtype(self.dtype):
            # Block.fillna handles coercion (test_fillna_interval)
            return super().fillna(
                value=value,
                limit=limit,
                inplace=inplace,
                downcast=downcast,
                using_cow=using_cow,
            )
        if using_cow and self._can_hold_na and not self.values._hasna:
            # No NAs to fill: keep the existing array and its refs (lazy copy).
            refs = self.refs
            new_values = self.values
        else:
            refs = None
            new_values = self.values.fillna(value=value, method=None, limit=limit)
        nb = self.make_block_same_class(new_values, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow=using_cow)

1871 

1872 @cache_readonly 

1873 def shape(self) -> Shape: 

1874 # TODO(EA2D): override unnecessary with 2D EAs 

1875 if self.ndim == 1: 

1876 return (len(self.values),) 

1877 return len(self._mgr_locs), len(self.values) 

1878 

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
        """
        Positional getter for this single-column EA block.  An int (or
        int-first tuple) must be 0; a (slice, int) tuple selects a row.
        """
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # We _could_ make the annotation more specific, but mypy would
        # complain about override mismatch:
        #  Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

        # Note: only reached with self.ndim == 2

        if isinstance(i, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = i
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            if isinstance(col, slice):
                # the is_null_slice check above assures that col is slice(None)
                # so what we want is a view on all our columns and row loc
                if loc < 0:
                    loc += len(self.values)
                # Note: loc:loc+1 vs [[loc]] makes a difference when called
                # from fast_xs because we want to get a view back.
                return self.values[loc : loc + 1]
            return self.values[loc]
        else:
            if i != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

1906 

1907 def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None: 

1908 # When an ndarray, we should have locs.tolist() == [0] 

1909 # When a BlockPlacement we should have list(locs) == [0] 

1910 if copy: 

1911 self.values = self.values.copy() 

1912 self.values[:] = values 

1913 

1914 def _maybe_squeeze_arg(self, arg): 

1915 """ 

1916 If necessary, squeeze a (N, 1) ndarray to (N,) 

1917 """ 

1918 # e.g. if we are passed a 2D mask for putmask 

1919 if ( 

1920 isinstance(arg, (np.ndarray, ExtensionArray)) 

1921 and arg.ndim == self.values.ndim + 1 

1922 ): 

1923 # TODO(EA2D): unnecessary with 2D EAs 

1924 assert arg.shape[1] == 1 

1925 # error: No overload variant of "__getitem__" of "ExtensionArray" 

1926 # matches argument type "Tuple[slice, int]" 

1927 arg = arg[:, 0] # type: ignore[call-overload] 

1928 elif isinstance(arg, ABCDataFrame): 

1929 # 2022-01-06 only reached for setitem 

1930 # TODO: should we avoid getting here with DataFrame? 

1931 assert arg.shape[1] == 1 

1932 arg = arg._ixs(0, axis=1)._values 

1933 

1934 return arg 

1935 

    def _unwrap_setitem_indexer(self, indexer):
        """
        Adapt a 2D-indexer to our 1D values.

        This is intended for 'setitem', not 'iget' or '_slice'.
        """
        # TODO: ATM this doesn't work for iget/_slice, can we change that?

        if isinstance(indexer, tuple) and len(indexer) == 2:
            # TODO(EA2D): not needed with 2D EAs
            #  Should never have length > 2.  Caller is responsible for checking.
            #  Length 1 is reached vis setitem_single_block and setitem_single_column
            #  each of which pass indexer=(pi,)
            if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
                # GH#44703 went through indexing.maybe_convert_ix
                first, second = indexer
                if not (
                    second.size == 1 and (second == 0).all() and first.shape[1] == 1
                ):
                    raise NotImplementedError(
                        "This should not be reached. Please report a bug at "
                        "github.com/pandas-dev/pandas/"
                    )
                indexer = first[:, 0]

            elif lib.is_integer(indexer[1]) and indexer[1] == 0:
                # reached via setitem_single_block passing the whole indexer
                indexer = indexer[0]

            elif com.is_null_slice(indexer[1]):
                # column part is slice(None): select everything in our column
                indexer = indexer[0]

            elif is_list_like(indexer[1]) and indexer[1][0] == 0:
                indexer = indexer[0]

            else:
                # Any other column indexer would address a column we don't have.
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas/"
                )
        return indexer

1977 

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        # EA blocks are considered to own their data for the purposes of
        # the BlockManager's copy/consolidation logic.
        return False

1982 

    @cache_readonly
    def is_numeric(self):
        # Delegate to the ExtensionDtype's own notion of numeric-ness.
        return self.values.dtype._is_numeric

1986 

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ExtensionArray:
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or ndarray[bool]
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        ExtensionArray
        """
        # Notes: ndarray[bool] is only reachable when via getitem_mgr, which
        #  is only for Series, i.e. self.ndim == 1.

        # return same dims as we currently have
        if self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): won't be necessary with 2D EAs

            if not isinstance(slicer, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): won't be necessary with 2D EAs
            # range(1) instead of self._mgr_locs to avoid exception on [::-1]
            #  see test_iloc_getitem_slice_negative_step_ea_block
            new_locs = range(1)[slicer]
            if not len(new_locs):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )
            # The fake dim0 was fully selected; apply a full slice to dim1.
            slicer = slice(None)

        return self.values[slicer]

2026 

    @final
    def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
        """
        Perform __getitem__-like specialized to slicing along index.

        Parameters
        ----------
        slicer : slice
            Slice along the last (index) axis.

        Returns
        -------
        ExtensionBlock
            New block of the same type with unchanged placement; `refs`
            are propagated since the sliced values may share memory.
        """
        # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
        # require subclasses of ExtensionArray to support that form (for now).
        new_values = self.values[slicer]
        return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)

2036 

    def diff(self, n: int, axis: AxisInt = 1) -> list[Block]:
        """
        1st discrete difference on the block's values.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : AxisInt, default 1
            Only reached with ndim == 2 and axis == 1; since the backing
            EA is 1-dimensional, the diff itself is taken with axis=0.

        Returns
        -------
        list containing a single new Block.
        """
        # only reached with ndim == 2 and axis == 1
        # TODO(EA2D): Can share with NDArrayBackedExtensionBlock
        new_values = algos.diff(self.values, n, axis=0)
        return [self.make_block(values=new_values)]

2042 

    def shift(
        self, periods: int, axis: AxisInt = 0, fill_value: Any = None
    ) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.

        Parameters
        ----------
        periods : int
            Number of positions to shift by.
        axis : AxisInt, default 0
            Accepted for signature compatibility; the EA shift is always
            along its single dimension.
        fill_value : Any, default None
            Value to use for newly introduced positions.

        Returns
        -------
        list containing a single new Block of the same class.
        """
        new_values = self.values.shift(periods=periods, fill_value=fill_value)
        return [self.make_block_same_class(new_values)]

2054 

    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        ExtensionArray-safe unstack.

        Parameters
        ----------
        unstacker : reshape helper exposing `arange_result`
        fill_value : scalar used for positions introduced by the unstack
        new_placement : np.ndarray[intp]
            Target placement for each resulting column.
        needs_masking : np.ndarray[bool]
            Per-column flag indicating whether any -1 (missing) indices
            are present, computed once by the caller.

        Returns
        -------
        tuple of (list of new Blocks, boolean mask of retained columns)
        """
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.

        # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
        new_values, mask = unstacker.arange_result

        # Note: these next two lines ensure that
        #  mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        #  which the calling function needs in order to pass verify_integrity=False
        #  to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        # needs_masking[i] calculated once in BlockManager.unstack tells
        #  us if there are any -1s in the relevant indices.  When False,
        #  that allows us to go through a faster path in 'take', among
        #  other things avoiding e.g. Categorical._validate_scalar.
        blocks = [
            # TODO: could cast to object depending on fill_value?
            type(self)(
                self.values.take(
                    indices, allow_fill=needs_masking[i], fill_value=fill_value
                ),
                BlockPlacement(place),
                ndim=2,
            )
            for i, (indices, place) in enumerate(zip(new_values, new_placement))
        ]
        return blocks, mask

2095 

2096 

class NumpyBlock(libinternals.NumpyBlock, Block):
    """
    Block backed by a plain np.ndarray.
    """

    values: np.ndarray

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # A non-None `base` means the ndarray shares its buffer with
        # another object.
        return self.values.base is not None

    @property
    def array_values(self) -> ExtensionArray:
        # Wrap in PandasArray so callers get a uniform ExtensionArray
        # interface regardless of block type.
        return PandasArray(self.values)

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        Return the underlying ndarray, cast to object dtype if requested.
        Any other ``dtype`` (including None) returns the values unchanged.
        """
        if dtype == _dtype_obj:
            return self.values.astype(_dtype_obj)
        return self.values

    def values_for_json(self) -> np.ndarray:
        # ndarray-backed values can be handed to the JSON writer as-is.
        return self.values

2116 

2117 

class NumericBlock(NumpyBlock):
    """
    NumpyBlock for numeric (float, complex, int, uint, bool) dtypes;
    see get_block_type, which routes dtype kinds "f", "c", "i", "u", "b" here.
    """

    __slots__ = ()
    # Class-level constant: numeric kinds never need a runtime dtype check.
    is_numeric = True

2121 

2122 

class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray (an ExtensionArray
    that stores its data in a plain ndarray, exposed as `_ndarray`).
    """

    values: NDArrayBackedExtensionArray

    # error: Signature of "is_extension" incompatible with supertype "Block"
    @cache_readonly
    def is_extension(self) -> bool:  # type: ignore[override]
        # i.e. datetime64tz, PeriodDtype
        return not isinstance(self.dtype, np.dtype)

    @property
    def is_view(self) -> bool:
        """return a boolean if I am possibly a view"""
        # check the ndarray values of the DatetimeIndex values
        return self.values._ndarray.base is not None

    def diff(self, n: int, axis: AxisInt = 0) -> list[Block]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new Block.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        # only reached with ndim == 2 and axis == 1
        values = self.values

        # subtraction of the shifted array yields NaT/NA at the leading
        # positions, matching Series.diff semantics
        new_values = values - values.shift(n, axis=axis)
        return [self.make_block(new_values)]

    def shift(
        self, periods: int, axis: AxisInt = 0, fill_value: Any = None
    ) -> list[Block]:
        """
        Shift the values by `periods`, filling vacated positions with
        `fill_value`; returns a list with one new same-class Block.
        """
        values = self.values
        new_values = values.shift(periods, fill_value=fill_value, axis=axis)
        return [self.make_block_same_class(new_values)]

2174 

2175 

2176def _catch_deprecated_value_error(err: Exception) -> None: 

2177 """ 

2178 We catch ValueError for now, but only a specific one raised by DatetimeArray 

2179 which will no longer be raised in version.2.0. 

2180 """ 

2181 if isinstance(err, ValueError): 

2182 if isinstance(err, IncompatibleFrequency): 

2183 pass 

2184 elif "'value.closed' is" in str(err): 

2185 # IntervalDtype mismatched 'closed' 

2186 pass 

2187 

2188 

class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    is_numeric = False
    values: DatetimeArray | TimedeltaArray

    def values_for_json(self) -> np.ndarray:
        # Safe here because there is no timezone to preserve; contrast
        # DatetimeTZBlock, which overrides this back to the base version.
        return self.values._ndarray

    def interpolate(
        self,
        *,
        method: FillnaOptions = "pad",
        index: Index | None = None,
        axis: int = 0,
        inplace: bool = False,
        limit: int | None = None,
        fill_value=None,
        using_cow: bool = False,
        **kwargs,
    ):
        """
        Fill or interpolate missing values.

        For ``method="linear"`` the interpolation is done on the raw
        ``_ndarray`` (in place when allowed); any other method is
        dispatched to ``fillna`` on the underlying array.

        Returns
        -------
        Block of the same class.
        """
        values = self.values

        # error: Non-overlapping equality check (left operand type:
        # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type:
        # "Literal['linear']")  [comparison-overlap]
        if method == "linear":  # type: ignore[comparison-overlap]
            # TODO: GH#50950 implement for arbitrary EAs
            refs = None
            if using_cow:
                # under Copy-on-Write we may only mutate in place when no
                # other object references these values
                if inplace and not self.refs.has_reference():
                    data_out = values._ndarray
                    refs = self.refs
                else:
                    data_out = values._ndarray.copy()
            else:
                data_out = values._ndarray if inplace else values._ndarray.copy()
            # mutates data_out in place
            missing.interpolate_array_2d(
                data_out, method=method, limit=limit, index=index, axis=axis
            )
            new_values = type(values)._simple_new(data_out, dtype=values.dtype)
            return self.make_block_same_class(new_values, refs=refs)

        elif values.ndim == 2 and axis == 0:
            # NDArrayBackedExtensionArray.fillna assumes axis=1
            new_values = values.T.fillna(value=fill_value, method=method, limit=limit).T
        else:
            new_values = values.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(new_values)

2239 

2240 

class DatetimeTZBlock(DatetimeLikeBlock):
    """implement a datetime64 block with a tz attribute"""

    values: DatetimeArray

    __slots__ = ()
    # tz-aware dtype is an ExtensionDtype, not a np.dtype
    is_extension = True
    _validate_ndim = True
    # cannot be merged with other blocks into a consolidated 2D ndarray
    _can_consolidate = False

    # Don't use values_for_json from DatetimeLikeBlock since it is
    # an invalid optimization here(drop the tz)
    values_for_json = NDArrayBackedExtensionBlock.values_for_json

2254 

2255 

class ObjectBlock(NumpyBlock):
    """
    NumpyBlock for object dtype (the fallback in get_block_type).
    """

    __slots__ = ()
    is_object = True

    @maybe_split
    def convert(
        self,
        *,
        copy: bool = True,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        attempt to cast any object types to better types return a copy of
        the block (if copy = True) by definition we ARE an ObjectBlock!!!!!

        Parameters
        ----------
        copy : bool, default True
            Copy the values when the conversion left them unchanged.
        using_cow : bool, default False
            Copy-on-Write mode: share refs instead of copying where safe.

        Returns
        -------
        list containing a single new Block.
        """
        if self.dtype != _dtype_obj:
            # GH#50067 this should be impossible in ObjectBlock, but until
            # that is fixed, we short-circuit here.
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        values = self.values
        if values.ndim == 2:
            # maybe_split ensures we only get here with values.shape[0] == 1,
            # avoid doing .ravel as that might make a copy
            values = values[0]

        res_values = lib.maybe_convert_objects(
            values,
            convert_datetime=True,
            convert_timedelta=True,
            convert_period=True,
            convert_interval=True,
        )
        refs = None
        # identity check: maybe_convert_objects returns the input object
        # unchanged when no conversion was possible
        if copy and res_values is values:
            res_values = values.copy()
        elif res_values is values and using_cow:
            refs = self.refs

        res_values = ensure_block_shape(res_values, self.ndim)
        return [self.make_block(res_values, refs=refs)]

2299 

2300 

2301# ----------------------------------------------------------------- 

2302# Constructor Helpers 

2303 

2304 

def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
    """
    Validate/normalize values before they are stored in a Block.

    datetime64/timedelta64 ndarrays are wrapped in their ExtensionArray
    counterparts (in nanoseconds), fixed-width string dtypes are converted
    to object dtype, and any cached `freq` is dropped.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    values : np.ndarray or ExtensionArray
    """
    # Caller is responsible for ensuring PandasArray is already extracted.
    if isinstance(values, np.ndarray):
        values = ensure_wrapped_if_datetimelike(values)

        if issubclass(values.dtype.type, str):
            # fixed-width unicode -> object; blocks never hold str dtypes
            values = np.array(values, dtype=object)

    is_dtlike = isinstance(values, (DatetimeArray, TimedeltaArray))
    if is_dtlike and values.freq is not None:
        # freq is only stored in DatetimeIndex/TimedeltaIndex,
        # not in Series/DataFrame
        values = values._with_freq(None)

    return values

2332 

2333 

def get_block_type(dtype: DtypeObj):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    cls: type[Block]

    # Extension dtypes first.  The Sparse check must precede the generic
    # ExtensionDtype check so that e.g. Sparse[datetime] stays sparse.
    if isinstance(dtype, SparseDtype):
        return ExtensionBlock
    if isinstance(dtype, DatetimeTZDtype):
        return DatetimeTZBlock
    if isinstance(dtype, PeriodDtype):
        return NDArrayBackedExtensionBlock
    if isinstance(dtype, ExtensionDtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        return ExtensionBlock

    # numpy dtypes: dispatch on kind, which is much more performant
    # than the is_foo_dtype helpers
    kind = dtype.kind
    if kind in "Mm":
        cls = DatetimeLikeBlock
    elif kind in "fciub":
        cls = NumericBlock
    else:
        cls = ObjectBlock
    return cls

2370 

2371 

def new_block_2d(
    values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
):
    """
    Fast-path Block constructor for the 2-D case.

    Specialization of ``new_block`` for callers that guarantee:
      * ndim == 2
      * ``placement`` is already a BlockPlacement
      * check_ndim / ensure_block_shape have already been applied
    """
    klass = get_block_type(values.dtype)

    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=2, placement=placement, refs=refs)

2383 

2384 

def new_block(
    values, placement, *, ndim: int, refs: BlockValuesRefs | None = None
) -> Block:
    """
    General Block constructor: normalizes placement, validates dimensions,
    then instantiates the dtype-appropriate Block subclass.

    Caller is responsible for ensuring ``values`` is NOT a PandasArray.
    """
    if not isinstance(placement, BlockPlacement):
        placement = BlockPlacement(placement)

    # raises ValueError on inconsistent ndim / placement length
    check_ndim(values, placement, ndim)

    klass = get_block_type(values.dtype)

    coerced = maybe_coerce_values(values)
    return klass(coerced, ndim=ndim, placement=placement, refs=refs)

2399 

2400 

def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    if values.ndim > ndim:
        # applies to np.ndarray and ExtensionArray alike
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim > ndim [{values.ndim} > {ndim}]"
        )

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case unnecessary with 2D EAs
        # a 1-dim-only EA in a 2-dim manager must cover exactly one row
        if ndim == 2 and len(placement) != 1:
            raise ValueError("need to split")
        return

    if values.ndim != ndim:
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim != ndim [{values.ndim} != {ndim}]"
        )
    if len(placement) != len(values):
        raise ValueError(
            f"Wrong number of items passed {len(values)}, "
            f"placement implies {len(placement)}"
        )

2441 

2442 

def extract_pandas_array(
    values: np.ndarray | ExtensionArray, dtype: DtypeObj | None, ndim: int
) -> tuple[np.ndarray | ExtensionArray, DtypeObj | None]:
    """
    Ensure that we don't allow PandasArray / PandasDtype in internals.

    Blocks should be backed by plain ndarrays when possible, so a
    PandasArray is unwrapped to its ndarray and a PandasDtype to its
    numpy dtype; anything else passes through unchanged.
    """
    if isinstance(values, ABCPandasArray):
        unwrapped = values.to_numpy()
        # TODO(EA2D): special case not needed with 2D EAs
        values = np.atleast_2d(unwrapped) if ndim and ndim > 1 else unwrapped

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    return values, dtype

2460 

2461 

2462# ----------------------------------------------------------------- 

2463 

2464 

def extend_blocks(result, blocks=None) -> list[Block]:
    """return a new extended blocks, given the result"""
    if blocks is None:
        blocks = []

    if not isinstance(result, list):
        # a bare result must be a single Block
        assert isinstance(result, Block), type(result)
        blocks.append(result)
        return blocks

    # flatten one level of nesting
    for item in result:
        if isinstance(item, list):
            blocks.extend(item)
        else:
            blocks.append(item)
    return blocks

2479 

2480 

def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
    """
    Reshape if possible to have values.ndim == ndim.

    Values already at (or above) the requested ndim, and 1-dim-only
    ExtensionArrays, are returned unchanged.
    """
    if values.ndim >= ndim:
        return values

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
        # block.shape is incorrect for "2D" ExtensionArrays
        # We can't, and don't need to, reshape.
        return values

    values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
    return values.reshape(1, -1)

2495 

2496 

def to_native_types(
    values: ArrayLike,
    *,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
    **kwargs,
) -> np.ndarray:
    """
    convert to our native types format

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    na_rep : str, default "nan"
        String representation used for missing values.
    quoting : optional
        csv quoting setting; when falsy, float/str fast paths are used.
    float_format : optional
        When given (or decimal != "."), floats go through FloatArrayFormatter.
    decimal : str, default "."
    **kwargs
        Passed through to `_format_native_types` for datetimelike values.

    Returns
    -------
    np.ndarray of object dtype (except the non-quoting float/str fast paths,
    which return str dtype before the final object cast).
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, **kwargs)
            result = result.astype(object, copy=False)
            return result

        # GH#21734 Process every column separately, they might have different formats
        results_converted = []
        for i in range(len(values)):
            result = values[i, :]._format_native_types(na_rep=na_rep, **kwargs)
            results_converted.append(result.astype(object, copy=False))
        return np.vstack(results_converted)

    elif values.dtype.kind == "f" and not is_sparse(values):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    elif isinstance(values, ExtensionArray):
        mask = isna(values)

        # cast to object first so na_rep can be assigned in place
        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:
        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge for the na_rep
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values

2582 

2583 

def external_values(values: ArrayLike) -> ArrayLike:
    """
    The array that Series.values returns (public attribute).

    This has some historical constraints, and is overridden in block
    subclasses to return the correct array (e.g. period returns
    object ndarray and datetimetz a datetime64[ns] ndarray instead of
    proper extension array).

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray or ExtensionArray
        Under Copy-on-Write, returned ndarrays are read-only views.
    """
    if isinstance(values, (PeriodArray, IntervalArray)):
        return values.astype(object)
    elif isinstance(values, (DatetimeArray, TimedeltaArray)):
        # NB: for datetime64tz this is different from np.asarray(values), since
        #  that returns an object-dtype ndarray of Timestamps.
        # Avoid raising in .astype in casting from dt64tz to dt64
        values = values._ndarray

    if isinstance(values, np.ndarray) and using_copy_on_write():
        # hand out a locked view so callers cannot mutate the block's
        # data behind the manager's back
        values = values.view()
        values.flags.writeable = False

    # TODO(CoW) we should also mark our ExtensionArrays as read-only

    return values