from __future__ import annotations

from functools import wraps
import inspect
import re
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    cast,
    final,
)
import warnings
import weakref

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
    warn_copy_on_write,
)

from pandas._libs import (
    NaT,
    internals as libinternals,
    lib,
)
from pandas._libs.internals import (
    BlockPlacement,
    BlockValuesRefs,
)
from pandas._libs.missing import NA
from pandas._typing import (
    ArrayLike,
    AxisInt,
    DtypeBackend,
    DtypeObj,
    F,
    FillnaOptions,
    IgnoreRaise,
    InterpolateOptions,
    QuantileInterpolation,
    Self,
    Shape,
    npt,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.astype import (
    astype_array_safe,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    convert_dtypes,
    find_result_type,
    maybe_downcast_to_dtype,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    is_1d_only_ea_dtype,
    is_float_dtype,
    is_integer_dtype,
    is_list_like,
    is_scalar,
    is_string_dtype,
)
from pandas.core.dtypes.dtypes import (
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    NumpyEADtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCIndex,
    ABCNumpyExtensionArray,
    ABCSeries,
)
from pandas.core.dtypes.missing import (
    is_valid_na_for_dtype,
    isna,
    na_value_for_dtype,
)

from pandas.core import missing
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    extract_bool_array,
    putmask_inplace,
    putmask_without_repeat,
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.replace import (
    compare_or_regex_search,
    replace_regex,
    should_use_regex,
)
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
    Categorical,
    DatetimeArray,
    ExtensionArray,
    IntervalArray,
    NumpyExtensionArray,
    PeriodArray,
    TimedeltaArray,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation import expressions
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import check_setitem_lengths
from pandas.core.indexes.base import get_values_for_csv

if TYPE_CHECKING:
    from collections.abc import (
        Iterable,
        Sequence,
    )

    from pandas.core.api import Index
    from pandas.core.arrays._mixins import NDArrayBackedExtensionArray

# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")


COW_WARNING_GENERAL_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
You are mutating a Series or DataFrame object, and currently this mutation will
also have effect on other Series or DataFrame objects that share data with this
object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
will never modify another.
"""


COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:

    ser = df["col"]
    ser[0] = 0 <--- in pandas 2, this also updates `df`

In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""


def maybe_split(meth: F) -> F:
    """
    If we have a multi-column block, split and operate block-wise. Otherwise
    use the original method.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        if self.ndim == 1 or self.shape[0] == 1:
            return meth(self, *args, **kwargs)
        else:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)

    return cast(F, newfunc)

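# A minimal sketch (hypothetical, not part of pandas' API) of what the
# decorator above does for a multi-column block: the wrapped method only ever
# sees single-column (or 1D) blocks, and the per-column results are collected
# into one list.
#
#     @maybe_split
#     def _op(self, *args, **kwargs) -> list[Block]:
#         ...                       # here self.ndim == 1 or self.shape[0] == 1
#
#     # blk.shape == (3, n)  ->  blk._op() == [result_col0, result_col1, ...]
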

class Block(PandasObject, libinternals.Block):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    values: np.ndarray | ExtensionArray
    ndim: int
    refs: BlockValuesRefs
    __init__: Callable

    __slots__ = ()
    is_numeric = False

    @final
    @cache_readonly
    def _validate_ndim(self) -> bool:
        """
        We validate dimension for blocks that can hold 2D values, which for now
        means numpy dtypes or DatetimeTZDtype.
        """
        dtype = self.dtype
        return not isinstance(dtype, ExtensionDtype) or isinstance(
            dtype, DatetimeTZDtype
        )

    @final
    @cache_readonly
    def is_object(self) -> bool:
        return self.values.dtype == _dtype_obj

    @final
    @cache_readonly
    def is_extension(self) -> bool:
        return not lib.is_np_dtype(self.values.dtype)

    @final
    @cache_readonly
    def _can_consolidate(self) -> bool:
        # We _could_ consolidate for DatetimeTZDtype but don't for now.
        return not self.is_extension

    @final
    @cache_readonly
    def _consolidate_key(self):
        return self._can_consolidate, self.dtype.name

    @final
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Can we store NA values in this Block?
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype):
            return dtype.kind not in "iub"
        return dtype._can_hold_na

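    # Illustration of the kind-based rule above (numpy dtype kinds are a fact
    # of numpy itself): "i" (signed int), "u" (unsigned int), and "b" (bool)
    # cannot represent NA, everything else can.
    #
    #     np.dtype("int64").kind    # "i"  -> _can_hold_na is False
    #     np.dtype("bool").kind     # "b"  -> _can_hold_na is False
    #     np.dtype("float64").kind  # "f"  -> _can_hold_na is True
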
    @final
    @property
    def is_bool(self) -> bool:
        """
        We can be bool if a) we are bool dtype or b) object dtype with bool objects.
        """
        return self.values.dtype == np.dtype(bool)

    @final
    def external_values(self):
        return external_values(self.values)

    @final
    @cache_readonly
    def fill_value(self):
        # Used in reindex_indexer
        return na_value_for_dtype(self.dtype, compat=False)

    @final
    def _standardize_fill_value(self, value):
        # if we are passed a scalar None, convert it here
        if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype):
            value = self.fill_value
        return value

    @property
    def mgr_locs(self) -> BlockPlacement:
        return self._mgr_locs

    @mgr_locs.setter
    def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
        self._mgr_locs = new_mgr_locs

    @final
    def make_block(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Block:
        """
        Create a new block, with type inference, propagating any values that
        are not specified.
        """
        if placement is None:
            placement = self._mgr_locs
        if self.is_extension:
            values = ensure_block_shape(values, ndim=self.ndim)

        return new_block(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def make_block_same_class(
        self,
        values,
        placement: BlockPlacement | None = None,
        refs: BlockValuesRefs | None = None,
    ) -> Self:
        """Wrap given values in a block of same type as self."""
        # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
        # relied on it, as of 2.0 the caller is responsible for this.
        if placement is None:
            placement = self._mgr_locs

        # We assume maybe_coerce_values has already been called
        return type(self)(values, placement=placement, ndim=self.ndim, refs=refs)

    @final
    def __repr__(self) -> str:
        # don't want to print out all of the items here
        name = type(self).__name__
        if self.ndim == 1:
            result = f"{name}: {len(self)} dtype: {self.dtype}"
        else:
            shape = " x ".join([str(s) for s in self.shape])
            result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"

        return result

    @final
    def __len__(self) -> int:
        return len(self.values)

    @final
    def slice_block_columns(self, slc: slice) -> Self:
        """
        Perform __getitem__-like, return result as block.
        """
        new_mgr_locs = self._mgr_locs[slc]

        new_values = self._slice(slc)
        refs = self.refs
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        # Note: only called from internals.concat, and we can verify
        # that never happens with 1-column blocks, i.e. never for ExtensionBlock.

        new_mgr_locs = self._mgr_locs[indices]

        new_values = self._slice(indices)
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)

    @final
    def getitem_block_columns(
        self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
    ) -> Self:
        """
        Perform __getitem__-like, return result as block.

        Only supports slices that preserve dimensionality.
        """
        new_values = self._slice(slicer)
        refs = self.refs if not ref_inplace_op or self.refs.has_reference() else None
        return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

    @final
    def _can_hold_element(self, element: Any) -> bool:
        """require the same dtype as ourselves"""
        element = extract_array(element, extract_numpy=True)
        return can_hold_element(self.values, element)

    @final
    def should_store(self, value: ArrayLike) -> bool:
        """
        Should we set self.values[indexer] = value inplace or do we need to cast?

        Parameters
        ----------
        value : np.ndarray or ExtensionArray

        Returns
        -------
        bool
        """
        return value.dtype == self.dtype

    # ---------------------------------------------------------------------
    # Apply/Reduce and Helpers

    @final
    def apply(self, func, **kwargs) -> list[Block]:
        """
        Apply the function to my values; return the result as a list of
        blocks, splitting if necessary.
        """
        result = func(self.values, **kwargs)

        result = maybe_coerce_values(result)
        return self._split_op_result(result)

    @final
    def reduce(self, func) -> list[Block]:
        # We will apply the function and reshape the result into a single-row
        # Block with the same mgr_locs; squeezing will be done at a higher level
        assert self.ndim == 2

        result = func(self.values)

        if self.values.ndim == 1:
            res_values = result
        else:
            res_values = result.reshape(-1, 1)

        nb = self.make_block(res_values)
        return [nb]

    @final
    def _split_op_result(self, result: ArrayLike) -> list[Block]:
        # See also: split_and_operate
        if result.ndim > 1 and isinstance(result.dtype, ExtensionDtype):
            # TODO(EA2D): unnecessary with 2D EAs
            # if we get a 2D ExtensionArray, we need to split it into 1D pieces
            nbs = []
            for i, loc in enumerate(self._mgr_locs):
                if not is_1d_only_ea_dtype(result.dtype):
                    vals = result[i : i + 1]
                else:
                    vals = result[i]

                bp = BlockPlacement(loc)
                block = self.make_block(values=vals, placement=bp)
                nbs.append(block)
            return nbs

        nb = self.make_block(result)

        return [nb]

    @final
    def _split(self) -> list[Block]:
        """
        Split a block into a list of single-column blocks.
        """
        assert self.ndim == 2

        new_blocks = []
        for i, ref_loc in enumerate(self._mgr_locs):
            vals = self.values[slice(i, i + 1)]

            bp = BlockPlacement(ref_loc)
            nb = type(self)(vals, placement=bp, ndim=2, refs=self.refs)
            new_blocks.append(nb)
        return new_blocks

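    # A hedged sketch (hypothetical shapes) of what _split produces: each
    # single-column block views the same underlying 2D array, which is why
    # refs=self.refs is passed above.
    #
    #     blk.shape                              # (3, n)
    #     [nb.shape for nb in blk._split()]      # [(1, n), (1, n), (1, n)]
    #     # each nb.mgr_locs holds one of blk's original manager locations
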
    @final
    def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
        """
        Split the block and apply func column-by-column.

        Parameters
        ----------
        func : Block method
        *args
        **kwargs

        Returns
        -------
        List[Block]
        """
        assert self.ndim == 2 and self.shape[0] != 1

        res_blocks = []
        for nb in self._split():
            rbs = func(nb, *args, **kwargs)
            res_blocks.extend(rbs)
        return res_blocks

    # ---------------------------------------------------------------------
    # Up/Down-casting

    @final
    def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
        """
        Coerce the current block to a dtype compatible with `other`.

        We will return a block, possibly of object dtype, and will not raise.
        We can also safely try to coerce to the same dtype and will receive
        the same block.
        """
        new_dtype = find_result_type(self.values.dtype, other)
        if new_dtype == self.dtype:
            # GH#52927 avoid RecursionError
            raise AssertionError(
                "Something has gone wrong, please report a bug at "
                "https://github.com/pandas-dev/pandas/issues"
            )

        # In a future version of pandas, the default will be that
        # setting `nan` into an integer series won't raise.
        if (
            is_scalar(other)
            and is_integer_dtype(self.values.dtype)
            and isna(other)
            and other is not NaT
            and not (
                isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
            )
        ):
            warn_on_upcast = False
        elif (
            isinstance(other, np.ndarray)
            and other.ndim == 1
            and is_integer_dtype(self.values.dtype)
            and is_float_dtype(other.dtype)
            and lib.has_only_ints_or_nan(other)
        ):
            warn_on_upcast = False

        if warn_on_upcast:
            warnings.warn(
                "Setting an item of incompatible dtype is deprecated "
                "and will raise an error in a future version of pandas. "
                f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
                "please explicitly cast to a compatible dtype first.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        if self.values.dtype == new_dtype:
            raise AssertionError(
                f"Did not expect new dtype {new_dtype} to equal self.dtype "
                f"{self.values.dtype}. Please report a bug at "
                "https://github.com/pandas-dev/pandas/issues."
            )
        return self.astype(new_dtype, copy=False)

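    # A hedged, user-level illustration of the upcast path above (pandas 2.x;
    # exact warning text may vary between versions):
    #
    #     ser = pd.Series([1, 2, 3])   # int64 block
    #     ser[0] = "a"                 # int64 cannot hold "a"
    #     # -> FutureWarning: Setting an item of incompatible dtype ...
    #     # ser.dtype is now object, via coerce_to_target_dtype
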
    @final
    def _maybe_downcast(
        self,
        blocks: list[Block],
        downcast,
        using_cow: bool,
        caller: str,
    ) -> list[Block]:
        if downcast is False:
            return blocks

        if self.dtype == _dtype_obj:
            # TODO: does it matter that self.dtype might not match blocks[i].dtype?
            # GH#44241 We downcast regardless of the argument;
            # respecting 'downcast=None' may be worthwhile at some point,
            # but ATM it breaks too much existing code.
            # split and convert the blocks

            if caller == "fillna" and get_option("future.no_silent_downcasting"):
                return blocks

            nbs = extend_blocks(
                [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
            )
            if caller == "fillna":
                if len(nbs) != len(blocks) or not all(
                    x.dtype == y.dtype for x, y in zip(nbs, blocks)
                ):
                    # GH#54261
                    warnings.warn(
                        "Downcasting object dtype arrays on .fillna, .ffill, .bfill "
                        "is deprecated and will change in a future version. "
                        "Call result.infer_objects(copy=False) instead. "
                        "To opt-in to the future "
                        "behavior, set "
                        "`pd.set_option('future.no_silent_downcasting', True)`",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

            return nbs

        elif downcast is None:
            return blocks
        elif caller == "where" and get_option("future.no_silent_downcasting") is True:
            return blocks
        else:
            nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

        # When _maybe_downcast is called with caller="where", it is either
        # a) with downcast=False, which is a no-op (the desired future behavior)
        # b) with downcast="infer", which is _not_ passed by the user.
        # In the latter case the future behavior is to stop doing inference,
        # so we issue a warning if and only if some inference occurred.
        if caller == "where":
            # GH#53656
            if len(blocks) != len(nbs) or any(
                left.dtype != right.dtype for left, right in zip(blocks, nbs)
            ):
                # In this case _maybe_downcast was _not_ a no-op, so the behavior
                # will change, so we issue a warning.
                warnings.warn(
                    "Downcasting behavior in Series and DataFrame methods 'where', "
                    "'mask', and 'clip' is deprecated. In a future "
                    "version this will not infer object dtypes or cast all-round "
                    "floats to integers. Instead call "
                    "result.infer_objects(copy=False) for object inference, "
                    "or cast round floats explicitly. To opt-in to the future "
                    "behavior, set "
                    "`pd.set_option('future.no_silent_downcasting', True)`",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        return nbs

    @final
    @maybe_split
    def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
        """
        downcast specialized to 2D case post-validation.

        Refactored to allow use of maybe_split.
        """
        new_values = maybe_downcast_to_dtype(self.values, dtype=dtype)
        new_values = maybe_coerce_values(new_values)
        refs = self.refs if new_values is self.values else None
        return [self.make_block(new_values, refs=refs)]

    @final
    def convert(
        self,
        *,
        copy: bool = True,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Attempt to coerce any object types to better types. Return a copy
        of the block (if copy = True).
        """
        if not self.is_object:
            if not copy and using_cow:
                return [self.copy(deep=False)]
            return [self.copy()] if copy else [self]

        if self.ndim != 1 and self.shape[0] != 1:
            blocks = self.split_and_operate(
                Block.convert, copy=copy, using_cow=using_cow
            )
            if all(blk.dtype.kind == "O" for blk in blocks):
                # Avoid fragmenting the block if convert is a no-op
                if using_cow:
                    return [self.copy(deep=False)]
                return [self.copy()] if copy else [self]
            return blocks

        values = self.values
        if values.ndim == 2:
            # the check above ensures we only get here with values.shape[0] == 1,
            # avoid doing .ravel as that might make a copy
            values = values[0]

        res_values = lib.maybe_convert_objects(
            values,  # type: ignore[arg-type]
            convert_non_numeric=True,
        )
        refs = None
        if copy and res_values is values:
            res_values = values.copy()
        elif res_values is values:
            refs = self.refs

        res_values = ensure_block_shape(res_values, self.ndim)
        res_values = maybe_coerce_values(res_values)
        return [self.make_block(res_values, refs=refs)]

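    # A hedged public-API mirror of ``convert``: object-dtype inference is
    # what DataFrame/Series.infer_objects exposes.
    #
    #     pd.Series([1, 2, 3], dtype=object).infer_objects().dtype
    #     # -> int64; lib.maybe_convert_objects did the work
    #     pd.Series(["a", "b"], dtype=object).infer_objects().dtype
    #     # -> unchanged (object); convert is a no-op and no split happens
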
    def convert_dtypes(
        self,
        copy: bool,
        using_cow: bool,
        infer_objects: bool = True,
        convert_string: bool = True,
        convert_integer: bool = True,
        convert_boolean: bool = True,
        convert_floating: bool = True,
        dtype_backend: DtypeBackend = "numpy_nullable",
    ) -> list[Block]:
        if infer_objects and self.is_object:
            blks = self.convert(copy=False, using_cow=using_cow)
        else:
            blks = [self]

        if not any(
            [convert_floating, convert_integer, convert_boolean, convert_string]
        ):
            return [b.copy(deep=copy) for b in blks]

        rbs = []
        for blk in blks:
            # Determine dtype column by column
            sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split()
            dtypes = [
                convert_dtypes(
                    b.values,
                    convert_string,
                    convert_integer,
                    convert_boolean,
                    convert_floating,
                    infer_objects,
                    dtype_backend,
                )
                for b in sub_blks
            ]
            if all(dtype == self.dtype for dtype in dtypes):
                # Avoid block splitting if no dtype changes
                rbs.append(blk.copy(deep=copy))
                continue

            for dtype, b in zip(dtypes, sub_blks):
                rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1))
        return rbs

    # ---------------------------------------------------------------------
    # Array-Like Methods

    @final
    @cache_readonly
    def dtype(self) -> DtypeObj:
        return self.values.dtype

    @final
    def astype(
        self,
        dtype: DtypeObj,
        copy: bool = False,
        errors: IgnoreRaise = "raise",
        using_cow: bool = False,
        squeeze: bool = False,
    ) -> Block:
        """
        Coerce to the new dtype.

        Parameters
        ----------
        dtype : np.dtype or ExtensionDtype
        copy : bool, default False
            copy if indicated
        errors : str, {'raise', 'ignore'}, default 'raise'
            - ``raise`` : allow exceptions to be raised
            - ``ignore`` : suppress exceptions. On error return original object
        using_cow : bool, default False
            Signals whether copy-on-write logic is used.
        squeeze : bool, default False
            squeeze values to ndim=1 if only one column is given

        Returns
        -------
        Block
        """
        values = self.values
        if squeeze and values.ndim == 2 and is_1d_only_ea_dtype(dtype):
            if values.shape[0] != 1:
                raise ValueError("Can not squeeze with more than one column.")
            values = values[0, :]  # type: ignore[call-overload]

        new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

        new_values = maybe_coerce_values(new_values)

        refs = None
        if (using_cow or not copy) and astype_is_view(values.dtype, new_values.dtype):
            refs = self.refs

        newb = self.make_block(new_values, refs=refs)
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
        return newb

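    # A hedged note on the refs logic above: when a cast can be performed as a
    # view (astype_is_view), the new block keeps a reference to the parent's
    # values so Copy-on-Write can detect the sharing later.
    #
    #     ser = pd.Series([1, 2, 3])
    #     same = ser.astype("int64")  # view-compatible cast; under CoW the
    #                                 # result shares memory until mutated
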
    @final
    def get_values_for_csv(
        self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
    ) -> Block:
        """convert to our native types format"""
        result = get_values_for_csv(
            self.values,
            na_rep=na_rep,
            quoting=quoting,
            float_format=float_format,
            date_format=date_format,
            decimal=decimal,
        )
        return self.make_block(result)

    @final
    def copy(self, deep: bool = True) -> Self:
        """copy constructor"""
        values = self.values
        refs: BlockValuesRefs | None
        if deep:
            values = values.copy()
            refs = None
        else:
            refs = self.refs
        return type(self)(values, placement=self._mgr_locs, ndim=self.ndim, refs=refs)

    # ---------------------------------------------------------------------
    # Copy-on-Write Helpers

    @final
    def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self:
        if using_cow and inplace:
            deep = self.refs.has_reference()
            blk = self.copy(deep=deep)
        else:
            blk = self if inplace else self.copy()
        return blk

    @final
    def _get_refs_and_copy(self, using_cow: bool, inplace: bool):
        refs = None
        copy = not inplace
        if inplace:
            if using_cow and self.refs.has_reference():
                copy = True
            else:
                refs = self.refs
        return copy, refs

    # ---------------------------------------------------------------------
    # Replace

    @final
    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        # mask may be pre-computed if we're called from replace_list
        mask: npt.NDArray[np.bool_] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace the to_replace value with value; it is possible to create new
        blocks here. When the value fits the current dtype, this is just a
        call to putmask.
        """

        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through replace_list
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=to_replace, value=value, inplace=True)
            return [blk]

        if not self._can_hold_element(to_replace):
            # We cannot hold `to_replace`, so we know immediately that
            # replacing it is a no-op.
            # Note: If to_replace were a list, NDFrame.replace would call
            # replace_list instead of replace.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        if mask is None:
            mask = missing.mask_missing(values, to_replace)
        if not mask.any():
            # Note: we get here with test_replace_extension_other incorrectly
            # bc _can_hold_element is incorrect.
            if using_cow:
                return [self.copy(deep=False)]
            else:
                return [self] if inplace else [self.copy()]

        elif self._can_hold_element(value):
            # TODO(CoW): Maybe split here as well into columns where mask has True
            # and rest?
            blk = self._maybe_copy(using_cow, inplace)
            putmask_inplace(blk.values, mask, value)
            if (
                inplace
                and warn_copy_on_write()
                and already_warned is not None
                and not already_warned.warned_already
            ):
                if self.refs.has_reference():
                    warnings.warn(
                        COW_WARNING_GENERAL_MSG,
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                already_warned.warned_already = True

            if not (self.is_object and value is None):
                # if the user *explicitly* gave None, we keep None, otherwise
                # may downcast to NaN
                if get_option("future.no_silent_downcasting") is True:
                    blocks = [blk]
                else:
                    blocks = blk.convert(copy=False, using_cow=using_cow)
                    if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
                        warnings.warn(
                            # GH#54710
                            "Downcasting behavior in `replace` is deprecated and "
                            "will be removed in a future version. To retain the old "
                            "behavior, explicitly call "
                            "`result.infer_objects(copy=False)`. "
                            "To opt-in to the future "
                            "behavior, set "
                            "`pd.set_option('future.no_silent_downcasting', True)`",
                            FutureWarning,
                            stacklevel=find_stack_level(),
                        )
            else:
                blocks = [blk]
            return blocks

        elif self.ndim == 1 or self.shape[0] == 1:
            if value is None or value is NA:
                blk = self.astype(np.dtype(object))
            else:
                blk = self.coerce_to_target_dtype(value)
            return blk.replace(
                to_replace=to_replace,
                value=value,
                inplace=True,
                mask=mask,
            )

        else:
            # split so that we only upcast where necessary
            blocks = []
            for i, nb in enumerate(self._split()):
                blocks.extend(
                    type(self).replace(
                        nb,
                        to_replace=to_replace,
                        value=value,
                        inplace=True,
                        mask=mask[i : i + 1],
                        using_cow=using_cow,
                    )
                )
            return blocks

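    # A hedged, user-level map of the branches above (pandas 2.x):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.replace(2, 0)     # value fits int64 -> putmask_inplace branch
    #     ser.replace(2, "a")   # value does not fit -> upcast, then recurse
    #     ser.replace("x", 0)   # int64 cannot hold "x" -> no-op fast path
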
    @final
    def _replace_regex(
        self,
        to_replace,
        value,
        inplace: bool = False,
        mask=None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        mask : array-like of bool, optional
            True indicates the corresponding element is ignored.
        using_cow : bool, default False
            Whether copy-on-write is enabled.

        Returns
        -------
        List[Block]
        """
        if not self._can_hold_element(to_replace):
            # i.e. only if self.is_object is True, but could in principle include a
            # String ExtensionBlock
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        rx = re.compile(to_replace)

        block = self._maybe_copy(using_cow, inplace)

        replace_regex(block.values, rx, value, mask)

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        nbs = block.convert(copy=False, using_cow=using_cow)
        opt = get_option("future.no_silent_downcasting")
        if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
            warnings.warn(
                # GH#54710
                "Downcasting behavior in `replace` is deprecated and "
                "will be removed in a future version. To retain the old "
                "behavior, explicitly call `result.infer_objects(copy=False)`. "
                "To opt-in to the future "
                "behavior, set "
                "`pd.set_option('future.no_silent_downcasting', True)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return nbs

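    # A hedged public-API mirror of the regex path (pandas 2.x):
    #
    #     ser = pd.Series(["foo", "bar", "baz"])
    #     ser.replace(r"^ba.$", "qux", regex=True)   # -> ["foo", "qux", "qux"]
    #     # non-object blocks cannot hold the pattern and take the no-op
    #     # branch at the top of this method
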
    @final
    def replace_list(
        self,
        src_list: Iterable[Any],
        dest_list: Sequence[Any],
        inplace: bool = False,
        regex: bool = False,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        See BlockManager.replace_list docstring.
        """
        values = self.values

        if isinstance(values, Categorical):
            # TODO: avoid special-casing
            # GH49404
            blk = self._maybe_copy(using_cow, inplace)
            values = cast(Categorical, blk.values)
            values._replace(to_replace=src_list, value=dest_list, inplace=True)
            return [blk]

        # Exclude anything that we know we won't contain
        pairs = [
            (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
        ]
        if not len(pairs):
            if using_cow:
                return [self.copy(deep=False)]
            # shortcut, nothing to replace
            return [self] if inplace else [self.copy()]

        src_len = len(pairs) - 1

        if is_string_dtype(values.dtype):
            # Calculate the mask once, prior to the call of comp
            # in order to avoid repeating the same computations
            na_mask = ~isna(values)
            masks: Iterable[npt.NDArray[np.bool_]] = (
                extract_bool_array(
                    cast(
                        ArrayLike,
                        compare_or_regex_search(
                            values, s[0], regex=regex, mask=na_mask
                        ),
                    )
                )
                for s in pairs
            )
        else:
            # GH#38086 faster if we know we don't need to check for regex
            masks = (missing.mask_missing(values, s[0]) for s in pairs)
        # Materialize if inplace = True, since the masks can change
        # as we replace
        if inplace:
            masks = list(masks)

        if using_cow:
            # Don't set up refs here, otherwise we will think that we have
            # references when we check again later
            rb = [self]
        else:
            rb = [self if inplace else self.copy()]

        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        opt = get_option("future.no_silent_downcasting")
        for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
            convert = i == src_len  # only convert once at the end
            new_rb: list[Block] = []

            # GH-39338: _replace_coerce can split a block into
            # single-column blocks, so track the index so we know
            # where to index into the mask
            for blk_num, blk in enumerate(rb):
                if len(rb) == 1:
                    m = mask
                else:
                    mib = mask
                    assert not isinstance(mib, bool)
                    m = mib[blk_num : blk_num + 1]

                # error: Argument "mask" to "_replace_coerce" of "Block" has
                # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
                # expected "ndarray[Any, dtype[bool_]]"
                result = blk._replace_coerce(
                    to_replace=src,
                    value=dest,
                    mask=m,
                    inplace=inplace,
                    regex=regex,
                    using_cow=using_cow,
                )

                if using_cow and i != src_len:
                    # This is ugly, but we have to get rid of intermediate refs
                    # that did not go out of scope yet, otherwise we will trigger
                    # many unnecessary copies
                    for b in result:
                        ref = weakref.ref(b)
                        b.refs.referenced_blocks.pop(
                            b.refs.referenced_blocks.index(ref)
                        )

                if (
                    not opt
                    and convert
                    and blk.is_object
                    and not all(x is None for x in dest_list)
                ):
                    # GH#44498 avoid unwanted cast-back
                    nbs = []
                    for res_blk in result:
                        converted = res_blk.convert(
                            copy=True and not using_cow, using_cow=using_cow
                        )
                        if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
                            warnings.warn(
                                # GH#54710
                                "Downcasting behavior in `replace` is deprecated "
                                "and will be removed in a future version. To "
                                "retain the old behavior, explicitly call "
                                "`result.infer_objects(copy=False)`. "
                                "To opt-in to the future "
                                "behavior, set "
                                "`pd.set_option('future.no_silent_downcasting', True)`",
                                FutureWarning,
                                stacklevel=find_stack_level(),
                            )
                        nbs.extend(converted)
                    result = nbs
                new_rb.extend(result)
            rb = new_rb
        return rb

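    # A hedged usage sketch of the list path (pandas 2.x public API):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.replace([1, 2], [10, 20])   # pairs == [(1, 10), (2, 20)]
    #     # each (src, dest) pair runs through _replace_coerce in order, and
    #     # object-dtype conversion is deferred to the last pair (i == src_len)
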
    @final
    def _replace_coerce(
        self,
        to_replace,
        value,
        mask: npt.NDArray[np.bool_],
        inplace: bool = True,
        regex: bool = False,
        using_cow: bool = False,
    ) -> list[Block]:
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        mask : np.ndarray[bool]
            True indicates the corresponding element is ignored.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.

        Returns
        -------
        List[Block]
        """
        if should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                mask=mask,
            )
        else:
            if value is None:
                # gh-45601, gh-45836, gh-46634
                if mask.any():
                    has_ref = self.refs.has_reference()
                    nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                    if (nb is self or using_cow) and not inplace:
                        nb = nb.copy()
                    elif inplace and has_ref and nb.refs.has_reference() and using_cow:
                        # no copy in astype and we had refs before
                        nb = nb.copy()
                    putmask_inplace(nb.values, mask, value)
                    return [nb]
                if using_cow:
                    return [self]
                return [self] if inplace else [self.copy()]
            return self.replace(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                mask=mask,
                using_cow=using_cow,
            )

    # ---------------------------------------------------------------------
    # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock
    # but not ExtensionBlock

    def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return arg

    def _unwrap_setitem_indexer(self, indexer):
        """
        For compatibility with 1D-only ExtensionArrays.
        """
        return indexer

    # NB: this cannot be made cache_readonly because in mgr.set_values we pin
    # new .values that can have different shape GH#42631
    @property
    def shape(self) -> Shape:
        return self.values.shape

    def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
        # In the case where we have a tuple[slice, int], the slice will always
        # be slice(None)
        # Note: only reached with self.ndim == 2
        # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
        # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
        # "Union[int, integer[Any]]"
        return self.values[i]  # type: ignore[index]

    def _slice(
        self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
    ) -> ArrayLike:
        """return a slice of my values"""

        return self.values[slicer]

    def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
        """
        Modify block values in-place with new item value.

        If copy=True, first copy the underlying values in place before modifying
        (for Copy-on-Write).

        Notes
        -----
        `set_inplace` never creates a new array or new Block, whereas `setitem`
        _may_ create a new array and always creates a new Block.

        Caller is responsible for checking values.dtype == self.dtype.
        """
        if copy:
            self.values = self.values.copy()
        self.values[locs] = values

    @final
    def take_nd(
        self,
        indexer: npt.NDArray[np.intp],
        axis: AxisInt,
        new_mgr_locs: BlockPlacement | None = None,
        fill_value=lib.no_default,
    ) -> Block:
        """
        Take values according to indexer and return them as a block.
        """
        values = self.values

        if fill_value is lib.no_default:
            fill_value = self.fill_value
            allow_fill = False
        else:
            allow_fill = True

        # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
        new_values = algos.take_nd(
            values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
        )

        # Called from three places in managers, all of which satisfy
        # these assertions
        if isinstance(self, ExtensionBlock):
            # NB: in this case, the 'axis' kwarg will be ignored in the
            # algos.take_nd call above.
            assert not (self.ndim == 1 and new_mgr_locs is None)
        assert not (axis == 0 and new_mgr_locs is None)

        if new_mgr_locs is None:
            new_mgr_locs = self._mgr_locs

        if new_values.dtype != self.dtype:
            return self.make_block(new_values, new_mgr_locs)
        else:
            return self.make_block_same_class(new_values, new_mgr_locs)

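    # A hedged sketch of the fill semantics above, calling the internal helper
    # directly (signature as of pandas 2.x; not public API):
    #
    #     arr = np.array([1.0, 2.0, 3.0])
    #     algos.take_nd(arr, np.array([0, -1]), axis=0,
    #                   allow_fill=True, fill_value=np.nan)
    #     # -> array([ 1., nan]); without allow_fill, -1 means "last element"
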
    def _unstack(
        self,
        unstacker,
        fill_value,
        new_placement: npt.NDArray[np.intp],
        needs_masking: npt.NDArray[np.bool_],
    ):
        """
        Return a list of unstacked blocks of self.

        Parameters
        ----------
        unstacker : reshape._Unstacker
        fill_value : int
            Only used in ExtensionBlock._unstack
        new_placement : np.ndarray[np.intp]
        needs_masking : np.ndarray[bool]

        Returns
        -------
        blocks : list of Block
            New blocks of unstacked values.
        mask : array-like of bool
            The mask of columns of `blocks` we should keep.
        """
        new_values, mask = unstacker.get_new_values(
            self.values.T, fill_value=fill_value
        )

        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        # Note: these next two lines ensure that
        # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
        # which the calling function needs in order to pass verify_integrity=False
        # to the BlockManager constructor
        new_values = new_values.T[mask]
        new_placement = new_placement[mask]

        bp = BlockPlacement(new_placement)
        blocks = [new_block_2d(new_values, placement=bp)]
        return blocks, mask

    # ---------------------------------------------------------------------

    def setitem(self, indexer, value, using_cow: bool = False) -> Block:
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow : bool, default False
            Signals whether Copy-on-Write is used.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """

        value = self._standardize_fill_value(value)

        values = cast(np.ndarray, self.values)
        if self.ndim == 2:
            values = values.T

        # length checking
        check_setitem_lengths(indexer, value, values)

        if self.dtype != _dtype_obj:
            # GH48933: extract_array would convert a pd.Series value to np.ndarray
            value = extract_array(value, extract_numpy=True)
        try:
            casted = np_can_hold_element(values.dtype, value)
        except LossySetitemError:
            # current dtype cannot store value, coerce to common dtype
            nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
            return nb.setitem(indexer, value)
        else:
            if self.dtype == _dtype_obj:
                # TODO: avoid having to construct values[indexer]
                vi = values[indexer]
                if lib.is_list_like(vi):
                    # checking lib.is_scalar here fails on
                    # test_iloc_setitem_custom_object
                    casted = setitem_datetimelike_compat(values, len(vi), casted)

            self = self._maybe_copy(using_cow, inplace=True)
            values = cast(np.ndarray, self.values.T)
            if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                casted = casted[0, ...]
            try:
                values[indexer] = casted
            except (TypeError, ValueError) as err:
                if is_list_like(casted):
                    raise ValueError(
                        "setting an array element with a sequence."
                    ) from err
                raise
        return self

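    # A hedged trace of the two setitem outcomes (pandas 2.x):
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.iloc[0] = 10    # np_can_hold_element succeeds -> set in place
    #     ser.iloc[0] = 1.5   # LossySetitemError -> coerce_to_target_dtype
    #                         # upcasts to float64 (with a FutureWarning),
    #                         # then the setitem is retried on the new block
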
    def putmask(
        self, mask, new, using_cow: bool = False, already_warned=None
    ) -> list[Block]:
        """
        putmask the data to the block; it is possible that we may create a
        new dtype of block.

        Return the resulting block(s).

        Parameters
        ----------
        mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
        new : a ndarray/object
        using_cow : bool, default False

        Returns
        -------
        List[Block]
        """
        orig_mask = mask
        values = cast(np.ndarray, self.values)
        mask, noop = validate_putmask(values.T, mask)
        assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

        if new is lib.no_default:
            new = self.fill_value

        new = self._standardize_fill_value(new)
        new = extract_array(new, extract_numpy=True)

        if noop:
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        if (
            warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        try:
            casted = np_can_hold_element(values.dtype, new)

            self = self._maybe_copy(using_cow, inplace=True)
            values = cast(np.ndarray, self.values)

            putmask_without_repeat(values.T, mask, casted)
            return [self]
        except LossySetitemError:
            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                if not is_list_like(new):
                    # using just new[indexer] can't save us the need to cast
                    return self.coerce_to_target_dtype(
                        new, warn_on_upcast=True
                    ).putmask(mask, new)
                else:
                    indexer = mask.nonzero()[0]
                    nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                    return [nb]

            else:
                is_array = isinstance(new, np.ndarray)

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = new
                    if is_array:
                        # we have a different value per-column
                        n = new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

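    # A hedged public-API mirror of putmask (pandas 2.x): boolean-mask
    # assignment routes through this method.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser[ser > 1] = 0      # values fit int64 -> putmask_without_repeat
    #     ser2 = pd.Series([1, 2, 3])
    #     ser2[ser2 > 1] = "x"  # LossySetitemError -> upcast to object first
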
    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        Evaluate the block; return the resulting block(s).

        Parameters
        ----------
        other : a ndarray/object
        cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
        _downcast : str or bool, default "infer"
            Private because we only specify it when calling from fillna.

        Returns
        -------
        List[Block]
        """
        assert cond.ndim == self.ndim
        assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

        transpose = self.ndim == 2

        cond = extract_bool_array(cond)

        # EABlocks override where
        values = cast(np.ndarray, self.values)
        orig_other = other
        if transpose:
            values = values.T

        icond, noop = validate_putmask(values, ~cond)
        if noop:
            # GH-39595: Always return a copy; short-circuit up/downcasting
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        if other is lib.no_default:
            other = self.fill_value

        other = self._standardize_fill_value(other)

        try:
            # try/except here is equivalent to a self._can_hold_element check,
            # but this gets us back 'casted' which we will reuse below;
            # without using 'casted', expressions.where may do unwanted upcasts.
            casted = np_can_hold_element(values.dtype, other)
        except (ValueError, TypeError, LossySetitemError):
            # we cannot coerce, return a compat dtype

            if self.ndim == 1 or self.shape[0] == 1:
                # no need to split columns

                block = self.coerce_to_target_dtype(other)
                blocks = block.where(orig_other, cond, using_cow=using_cow)
                return self._maybe_downcast(
                    blocks, downcast=_downcast, using_cow=using_cow, caller="where"
                )

            else:
                # since _maybe_downcast would split blocks anyway, we
                # can avoid some potential upcast/downcast by splitting
                # on the front end.
                is_array = isinstance(other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    oth = other
                    if is_array:
                        # we have a different value per-column
                        oth = other[:, i : i + 1]

                    submask = cond[:, i : i + 1]
                    rbs = nb.where(
                        oth, submask, _downcast=_downcast, using_cow=using_cow
                    )
                    res_blocks.extend(rbs)
                return res_blocks

        else:
            other = casted
            alt = setitem_datetimelike_compat(values, icond.sum(), other)
            if alt is not other:
                if is_list_like(other) and len(other) < len(values):
                    # call np.where with other to get the appropriate ValueError
                    np.where(~icond, values, other)
                    raise NotImplementedError(
                        "This should not be reached; call to np.where above is "
                        "expected to raise ValueError. Please report a bug at "
                        "github.com/pandas-dev/pandas"
                    )
                result = values.copy()
                np.putmask(result, icond, alt)
            else:
                # By the time we get here, we should have all Series/Index
                # args extracted to ndarray
                if (
                    is_list_like(other)
                    and not isinstance(other, np.ndarray)
                    and len(other) == self.shape[-1]
                ):
                    # If we don't do this broadcasting here, then expressions.where
                    # will broadcast a 1D other to be row-like instead of
                    # column-like.
                    other = np.array(other).reshape(values.shape)
                    # If lengths don't match (or len(other)==1), we will raise
                    # inside expressions.where, see test_series_where

                # Note: expressions.where may upcast.
                result = expressions.where(~icond, values, other)
                # The np_can_hold_element check _should_ ensure that we always
                # have result.dtype == self.dtype here.

        if transpose:
            result = result.T

        return [self.make_block(result)]

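    # A hedged public-API mirror of ``where`` (pandas 2.x): cond marks the
    # values to keep; other fills the rest.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.where(ser > 1, 0)     # -> [0, 2, 3]; 0 fits int64, no upcast
    #     ser.where(ser > 1, 0.5)   # 0.5 cannot be held -> upcast to float64
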
    def fillna(
        self,
        value,
        limit: int | None = None,
        inplace: bool = False,
        downcast=None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        fillna on the block with the value. If we fail, then convert the block
        to hold objects instead and try again.
        """
        # Caller is responsible for validating limit; if int it is strictly positive
        inplace = validate_bool_kwarg(inplace, "inplace")

        if not self._can_hold_na:
            # can short-circuit the isna call
            noop = True
        else:
            mask = isna(self.values)
            mask, noop = validate_putmask(self.values, mask)

        if noop:
            # we can't process the value, but nothing to do
            if inplace:
                if using_cow:
                    return [self.copy(deep=False)]
                # Arbitrarily imposing the convention that we ignore downcast
                # on no-op when inplace=True
                return [self]
            else:
                # GH#45423 consistent downcasting on no-ops.
                nb = self.copy(deep=not using_cow)
                nbs = nb._maybe_downcast(
                    [nb], downcast=downcast, using_cow=using_cow, caller="fillna"
                )
                return nbs

        if limit is not None:
            mask[mask.cumsum(self.ndim - 1) > limit] = False

        if inplace:
            nbs = self.putmask(
                mask.T, value, using_cow=using_cow, already_warned=already_warned
            )
        else:
            # without _downcast, we would break
            # test_fillna_dtype_conversion_equiv_replace
            nbs = self.where(value, ~mask.T, _downcast=False)

        # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
        # makes a difference bc blk may have object dtype, which has
        # different behavior in _maybe_downcast.
        return extend_blocks(
            [
                blk._maybe_downcast(
                    [blk], downcast=downcast, using_cow=using_cow, caller="fillna"
                )
                for blk in nbs
            ]
        )

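    # A hedged illustration of the limit masking above: with limit=1 only the
    # first NA (along the fill axis) is filled, because positions where the
    # running count of NAs exceeds the limit are dropped from the mask.
    #
    #     ser = pd.Series([np.nan, np.nan, 3.0])
    #     ser.fillna(0, limit=1)    # -> [0.0, nan, 3.0]
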
    def pad_or_backfill(
        self,
        *,
        method: FillnaOptions,
        axis: AxisInt = 0,
        inplace: bool = False,
        limit: int | None = None,
        limit_area: Literal["inside", "outside"] | None = None,
        downcast: Literal["infer"] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        if not self._can_hold_na:
            # If there are no NAs, then pad/backfill is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        copy, refs = self._get_refs_and_copy(using_cow, inplace)

        # Dispatch to the NumpyExtensionArray method.
        # We know self.array_values is a NumpyExtensionArray bc EABlock overrides
        vals = cast(NumpyExtensionArray, self.array_values)
        if axis == 1:
            vals = vals.T
        new_values = vals._pad_or_backfill(
            method=method,
            limit=limit,
            limit_area=limit_area,
            copy=copy,
        )
        if (
            not copy
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True
        if axis == 1:
            new_values = new_values.T

        data = extract_array(new_values, extract_numpy=True)

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")

    @final
    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        index: Index,
        inplace: bool = False,
        limit: int | None = None,
        limit_direction: Literal["forward", "backward", "both"] = "forward",
        limit_area: Literal["inside", "outside"] | None = None,
        downcast: Literal["infer"] | None = None,
        using_cow: bool = False,
        already_warned=None,
        **kwargs,
    ) -> list[Block]:
        inplace = validate_bool_kwarg(inplace, "inplace")
        # error: Non-overlapping equality check [...]
        if method == "asfreq":  # type: ignore[comparison-overlap]
            # clean_fill_method used to allow this
            missing.clean_fill_method(method)

        if not self._can_hold_na:
            # If there are no NAs, then interpolate is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        # TODO(3.0): this case will not be reachable once GH#53638 is enforced
        if self.dtype == _dtype_obj:
            # only deal with floats
            # bc we already checked that can_hold_na, we don't have int dtype here
            # test_interp_basic checks that we make a copy here
            if using_cow:
                return [self.copy(deep=False)]
            return [self] if inplace else [self.copy()]

        copy, refs = self._get_refs_and_copy(using_cow, inplace)

        # Dispatch to the EA method.
        new_values = self.array_values.interpolate(
            method=method,
            axis=self.ndim - 1,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            copy=copy,
            **kwargs,
        )
        data = extract_array(new_values, extract_numpy=True)

        if (
            not copy
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            already_warned.warned_already = True

        nb = self.make_block_same_class(data, refs=refs)
        return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")

    @final
    def diff(self, n: int) -> list[Block]:
        """return block for the diff of the values"""
        # only reached with ndim == 2
        # TODO(EA2D): transpose will be unnecessary with 2D EAs
        new_values = algos.diff(self.values.T, n, axis=0).T
        return [self.make_block(values=new_values)]

    def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
        """shift the block by periods, possibly upcast"""
        # convert integer to float if necessary. need to do a lot more than
        # that, handle boolean etc also
        axis = self.ndim - 1

        # Note: periods is never 0 here, as that is handled at the top of
        # NDFrame.shift. If that ever changes, we can do a check for periods=0
        # and possibly avoid coercing.

        if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
            # with object dtype there is nothing to promote, and the user can
            # pass pretty much any weird fill_value they like
            # see test_shift_object_non_scalar_fill
            raise ValueError("fill_value must be a scalar")

        fill_value = self._standardize_fill_value(fill_value)

        try:
            # error: Argument 1 to "np_can_hold_element" has incompatible type
            # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
            casted = np_can_hold_element(
                self.dtype, fill_value  # type: ignore[arg-type]
            )
        except LossySetitemError:
            nb = self.coerce_to_target_dtype(fill_value)
            return nb.shift(periods, fill_value=fill_value)

        else:
            values = cast(np.ndarray, self.values)
            new_values = shift(values, periods, axis, casted)
            return [self.make_block_same_class(new_values)]

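    # A hedged illustration of the shift upcast path (pandas 2.x): the default
    # fill_value is standardized to NaN, which int64 cannot hold, so the block
    # is coerced before shifting.
    #
    #     ser = pd.Series([1, 2, 3])
    #     ser.shift(1)                 # -> [nan, 1.0, 2.0], dtype float64
    #     ser.shift(1, fill_value=0)   # 0 fits int64 -> dtype stays int64
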
    @final
    def quantile(
        self,
        qs: Index,  # with dtype float64
        interpolation: QuantileInterpolation = "linear",
    ) -> Block:
        """
        Compute the quantiles of the block's values.

        Parameters
        ----------
        qs : Index
            The quantiles to be computed in float64.
        interpolation : str, default 'linear'
            Type of interpolation.

        Returns
        -------
        Block
        """
        # We should always have ndim == 2 because Series dispatches to DataFrame
        assert self.ndim == 2
        assert is_list_like(qs)  # caller is responsible for this

        result = quantile_compat(self.values, np.asarray(qs._values), interpolation)
        # ensure_block_shape needed for cases where we start with EA and result
        # is ndarray, e.g. IntegerArray, SparseArray
        result = ensure_block_shape(result, ndim=2)
        return new_block_2d(result, placement=self._mgr_locs)

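    # A hedged public-API mirror (pandas 2.x): DataFrame.quantile feeds each
    # block through the method above, producing one row per requested quantile.
    #
    #     df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})
    #     df.quantile([0.25, 0.5])   # -> a: 1.75 and 2.5 (linear interpolation)
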
1897 @final
1898 def round(self, decimals: int, using_cow: bool = False) -> Self:
1899 """
1900 Rounds the values.
1901 If the block is not of an integer or float dtype, nothing happens.
1902 This is consistent with DataFrame.round behavivor.
1903 (Note: Series.round would raise)
1904
1905 Parameters
1906 ----------
1907 decimals: int,
1908 Number of decimal places to round to.
1909 Caller is responsible for validating this
1910 using_cow: bool,
1911 Whether Copy on Write is enabled right now
1912 """
1913 if not self.is_numeric or self.is_bool:
1914 return self.copy(deep=not using_cow)
1915 refs = None
1916 # TODO: round only defined on BaseMaskedArray
1917 # Series also does this, so would need to fix both places
1918 # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]"
1919 # has no attribute "round"
1920 values = self.values.round(decimals) # type: ignore[union-attr]
1921 if values is self.values:
1922 if not using_cow:
1923 # Normally would need to do this before, but
1924 # numpy only returns same array when round operation
1925 # is no-op
1926 # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636
1927 values = values.copy()
1928 else:
1929 refs = self.refs
1930 return self.make_block_same_class(values, refs=refs)
1931
1932 # ---------------------------------------------------------------------
1933 # Abstract Methods Overridden By EABackedBlock and NumpyBlock
1934
1935 def delete(self, loc) -> list[Block]:
1936 """Deletes the locs from the block.
1937
1938 We split the block to avoid copying the underlying data. We create new
1939 blocks for every connected segment of the initial block that is not deleted.
1940 The new blocks point to the initial array.
1941 """
1942 if not is_list_like(loc):
1943 loc = [loc]
1944
1945 if self.ndim == 1:
1946 values = cast(np.ndarray, self.values)
1947 values = np.delete(values, loc)
1948 mgr_locs = self._mgr_locs.delete(loc)
1949 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
1950
1951 if np.max(loc) >= self.values.shape[0]:
1952 raise IndexError
1953
1954 # Add one out-of-bounds indexer as maximum to collect
1955 # all columns after our last indexer if any
1956 loc = np.concatenate([loc, [self.values.shape[0]]])
1957 mgr_locs_arr = self._mgr_locs.as_array
1958 new_blocks: list[Block] = []
1959
1960 previous_loc = -1
1961 # TODO(CoW): This is tricky, if parent block goes out of scope
1962 # all split blocks are referencing each other even though they
1963 # don't share data
1964 refs = self.refs if self.refs.has_reference() else None
1965 for idx in loc:
1966 if idx == previous_loc + 1:
1967 # There is no column between current and last idx
1968 pass
1969 else:
1970 # No overload variant of "__getitem__" of "ExtensionArray" matches
1971 # argument type "Tuple[slice, slice]"
1972 values = self.values[previous_loc + 1 : idx, :] # type: ignore[call-overload]
1973 locs = mgr_locs_arr[previous_loc + 1 : idx]
1974 nb = type(self)(
1975 values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
1976 )
1977 new_blocks.append(nb)
1978
1979 previous_loc = idx
1980
1981 return new_blocks
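
    # Rough worked example of the split above (hypothetical shapes): deleting
    # loc=[1] from a block covering rows 0..3 appends the out-of-bounds
    # sentinel 4, giving loc=[1, 4]; the loop then emits two view-blocks over
    # values[0:1] and values[2:4], so no data is copied.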
1982
1983 @property
1984 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
1986 raise AbstractMethodError(self)
1987
1988 @property
1989 def array_values(self) -> ExtensionArray:
1990 """
1991 The array that Series.array returns. Always an ExtensionArray.
1992 """
1993 raise AbstractMethodError(self)
1994
1995 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
1996 """
        Return an internal format, currently just the ndarray.
        This is often overridden to handle to_dense-like operations.
1999 """
2000 raise AbstractMethodError(self)
2001
2002
2003class EABackedBlock(Block):
2004 """
2005 Mixin for Block subclasses backed by ExtensionArray.
2006 """
2007
2008 values: ExtensionArray
2009
2010 @final
2011 def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
2012 """
2013 Shift the block by `periods`.
2014
2015 Dispatches to underlying ExtensionArray and re-boxes in an
2016 ExtensionBlock.
2017 """
2018 # Transpose since EA.shift is always along axis=0, while we want to shift
2019 # along rows.
2020 new_values = self.values.T.shift(periods=periods, fill_value=fill_value).T
2021 return [self.make_block_same_class(new_values)]
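
    # Illustrative sketch of the dispatch above (assumed public EA behavior):
    #
    #   >>> pd.array([1, 2], dtype="Int64").shift(1)
    #   <IntegerArray>
    #   [<NA>, 1]
    #   Length: 2, dtype: Int64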
2022
2023 @final
2024 def setitem(self, indexer, value, using_cow: bool = False):
2025 """
2026 Attempt self.values[indexer] = value, possibly creating a new array.
2027
2028 This differs from Block.setitem by not allowing setitem to change
2029 the dtype of the Block.
2030
2031 Parameters
2032 ----------
2033 indexer : tuple, list-like, array-like, slice, int
2034 The subset of self.values to set
2035 value : object
2036 The value being set
        using_cow : bool, default False
            Whether Copy-on-Write is in use.
2039
2040 Returns
2041 -------
2042 Block
2043
2044 Notes
2045 -----
2046 `indexer` is a direct slice/positional indexer. `value` must
2047 be a compatible shape.
2048 """
2049 orig_indexer = indexer
2050 orig_value = value
2051
2052 indexer = self._unwrap_setitem_indexer(indexer)
2053 value = self._maybe_squeeze_arg(value)
2054
2055 values = self.values
2056 if values.ndim == 2:
2057 # TODO(GH#45419): string[pyarrow] tests break if we transpose
2058 # unconditionally
2059 values = values.T
2060 check_setitem_lengths(indexer, value, values)
2061
2062 try:
2063 values[indexer] = value
2064 except (ValueError, TypeError):
2065 if isinstance(self.dtype, IntervalDtype):
2066 # see TestSetitemFloatIntervalWithIntIntervalValues
2067 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
2068 return nb.setitem(orig_indexer, orig_value)
2069
2070 elif isinstance(self, NDArrayBackedExtensionBlock):
2071 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
2072 return nb.setitem(orig_indexer, orig_value)
2073
2074 else:
2075 raise
2076
2077 else:
2078 return self
2079
2080 @final
2081 def where(
2082 self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
2083 ) -> list[Block]:
2084 # _downcast private bc we only specify it when calling from fillna
2085 arr = self.values.T
2086
2087 cond = extract_bool_array(cond)
2088
2089 orig_other = other
2090 orig_cond = cond
2091 other = self._maybe_squeeze_arg(other)
2092 cond = self._maybe_squeeze_arg(cond)
2093
2094 if other is lib.no_default:
2095 other = self.fill_value
2096
2097 icond, noop = validate_putmask(arr, ~cond)
2098 if noop:
2099 # GH#44181, GH#45135
2100 # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
2101 if using_cow:
2102 return [self.copy(deep=False)]
2103 return [self.copy()]
2104
2105 try:
2106 res_values = arr._where(cond, other).T
2107 except (ValueError, TypeError):
2108 if self.ndim == 1 or self.shape[0] == 1:
2109 if isinstance(self.dtype, IntervalDtype):
2110 # TestSetitemFloatIntervalWithIntIntervalValues
2111 blk = self.coerce_to_target_dtype(orig_other)
2112 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
2113 return self._maybe_downcast(
2114 nbs, downcast=_downcast, using_cow=using_cow, caller="where"
2115 )
2116
2117 elif isinstance(self, NDArrayBackedExtensionBlock):
2118 # NB: not (yet) the same as
2119 # isinstance(values, NDArrayBackedExtensionArray)
2120 blk = self.coerce_to_target_dtype(orig_other)
2121 nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
2122 return self._maybe_downcast(
2123 nbs, downcast=_downcast, using_cow=using_cow, caller="where"
2124 )
2125
2126 else:
2127 raise
2128
2129 else:
2130 # Same pattern we use in Block.putmask
2131 is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))
2132
2133 res_blocks = []
2134 nbs = self._split()
2135 for i, nb in enumerate(nbs):
2136 n = orig_other
2137 if is_array:
2138 # we have a different value per-column
2139 n = orig_other[:, i : i + 1]
2140
2141 submask = orig_cond[:, i : i + 1]
2142 rbs = nb.where(n, submask, using_cow=using_cow)
2143 res_blocks.extend(rbs)
2144 return res_blocks
2145
2146 nb = self.make_block_same_class(res_values)
2147 return [nb]
2148
2149 @final
2150 def putmask(
2151 self, mask, new, using_cow: bool = False, already_warned=None
2152 ) -> list[Block]:
2153 """
2154 See Block.putmask.__doc__
2155 """
2156 mask = extract_bool_array(mask)
2157 if new is lib.no_default:
2158 new = self.fill_value
2159
2160 orig_new = new
2161 orig_mask = mask
2162 new = self._maybe_squeeze_arg(new)
2163 mask = self._maybe_squeeze_arg(mask)
2164
2165 if not mask.any():
2166 if using_cow:
2167 return [self.copy(deep=False)]
2168 return [self]
2169
2170 if (
2171 warn_copy_on_write()
2172 and already_warned is not None
2173 and not already_warned.warned_already
2174 ):
2175 if self.refs.has_reference():
2176 warnings.warn(
2177 COW_WARNING_GENERAL_MSG,
2178 FutureWarning,
2179 stacklevel=find_stack_level(),
2180 )
2181 already_warned.warned_already = True
2182
2183 self = self._maybe_copy(using_cow, inplace=True)
2184 values = self.values
2185 if values.ndim == 2:
2186 values = values.T
2187
2188 try:
2189 # Caller is responsible for ensuring matching lengths
2190 values._putmask(mask, new)
2191 except (TypeError, ValueError):
2192 if self.ndim == 1 or self.shape[0] == 1:
2193 if isinstance(self.dtype, IntervalDtype):
2194 # Discussion about what we want to support in the general
2195 # case GH#39584
2196 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
2197 return blk.putmask(orig_mask, orig_new)
2198
2199 elif isinstance(self, NDArrayBackedExtensionBlock):
2200 # NB: not (yet) the same as
2201 # isinstance(values, NDArrayBackedExtensionArray)
2202 blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
2203 return blk.putmask(orig_mask, orig_new)
2204
2205 else:
2206 raise
2207
2208 else:
2209 # Same pattern we use in Block.putmask
2210 is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))
2211
2212 res_blocks = []
2213 nbs = self._split()
2214 for i, nb in enumerate(nbs):
2215 n = orig_new
2216 if is_array:
2217 # we have a different value per-column
2218 n = orig_new[:, i : i + 1]
2219
2220 submask = orig_mask[:, i : i + 1]
2221 rbs = nb.putmask(submask, n)
2222 res_blocks.extend(rbs)
2223 return res_blocks
2224
2225 return [self]
2226
2227 @final
2228 def delete(self, loc) -> list[Block]:
2229 # This will be unnecessary if/when __array_function__ is implemented
2230 if self.ndim == 1:
2231 values = self.values.delete(loc)
2232 mgr_locs = self._mgr_locs.delete(loc)
2233 return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
2234 elif self.values.ndim == 1:
2235 # We get here through to_stata
2236 return []
2237 return super().delete(loc)
2238
2239 @final
2240 @cache_readonly
2241 def array_values(self) -> ExtensionArray:
2242 return self.values
2243
2244 @final
2245 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
2246 """
        Return the values boxed as objects (e.g. Timestamps/Timedeltas) when object dtype is requested.
2248 """
2249 values: ArrayLike = self.values
2250 if dtype == _dtype_obj:
2251 values = values.astype(object)
2252 # TODO(EA2D): reshape not needed with 2D EAs
2253 return np.asarray(values).reshape(self.shape)
2254
2255 @final
2256 def pad_or_backfill(
2257 self,
2258 *,
2259 method: FillnaOptions,
2260 axis: AxisInt = 0,
2261 inplace: bool = False,
2262 limit: int | None = None,
2263 limit_area: Literal["inside", "outside"] | None = None,
2264 downcast: Literal["infer"] | None = None,
2265 using_cow: bool = False,
2266 already_warned=None,
2267 ) -> list[Block]:
2268 values = self.values
2269
2270 kwargs: dict[str, Any] = {"method": method, "limit": limit}
2271 if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
2272 kwargs["limit_area"] = limit_area
2273 elif limit_area is not None:
2274 raise NotImplementedError(
2275 f"{type(values).__name__} does not implement limit_area "
                "(added in pandas 2.2). 3rd-party ExtensionArray authors "
2277 "need to add this argument to _pad_or_backfill."
2278 )
2279
2280 if values.ndim == 2 and axis == 1:
2281 # NDArrayBackedExtensionArray.fillna assumes axis=0
2282 new_values = values.T._pad_or_backfill(**kwargs).T
2283 else:
2284 new_values = values._pad_or_backfill(**kwargs)
2285 return [self.make_block_same_class(new_values)]
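
    # Illustrative sketch of the signature probe above, with a hypothetical
    # 3rd-party EA:
    #
    #   class LegacyArray(ExtensionArray):
    #       def _pad_or_backfill(self, *, method, limit=None, copy=True): ...
    #
    # "limit_area" is missing from the signature, so kwargs omits it, and an
    # explicit limit_area from the caller raises NotImplementedError instead.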
2286
2287
2288class ExtensionBlock(EABackedBlock):
2289 """
2290 Block for holding extension types.
2291
2292 Notes
2293 -----
2294 This holds all 3rd-party extension array types. It's also the immediate
2295 parent class for our internal extension types' blocks.
2296
2297 ExtensionArrays are limited to 1-D.
2298 """
2299
2300 values: ExtensionArray
2301
2302 def fillna(
2303 self,
2304 value,
2305 limit: int | None = None,
2306 inplace: bool = False,
2307 downcast=None,
2308 using_cow: bool = False,
2309 already_warned=None,
2310 ) -> list[Block]:
2311 if isinstance(self.dtype, IntervalDtype):
2312 # Block.fillna handles coercion (test_fillna_interval)
2313 return super().fillna(
2314 value=value,
2315 limit=limit,
2316 inplace=inplace,
2317 downcast=downcast,
2318 using_cow=using_cow,
2319 already_warned=already_warned,
2320 )
2321 if using_cow and self._can_hold_na and not self.values._hasna:
2322 refs = self.refs
2323 new_values = self.values
2324 else:
2325 copy, refs = self._get_refs_and_copy(using_cow, inplace)
2326
2327 try:
2328 new_values = self.values.fillna(
2329 value=value, method=None, limit=limit, copy=copy
2330 )
2331 except TypeError:
2332 # 3rd party EA that has not implemented copy keyword yet
2333 refs = None
2334 new_values = self.values.fillna(value=value, method=None, limit=limit)
2335 # issue the warning *after* retrying, in case the TypeError
2336 # was caused by an invalid fill_value
2337 warnings.warn(
2338 # GH#53278
2339 "ExtensionArray.fillna added a 'copy' keyword in pandas "
2340 "2.1.0. In a future version, ExtensionArray subclasses will "
2341 "need to implement this keyword or an exception will be "
2342 "raised. In the interim, the keyword is ignored by "
2343 f"{type(self.values).__name__}.",
2344 DeprecationWarning,
2345 stacklevel=find_stack_level(),
2346 )
2347 else:
2348 if (
2349 not copy
2350 and warn_copy_on_write()
2351 and already_warned is not None
2352 and not already_warned.warned_already
2353 ):
2354 if self.refs.has_reference():
2355 warnings.warn(
2356 COW_WARNING_GENERAL_MSG,
2357 FutureWarning,
2358 stacklevel=find_stack_level(),
2359 )
2360 already_warned.warned_already = True
2361
2362 nb = self.make_block_same_class(new_values, refs=refs)
2363 return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
2364
2365 @cache_readonly
2366 def shape(self) -> Shape:
2367 # TODO(EA2D): override unnecessary with 2D EAs
2368 if self.ndim == 1:
2369 return (len(self.values),)
2370 return len(self._mgr_locs), len(self.values)
2371
2372 def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
2373 # In the case where we have a tuple[slice, int], the slice will always
2374 # be slice(None)
2375 # We _could_ make the annotation more specific, but mypy would
2376 # complain about override mismatch:
2377 # Literal[0] | tuple[Literal[0], int] | tuple[slice, int]
2378
2379 # Note: only reached with self.ndim == 2
2380
2381 if isinstance(i, tuple):
2382 # TODO(EA2D): unnecessary with 2D EAs
2383 col, loc = i
2384 if not com.is_null_slice(col) and col != 0:
2385 raise IndexError(f"{self} only contains one item")
2386 if isinstance(col, slice):
2387 # the is_null_slice check above assures that col is slice(None)
2388 # so what we want is a view on all our columns and row loc
2389 if loc < 0:
2390 loc += len(self.values)
2391 # Note: loc:loc+1 vs [[loc]] makes a difference when called
2392 # from fast_xs because we want to get a view back.
2393 return self.values[loc : loc + 1]
2394 return self.values[loc]
2395 else:
2396 if i != 0:
2397 raise IndexError(f"{self} only contains one item")
2398 return self.values
2399
2400 def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
2401 # When an ndarray, we should have locs.tolist() == [0]
2402 # When a BlockPlacement we should have list(locs) == [0]
2403 if copy:
2404 self.values = self.values.copy()
2405 self.values[:] = values
2406
2407 def _maybe_squeeze_arg(self, arg):
2408 """
2409 If necessary, squeeze a (N, 1) ndarray to (N,)
2410 """
2411 # e.g. if we are passed a 2D mask for putmask
2412 if (
2413 isinstance(arg, (np.ndarray, ExtensionArray))
2414 and arg.ndim == self.values.ndim + 1
2415 ):
2416 # TODO(EA2D): unnecessary with 2D EAs
2417 assert arg.shape[1] == 1
2418 # error: No overload variant of "__getitem__" of "ExtensionArray"
2419 # matches argument type "Tuple[slice, int]"
2420 arg = arg[:, 0] # type: ignore[call-overload]
2421 elif isinstance(arg, ABCDataFrame):
2422 # 2022-01-06 only reached for setitem
2423 # TODO: should we avoid getting here with DataFrame?
2424 assert arg.shape[1] == 1
2425 arg = arg._ixs(0, axis=1)._values
2426
2427 return arg
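
    # e.g. a (3, 1) putmask mask np.array([[True], [False], [True]]) is
    # squeezed to shape (3,) so that it aligns with the 1D ExtensionArray.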
2428
2429 def _unwrap_setitem_indexer(self, indexer):
2430 """
2431 Adapt a 2D-indexer to our 1D values.
2432
2433 This is intended for 'setitem', not 'iget' or '_slice'.
2434 """
2435 # TODO: ATM this doesn't work for iget/_slice, can we change that?
2436
2437 if isinstance(indexer, tuple) and len(indexer) == 2:
2438 # TODO(EA2D): not needed with 2D EAs
2439 # Should never have length > 2. Caller is responsible for checking.
            # Length 1 is reached via setitem_single_block and setitem_single_column,
            # each of which passes indexer=(pi,)
2442 if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
2443 # GH#44703 went through indexing.maybe_convert_ix
2444 first, second = indexer
2445 if not (
2446 second.size == 1 and (second == 0).all() and first.shape[1] == 1
2447 ):
2448 raise NotImplementedError(
2449 "This should not be reached. Please report a bug at "
2450 "github.com/pandas-dev/pandas/"
2451 )
2452 indexer = first[:, 0]
2453
2454 elif lib.is_integer(indexer[1]) and indexer[1] == 0:
2455 # reached via setitem_single_block passing the whole indexer
2456 indexer = indexer[0]
2457
2458 elif com.is_null_slice(indexer[1]):
2459 indexer = indexer[0]
2460
2461 elif is_list_like(indexer[1]) and indexer[1][0] == 0:
2462 indexer = indexer[0]
2463
2464 else:
2465 raise NotImplementedError(
2466 "This should not be reached. Please report a bug at "
2467 "github.com/pandas-dev/pandas/"
2468 )
2469 return indexer
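
    # Illustrative sketch (assumed shapes): an indexer like
    # (np.array([0, 2]), 0) coming from setitem_single_block is unwrapped to
    # np.array([0, 2]); the trailing 0 merely selects this block's single
    # fake column.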
2470
2471 @property
2472 def is_view(self) -> bool:
2473 """Extension arrays are never treated as views."""
2474 return False
2475
2476 # error: Cannot override writeable attribute with read-only property
2477 @cache_readonly
2478 def is_numeric(self) -> bool: # type: ignore[override]
2479 return self.values.dtype._is_numeric
2480
2481 def _slice(
2482 self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
2483 ) -> ExtensionArray:
2484 """
2485 Return a slice of my values.
2486
2487 Parameters
2488 ----------
2489 slicer : slice, ndarray[int], or ndarray[bool]
2490 Valid (non-reducing) indexer for self.values.
2491
2492 Returns
2493 -------
2494 ExtensionArray
2495 """
        # Note: ndarray[bool] is only reachable via get_rows_with_mask, which
        # is only used for Series, i.e. self.ndim == 1.
2498
2499 # return same dims as we currently have
2500 if self.ndim == 2:
2501 # reached via getitem_block via _slice_take_blocks_ax0
2502 # TODO(EA2D): won't be necessary with 2D EAs
2503
2504 if not isinstance(slicer, slice):
2505 raise AssertionError(
2506 "invalid slicing for a 1-ndim ExtensionArray", slicer
2507 )
2508 # GH#32959 only full-slicers along fake-dim0 are valid
2509 # TODO(EA2D): won't be necessary with 2D EAs
2510 # range(1) instead of self._mgr_locs to avoid exception on [::-1]
2511 # see test_iloc_getitem_slice_negative_step_ea_block
2512 new_locs = range(1)[slicer]
2513 if not len(new_locs):
2514 raise AssertionError(
2515 "invalid slicing for a 1-ndim ExtensionArray", slicer
2516 )
2517 slicer = slice(None)
2518
2519 return self.values[slicer]
2520
2521 @final
2522 def slice_block_rows(self, slicer: slice) -> Self:
2523 """
        Perform a __getitem__-like operation specialized to slicing along the index.
2525 """
2526 # GH#42787 in principle this is equivalent to values[..., slicer], but we don't
2527 # require subclasses of ExtensionArray to support that form (for now).
2528 new_values = self.values[slicer]
2529 return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
2530
2531 def _unstack(
2532 self,
2533 unstacker,
2534 fill_value,
2535 new_placement: npt.NDArray[np.intp],
2536 needs_masking: npt.NDArray[np.bool_],
2537 ):
2538 # ExtensionArray-safe unstack.
2539 # We override Block._unstack, which unstacks directly on the
2540 # values of the array. For EA-backed blocks, this would require
2541 # converting to a 2-D ndarray of objects.
2542 # Instead, we unstack an ndarray of integer positions, followed by
2543 # a `take` on the actual values.
2544
2545 # Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
2546 new_values, mask = unstacker.arange_result
2547
2548 # Note: these next two lines ensure that
2549 # mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
2550 # which the calling function needs in order to pass verify_integrity=False
2551 # to the BlockManager constructor
2552 new_values = new_values.T[mask]
2553 new_placement = new_placement[mask]
2554
2555 # needs_masking[i] calculated once in BlockManager.unstack tells
2556 # us if there are any -1s in the relevant indices. When False,
2557 # that allows us to go through a faster path in 'take', among
2558 # other things avoiding e.g. Categorical._validate_scalar.
2559 blocks = [
2560 # TODO: could cast to object depending on fill_value?
2561 type(self)(
2562 self.values.take(
2563 indices, allow_fill=needs_masking[i], fill_value=fill_value
2564 ),
2565 BlockPlacement(place),
2566 ndim=2,
2567 )
2568 for i, (indices, place) in enumerate(zip(new_values, new_placement))
2569 ]
2570 return blocks, mask
2571
2572
2573class NumpyBlock(Block):
2574 values: np.ndarray
2575 __slots__ = ()
2576
2577 @property
2578 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
2580 return self.values.base is not None
2581
2582 @property
2583 def array_values(self) -> ExtensionArray:
2584 return NumpyExtensionArray(self.values)
2585
2586 def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
2587 if dtype == _dtype_obj:
2588 return self.values.astype(_dtype_obj)
2589 return self.values
2590
2591 @cache_readonly
2592 def is_numeric(self) -> bool: # type: ignore[override]
2593 dtype = self.values.dtype
2594 kind = dtype.kind
2595
2596 return kind in "fciub"
2597
2598
2599class NumericBlock(NumpyBlock):
2600 # this Block type is kept for backwards-compatibility
2601 # TODO(3.0): delete and remove deprecation in __init__.py.
2602 __slots__ = ()
2603
2604
2605class ObjectBlock(NumpyBlock):
2606 # this Block type is kept for backwards-compatibility
2607 # TODO(3.0): delete and remove deprecation in __init__.py.
2608 __slots__ = ()
2609
2610
2611class NDArrayBackedExtensionBlock(EABackedBlock):
2612 """
2613 Block backed by an NDArrayBackedExtensionArray
2614 """
2615
2616 values: NDArrayBackedExtensionArray
2617
2618 @property
2619 def is_view(self) -> bool:
        """Return a boolean indicating whether I am possibly a view."""
        # check the base of the ndarray backing the ExtensionArray
2622 return self.values._ndarray.base is not None
2623
2624
2625class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
2626 """Block for datetime64[ns], timedelta64[ns]."""
2627
2628 __slots__ = ()
2629 is_numeric = False
2630 values: DatetimeArray | TimedeltaArray
2631
2632
2633class DatetimeTZBlock(DatetimeLikeBlock):
2634 """implement a datetime64 block with a tz attribute"""
2635
2636 values: DatetimeArray
2637
2638 __slots__ = ()
2639
2640
2641# -----------------------------------------------------------------
2642# Constructor Helpers
2643
2644
2645def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
2646 """
2647 Input validation for values passed to __init__. Ensure that
2648 any datetime64/timedelta64 dtypes are in nanoseconds. Ensure
2649 that we do not have string dtypes.
2650
2651 Parameters
2652 ----------
2653 values : np.ndarray or ExtensionArray
2654
2655 Returns
2656 -------
2657 values : np.ndarray or ExtensionArray
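
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> maybe_coerce_values(np.array(["a", "b"]))
    array(['a', 'b'], dtype=object)
    >>> type(maybe_coerce_values(np.arange(2).astype("M8[ns]"))).__name__
    'DatetimeArray'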
2658 """
2659 # Caller is responsible for ensuring NumpyExtensionArray is already extracted.
2660
2661 if isinstance(values, np.ndarray):
2662 values = ensure_wrapped_if_datetimelike(values)
2663
2664 if issubclass(values.dtype.type, str):
2665 values = np.array(values, dtype=object)
2666
2667 if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None:
2668 # freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame
2669 values = values._with_freq(None)
2670
2671 return values
2672
2673
2674def get_block_type(dtype: DtypeObj) -> type[Block]:
2675 """
    Find the appropriate Block subclass to use for the given dtype.
2677
2678 Parameters
2679 ----------
2680 dtype : numpy or pandas dtype
2681
2682 Returns
2683 -------
2684 cls : class, subclass of Block
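
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> get_block_type(np.dtype("M8[ns]")).__name__
    'DatetimeLikeBlock'
    >>> get_block_type(np.dtype("float64")).__name__
    'NumpyBlock'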
2685 """
2686 if isinstance(dtype, DatetimeTZDtype):
2687 return DatetimeTZBlock
2688 elif isinstance(dtype, PeriodDtype):
2689 return NDArrayBackedExtensionBlock
2690 elif isinstance(dtype, ExtensionDtype):
2691 # Note: need to be sure NumpyExtensionArray is unwrapped before we get here
2692 return ExtensionBlock
2693
2694 # We use kind checks because it is much more performant
2695 # than is_foo_dtype
2696 kind = dtype.kind
2697 if kind in "Mm":
2698 return DatetimeLikeBlock
2699
2700 return NumpyBlock
2701
2702
2703def new_block_2d(
2704 values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
2705):
2706 # new_block specialized to case with
2707 # ndim=2
2708 # isinstance(placement, BlockPlacement)
2709 # check_ndim/ensure_block_shape already checked
2710 klass = get_block_type(values.dtype)
2711
2712 values = maybe_coerce_values(values)
2713 return klass(values, ndim=2, placement=placement, refs=refs)
2714
2715
2716def new_block(
2717 values,
2718 placement: BlockPlacement,
2719 *,
2720 ndim: int,
2721 refs: BlockValuesRefs | None = None,
2722) -> Block:
2723 # caller is responsible for ensuring:
2724 # - values is NOT a NumpyExtensionArray
2725 # - check_ndim/ensure_block_shape already checked
2726 # - maybe_coerce_values already called/unnecessary
2727 klass = get_block_type(values.dtype)
2728 return klass(values, ndim=ndim, placement=placement, refs=refs)
2729
2730
2731def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
2732 """
2733 ndim inference and validation.
2734
2735 Validates that values.ndim and ndim are consistent.
2736 Validates that len(values) and len(placement) are consistent.
2737
2738 Parameters
2739 ----------
2740 values : array-like
2741 placement : BlockPlacement
2742 ndim : int
2743
2744 Raises
2745 ------
    ValueError : if the number of dimensions does not match
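
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> check_ndim(np.array([[1, 2, 3]]), BlockPlacement(slice(0, 1)), ndim=2)
    >>> check_ndim(np.array([1, 2, 3]), BlockPlacement(slice(0, 1)), ndim=2)
    Traceback (most recent call last):
    ...
    ValueError: Wrong number of dimensions. values.ndim != ndim [1 != 2]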
2747 """
2748
2749 if values.ndim > ndim:
2750 # Check for both np.ndarray and ExtensionArray
2751 raise ValueError(
2752 "Wrong number of dimensions. "
2753 f"values.ndim > ndim [{values.ndim} > {ndim}]"
2754 )
2755
2756 if not is_1d_only_ea_dtype(values.dtype):
2757 # TODO(EA2D): special case not needed with 2D EAs
2758 if values.ndim != ndim:
2759 raise ValueError(
2760 "Wrong number of dimensions. "
2761 f"values.ndim != ndim [{values.ndim} != {ndim}]"
2762 )
2763 if len(placement) != len(values):
2764 raise ValueError(
2765 f"Wrong number of items passed {len(values)}, "
2766 f"placement implies {len(placement)}"
2767 )
2768 elif ndim == 2 and len(placement) != 1:
2769 # TODO(EA2D): special case unnecessary with 2D EAs
2770 raise ValueError("need to split")
2771
2772
2773def extract_pandas_array(
2774 values: ArrayLike, dtype: DtypeObj | None, ndim: int
2775) -> tuple[ArrayLike, DtypeObj | None]:
2776 """
2777 Ensure that we don't allow NumpyExtensionArray / NumpyEADtype in internals.
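
    A rough illustration (internal API, subject to change):

    >>> arr = NumpyExtensionArray(np.array([1, 2]))
    >>> vals, _ = extract_pandas_array(arr, None, ndim=1)
    >>> type(vals).__name__
    'ndarray'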
2778 """
2779 # For now, blocks should be backed by ndarrays when possible.
2780 if isinstance(values, ABCNumpyExtensionArray):
2781 values = values.to_numpy()
2782 if ndim and ndim > 1:
2783 # TODO(EA2D): special case not needed with 2D EAs
2784 values = np.atleast_2d(values)
2785
2786 if isinstance(dtype, NumpyEADtype):
2787 dtype = dtype.numpy_dtype
2788
2789 return values, dtype
2790
2791
2792# -----------------------------------------------------------------
2793
2794
2795def extend_blocks(result, blocks=None) -> list[Block]:
    """Return a new extended list of blocks, given the result."""
2797 if blocks is None:
2798 blocks = []
2799 if isinstance(result, list):
2800 for r in result:
2801 if isinstance(r, list):
2802 blocks.extend(r)
2803 else:
2804 blocks.append(r)
2805 else:
2806 assert isinstance(result, Block), type(result)
2807 blocks.append(result)
2808 return blocks
2809
2810
2811def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
2812 """
2813 Reshape if possible to have values.ndim == ndim.
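
    Examples
    --------
    A rough illustration (internal API, subject to change):

    >>> ensure_block_shape(np.array([1, 2, 3]), ndim=2).shape
    (1, 3)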
2814 """
2815
2816 if values.ndim < ndim:
2817 if not is_1d_only_ea_dtype(values.dtype):
2818 # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
2819 # block.shape is incorrect for "2D" ExtensionArrays
2820 # We can't, and don't need to, reshape.
2821 values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
2822 values = values.reshape(1, -1)
2823
2824 return values
2825
2826
2827def external_values(values: ArrayLike) -> ArrayLike:
2828 """
2829 The array that Series.values returns (public attribute).
2830
2831 This has some historical constraints, and is overridden in block
2832 subclasses to return the correct array (e.g. period returns
2833 object ndarray and datetimetz a datetime64[ns] ndarray instead of
2834 proper extension array).
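
    A rough illustration (internal API, subject to change):

    >>> dta = ensure_wrapped_if_datetimelike(
    ...     np.array(["2020-01-01"], dtype="M8[ns]")
    ... )
    >>> external_values(dta).dtype
    dtype('<M8[ns]')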
2835 """
2836 if isinstance(values, (PeriodArray, IntervalArray)):
2837 return values.astype(object)
2838 elif isinstance(values, (DatetimeArray, TimedeltaArray)):
2839 # NB: for datetime64tz this is different from np.asarray(values), since
2840 # that returns an object-dtype ndarray of Timestamps.
2841 # Avoid raising in .astype in casting from dt64tz to dt64
2842 values = values._ndarray
2843
2844 if isinstance(values, np.ndarray) and using_copy_on_write():
2845 values = values.view()
2846 values.flags.writeable = False
2847
2848 # TODO(CoW) we should also mark our ExtensionArrays as read-only
2849
2850 return values