1from __future__ import annotations
2
3import numbers
4from typing import (
5 TYPE_CHECKING,
6 cast,
7)
8
9import numpy as np
10
11from pandas._libs import (
12 lib,
13 missing as libmissing,
14)
15from pandas._typing import (
16 Dtype,
17 DtypeObj,
18 type_t,
19)
20
21from pandas.core.dtypes.common import (
22 is_list_like,
23 is_numeric_dtype,
24)
25from pandas.core.dtypes.dtypes import register_extension_dtype
26from pandas.core.dtypes.missing import isna
27
28from pandas.core import ops
29from pandas.core.array_algos import masked_accumulations
30from pandas.core.arrays.masked import (
31 BaseMaskedArray,
32 BaseMaskedDtype,
33)
34
35if TYPE_CHECKING:
36 import pyarrow
37
38 from pandas._typing import npt
39
40
@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype for boolean data.

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    name = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """Scalar type reported for this dtype (``np.bool_``)."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Single-character kind code, ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """NumPy dtype of the underlying data array (``np.dtype("bool")``)."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        return True

    @property
    def _is_numeric(self) -> bool:
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Construct BooleanArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Input of pyarrow boolean type, or of pyarrow null type (in which
            case every element of the result is missing).

        Returns
        -------
        BooleanArray
            One array holding the concatenation of all chunks.

        Raises
        ------
        TypeError
            If ``array`` is neither boolean-typed nor null-typed.
        """
        import pyarrow

        is_null_typed = pyarrow.types.is_null(array.type)
        if array.type != pyarrow.bool_() and not is_null_typed:
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        if is_null_typed:
            # Every element of a null-typed array is missing, so build an
            # all-masked result directly instead of reading value buffers.
            length = len(array)
            return BooleanArray(
                np.zeros(length, dtype=bool), np.ones(length, dtype=bool)
            )

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        results = []
        for arr in chunks:
            buflist = arr.buffers()
            # buflist[1] holds the bit-packed values; re-wrap it without a
            # validity buffer so that to_numpy yields a plain bool ndarray.
            data = pyarrow.BooleanArray.from_buffers(
                arr.type, len(arr), [None, buflist[1]], offset=arr.offset
            ).to_numpy(zero_copy_only=False)
            if arr.null_count != 0:
                # Read the validity bitmap (buflist[0]) as if it were boolean
                # data, then invert it: the pandas mask is True where missing.
                mask = pyarrow.BooleanArray.from_buffers(
                    arr.type, len(arr), [None, buflist[0]], offset=arr.offset
                ).to_numpy(zero_copy_only=False)
                mask = ~mask
            else:
                mask = np.zeros(len(arr), dtype=bool)

            bool_arr = BooleanArray(data, mask)
            results.append(bool_arr)

        if not results:
            # A ChunkedArray without chunks: return an empty BooleanArray.
            return BooleanArray(
                np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
            )
        else:
            return BooleanArray._concat_same_type(results)
143
144
def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Convert bool-like input into a boolean data array plus a missing-value mask.

    Parameters
    ----------
    values : 1D list-like
        A BooleanArray, a boolean or numeric ndarray, or any other list-like
        whose non-missing entries are booleans or the numbers 0/1.
    mask : bool 1D array, optional
        Extra positions to flag as missing; combined (logical or) with the
        missing positions detected in ``values``. Not allowed when ``values``
        is a BooleanArray.
    copy : bool, default False
        If True, copy the input.

    Returns
    -------
    tuple of (values, mask)
        Two boolean ndarrays of identical shape; ``mask`` is True where the
        value is missing.

    Raises
    ------
    TypeError
        If ``values`` holds non-missing entries that are not bool-like.
    ValueError
        If ``mask`` is given together with a BooleanArray, or if the
        resulting values and mask differ in shape.
    """
    # Fast path: a BooleanArray already carries both pieces.
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        data, na_mask = values._data, values._mask
        if copy:
            return data.copy(), na_mask.copy()
        return data, na_mask

    # Missing positions detected in ``values`` itself (None if not inspected).
    inferred_mask = None
    is_ndarray = isinstance(values, np.ndarray)

    if is_ndarray and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif is_ndarray and is_numeric_dtype(values.dtype):
        inferred_mask = isna(values)
        present = ~inferred_mask

        as_bool = np.zeros(len(values), dtype=bool)
        as_bool[present] = values[present].astype(bool)

        # Round-trip check: casting back must reproduce the input, which
        # only holds when every non-missing entry is 0 or 1.
        round_trip = as_bool[present].astype(values.dtype)
        if not np.all(round_trip == values[present]):
            raise TypeError("Need to pass bool-like values")

        values = as_bool
    else:
        values_object = np.asarray(values, dtype=object)

        integer_like = ("floating", "integer", "mixed-integer-float")
        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        # cast only narrows the type for mypy
        inferred_mask = cast("npt.NDArray[np.bool_]", isna(values_object))
        present = ~inferred_mask
        values = np.zeros(len(values), dtype=bool)
        values[present] = values_object[present].astype(bool)

        # Numeric-looking input must consist of actual 0/1 values.
        if inferred_dtype in integer_like:
            unchanged = values[present].astype(float) == values_object[
                present
            ].astype(float)
            if not np.all(unchanged):
                raise TypeError("Need to pass bool-like values")

    if mask is None:
        if inferred_mask is None:
            mask = np.zeros(values.shape, dtype=bool)
        else:
            mask = inferred_mask
    else:
        mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
        if not mask_is_bool_ndarray:
            mask = np.array(mask, dtype=bool)

        if inferred_mask is not None:
            mask = mask | inferred_mask
        elif mask_is_bool_ndarray and copy:
            mask = mask.copy()

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask
230
231
class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    A pandas Extension array for boolean data. Internally it holds two
    numpy arrays: a boolean array with the data and a boolean array with
    the mask, where True marks a missing value.

    Logical operations on a BooleanArray follow Kleene logic (sometimes
    called three-value logic). See :ref:`boolean.kleene` for more.

    To build a BooleanArray from generic array-like input, call
    :func:`pandas.array` with ``dtype="boolean"`` (see examples below).

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create an BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False
    # Fill values used for any/all
    # Incompatible types in assignment (expression has type "bool", base class
    # "BaseMaskedArray" defined the type as "<typing special form>")
    _truthy_value = True  # type: ignore[assignment]
    _falsey_value = False  # type: ignore[assignment]
    # Strings that _from_sequence_of_strings maps to True / False by default
    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    def __init__(
        self, values: np.ndarray, mask: np.ndarray, copy: bool = False
    ) -> None:
        # Only a genuine boolean ndarray is accepted as the data buffer.
        values_ok = isinstance(values, np.ndarray) and values.dtype == np.bool_
        if not values_ok:
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        self._dtype = BooleanDtype()
        super().__init__(values, mask, copy=copy)

    @property
    def dtype(self) -> BooleanDtype:
        """The BooleanDtype instance created in ``__init__``."""
        return self._dtype

    @classmethod
    def _from_sequence_of_strings(
        cls,
        strings: list[str],
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
        true_values: list[str] | None = None,
        false_values: list[str] | None = None,
    ) -> BooleanArray:
        """
        Build a BooleanArray from strings such as ``"True"`` or ``"0"``.

        Parameters
        ----------
        strings : list of str
            Input strings; entries detected as missing by ``isna`` are left
            untouched.
        dtype : Dtype, optional
            Forwarded to ``_from_sequence``.
        copy : bool, default False
            Forwarded to ``_from_sequence``.
        true_values, false_values : list of str, optional
            Additional strings recognised as True / False, on top of the
            class-level ``_TRUE_VALUES`` / ``_FALSE_VALUES``.

        Returns
        -------
        BooleanArray

        Raises
        ------
        ValueError
            If a non-missing entry is in neither set of recognised strings.
        """
        recognised_true = cls._TRUE_VALUES.union(true_values or [])
        recognised_false = cls._FALSE_VALUES.union(false_values or [])

        def map_string(s) -> bool:
            if s in recognised_true:
                return True
            if s in recognised_false:
                return False
            raise ValueError(f"{s} cannot be cast to bool")

        scalars = np.array(strings, dtype=object)
        present = ~isna(scalars)
        scalars[present] = [map_string(item) for item in scalars[present]]
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """Delegate to the module-level ``coerce_to_array`` for ``value``."""
        if dtype:
            assert dtype == "boolean"
        return coerce_to_array(value, copy=copy)

    def _logical_method(self, other, op):
        """
        Apply a Kleene-logic ``or`` / ``and`` / ``xor`` against ``other``.

        Parameters
        ----------
        other : BooleanArray, list-like, bool or pandas.NA
            Right-hand operand.
        op : callable
            Operator whose ``__name__`` is one of ``or_``, ``ror_``,
            ``and_``, ``rand_``, ``xor``, ``rxor``.

        Raises
        ------
        NotImplementedError
            If a list-like ``other`` has more than one dimension.
        TypeError
            If a scalar ``other`` is neither pandas.NA nor a bool.
        ValueError
            If a non-scalar ``other`` differs in length from ``self``.
        """
        op_name = op.__name__
        assert op_name in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

        # Scalar-ness is decided on the raw input, before any unwrapping.
        other_is_scalar = lib.is_scalar(other)
        other_mask = None

        if isinstance(other, BooleanArray):
            other, other_mask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other, dtype="bool")
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            other, other_mask = coerce_to_array(other, copy=False)
        elif isinstance(other, np.bool_):
            other = other.item()

        if other_is_scalar:
            if other is not libmissing.NA and not lib.is_bool(other):
                raise TypeError(
                    "'other' should be pandas.NA or a bool. "
                    f"Got {type(other).__name__} instead."
                )
        elif len(self) != len(other):
            raise ValueError("Lengths must match")

        # The reflected variants use the same kernel as the forward ones.
        if op_name in {"or_", "ror_"}:
            kleene_op = ops.kleene_or
        elif op_name in {"and_", "rand_"}:
            kleene_op = ops.kleene_and
        else:
            # i.e. xor, rxor
            kleene_op = ops.kleene_xor
        result, result_mask = kleene_op(self._data, other, self._mask, other_mask)

        # i.e. BooleanArray
        return self._maybe_mask_result(result, result_mask)

    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> BaseMaskedArray:
        """
        Compute the cumulative operation ``name``.

        ``cummin`` and ``cummax`` are evaluated on the boolean data and give
        back an array of this type; any other name is evaluated on an
        IntegerArray built from the data cast with ``astype(int)``.
        """
        if name not in ("cummin", "cummax"):
            from pandas.core.arrays import IntegerArray

            as_integers = IntegerArray(self._data.astype(int), self._mask)
            return as_integers._accumulate(name, skipna=skipna, **kwargs)

        accumulate = getattr(masked_accumulations, name)
        data, mask = accumulate(self._data, self._mask, skipna=skipna, **kwargs)
        return type(self)(data, mask, copy=False)