Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/boolean.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

179 statements  

1from __future__ import annotations 

2 

3import numbers 

4from typing import ( 

5 TYPE_CHECKING, 

6 ClassVar, 

7 cast, 

8) 

9 

10import numpy as np 

11 

12from pandas._libs import ( 

13 lib, 

14 missing as libmissing, 

15) 

16 

17from pandas.core.dtypes.common import is_list_like 

18from pandas.core.dtypes.dtypes import register_extension_dtype 

19from pandas.core.dtypes.missing import isna 

20 

21from pandas.core import ops 

22from pandas.core.array_algos import masked_accumulations 

23from pandas.core.arrays.masked import ( 

24 BaseMaskedArray, 

25 BaseMaskedDtype, 

26) 

27 

28if TYPE_CHECKING: 

29 import pyarrow 

30 

31 from pandas._typing import ( 

32 Dtype, 

33 DtypeObj, 

34 Self, 

35 npt, 

36 type_t, 

37 ) 

38 

39 

@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype for boolean data.

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    # String alias of this dtype.
    name: ClassVar[str] = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """Return the scalar type of this dtype, ``np.bool_``."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Return the one-character kind code of this dtype, ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """Return the NumPy dtype of the underlying data, ``np.dtype("bool")``."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        """Always True for this dtype."""
        return True

    @property
    def _is_numeric(self) -> bool:
        """Always True for this dtype."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Construct BooleanArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Must be of pyarrow boolean type or of pyarrow null type.

        Returns
        -------
        BooleanArray

        Raises
        ------
        TypeError
            If ``array`` is neither of boolean type nor of null type.
        """
        import pyarrow

        if array.type != pyarrow.bool_() and not pyarrow.types.is_null(array.type):
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        if isinstance(array, pyarrow.Array):
            chunks = [array]
            length = len(array)
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks
            length = array.length()

        if pyarrow.types.is_null(array.type):
            mask = np.ones(length, dtype=bool)
            # No need to init data, since all null
            data = np.empty(length, dtype=bool)
            return BooleanArray(data, mask)

        results = []
        for arr in chunks:
            buflist = arr.buffers()
            # Buffer 1 is reinterpreted as a boolean array to obtain the data.
            data = pyarrow.BooleanArray.from_buffers(
                arr.type, len(arr), [None, buflist[1]], offset=arr.offset
            ).to_numpy(zero_copy_only=False)
            if arr.null_count != 0:
                # Buffer 0 is reinterpreted the same way and then inverted,
                # so that True in ``mask`` marks a missing entry.
                mask = pyarrow.BooleanArray.from_buffers(
                    arr.type, len(arr), [None, buflist[0]], offset=arr.offset
                ).to_numpy(zero_copy_only=False)
                mask = ~mask
            else:
                mask = np.zeros(len(arr), dtype=bool)

            results.append(BooleanArray(data, mask))

        if not results:
            # A ChunkedArray without chunks yields an empty BooleanArray.
            return BooleanArray(
                np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
            )
        return BooleanArray._concat_same_type(results)

150 

151 

def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
    mask : bool 1D array, optional
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)

    Raises
    ------
    ValueError
        If ``mask`` is passed together with a BooleanArray, or if the
        resulting values and mask do not have the same shape.
    TypeError
        If ``values`` are not bool-like, e.g. numbers other than 0/1 or
        objects that are not inferred as boolean or numeric.
    """
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        values, mask = values._data, values._mask
        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    # Missing-value mask derived from ``values`` itself; stays None when the
    # input is already a boolean ndarray.
    mask_values = None
    if isinstance(values, np.ndarray) and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif isinstance(values, np.ndarray) and values.dtype.kind in "iufcb":
        mask_values = isna(values)
        valid = ~mask_values

        values_bool = np.zeros(len(values), dtype=bool)
        values_bool[valid] = values[valid].astype(bool)

        # Round-trip through bool: anything other than 0/1 does not survive.
        if not np.all(values_bool[valid].astype(values.dtype) == values[valid]):
            raise TypeError("Need to pass bool-like values")

        values = values_bool
    else:
        values_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_]
        # within this branch, it assumes it can also be None
        mask_values = cast("npt.NDArray[np.bool_]", isna(values_object))
        valid = ~mask_values
        values = np.zeros(len(values), dtype=bool)
        values[valid] = values_object[valid].astype(bool)

        # if the values were integer-like, validate that they were actually 0/1
        if (inferred_dtype in integer_like) and not (
            np.all(values[valid].astype(float) == values_object[valid].astype(float))
        ):
            raise TypeError("Need to pass bool-like values")

    if mask is None:
        if mask_values is None:
            mask = np.zeros(values.shape, dtype=bool)
        else:
            mask = mask_values
    else:
        if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
            mask = np.array(mask, dtype=bool)
        elif copy and mask_values is None:
            # Only copy explicitly when the "|" below will not already
            # produce a new array.
            mask = mask.copy()
        if mask_values is not None:
            mask = mask | mask_values

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask

237 

238 

239class BooleanArray(BaseMaskedArray): 

240 """ 

241 Array of boolean (True/False) data with missing values. 

242 

243 This is a pandas Extension array for boolean data, under the hood 

244 represented by 2 numpy arrays: a boolean array with the data and 

245 a boolean array with the mask (True indicating missing). 

246 

247 BooleanArray implements Kleene logic (sometimes called three-value 

248 logic) for logical operations. See :ref:`boolean.kleene` for more. 

249 

250 To construct an BooleanArray from generic array-like input, use 

251 :func:`pandas.array` specifying ``dtype="boolean"`` (see examples 

252 below). 

253 

254 .. warning:: 

255 

256 BooleanArray is considered experimental. The implementation and 

257 parts of the API may change without warning. 

258 

259 Parameters 

260 ---------- 

261 values : numpy.ndarray 

262 A 1-d boolean-dtype array with the data. 

263 mask : numpy.ndarray 

264 A 1-d boolean-dtype array indicating missing values (True 

265 indicates missing). 

266 copy : bool, default False 

267 Whether to copy the `values` and `mask` arrays. 

268 

269 Attributes 

270 ---------- 

271 None 

272 

273 Methods 

274 ------- 

275 None 

276 

277 Returns 

278 ------- 

279 BooleanArray 

280 

281 Examples 

282 -------- 

283 Create an BooleanArray with :func:`pandas.array`: 

284 

285 >>> pd.array([True, False, None], dtype="boolean") 

286 <BooleanArray> 

287 [True, False, <NA>] 

288 Length: 3, dtype: boolean 

289 """ 

290 

291 # The value used to fill '_data' to avoid upcasting 

292 _internal_fill_value = False 

293 # Fill values used for any/all 

294 # Incompatible types in assignment (expression has type "bool", base class 

295 # "BaseMaskedArray" defined the type as "<typing special form>") 

296 _truthy_value = True # type: ignore[assignment] 

297 _falsey_value = False # type: ignore[assignment] 

298 _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"} 

299 _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"} 

300 

301 @classmethod 

302 def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self: 

303 result = super()._simple_new(values, mask) 

304 result._dtype = BooleanDtype() 

305 return result 

306 

307 def __init__( 

308 self, values: np.ndarray, mask: np.ndarray, copy: bool = False 

309 ) -> None: 

310 if not (isinstance(values, np.ndarray) and values.dtype == np.bool_): 

311 raise TypeError( 

312 "values should be boolean numpy array. Use " 

313 "the 'pd.array' function instead" 

314 ) 

315 self._dtype = BooleanDtype() 

316 super().__init__(values, mask, copy=copy) 

317 

318 @property 

319 def dtype(self) -> BooleanDtype: 

320 return self._dtype 

321 

322 @classmethod 

323 def _from_sequence_of_strings( 

324 cls, 

325 strings: list[str], 

326 *, 

327 dtype: Dtype | None = None, 

328 copy: bool = False, 

329 true_values: list[str] | None = None, 

330 false_values: list[str] | None = None, 

331 ) -> BooleanArray: 

332 true_values_union = cls._TRUE_VALUES.union(true_values or []) 

333 false_values_union = cls._FALSE_VALUES.union(false_values or []) 

334 

335 def map_string(s) -> bool: 

336 if s in true_values_union: 

337 return True 

338 elif s in false_values_union: 

339 return False 

340 else: 

341 raise ValueError(f"{s} cannot be cast to bool") 

342 

343 scalars = np.array(strings, dtype=object) 

344 mask = isna(scalars) 

345 scalars[~mask] = list(map(map_string, scalars[~mask])) 

346 return cls._from_sequence(scalars, dtype=dtype, copy=copy) 

347 

348 _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) 

349 

350 @classmethod 

351 def _coerce_to_array( 

352 cls, value, *, dtype: DtypeObj, copy: bool = False 

353 ) -> tuple[np.ndarray, np.ndarray]: 

354 if dtype: 

355 assert dtype == "boolean" 

356 return coerce_to_array(value, copy=copy) 

357 

358 def _logical_method(self, other, op): 

359 assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"} 

360 other_is_scalar = lib.is_scalar(other) 

361 mask = None 

362 

363 if isinstance(other, BooleanArray): 

364 other, mask = other._data, other._mask 

365 elif is_list_like(other): 

366 other = np.asarray(other, dtype="bool") 

367 if other.ndim > 1: 

368 raise NotImplementedError("can only perform ops with 1-d structures") 

369 other, mask = coerce_to_array(other, copy=False) 

370 elif isinstance(other, np.bool_): 

371 other = other.item() 

372 

373 if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other): 

374 raise TypeError( 

375 "'other' should be pandas.NA or a bool. " 

376 f"Got {type(other).__name__} instead." 

377 ) 

378 

379 if not other_is_scalar and len(self) != len(other): 

380 raise ValueError("Lengths must match") 

381 

382 if op.__name__ in {"or_", "ror_"}: 

383 result, mask = ops.kleene_or(self._data, other, self._mask, mask) 

384 elif op.__name__ in {"and_", "rand_"}: 

385 result, mask = ops.kleene_and(self._data, other, self._mask, mask) 

386 else: 

387 # i.e. xor, rxor 

388 result, mask = ops.kleene_xor(self._data, other, self._mask, mask) 

389 

390 # i.e. BooleanArray 

391 return self._maybe_mask_result(result, mask) 

392 

393 def _accumulate( 

394 self, name: str, *, skipna: bool = True, **kwargs 

395 ) -> BaseMaskedArray: 

396 data = self._data 

397 mask = self._mask 

398 if name in ("cummin", "cummax"): 

399 op = getattr(masked_accumulations, name) 

400 data, mask = op(data, mask, skipna=skipna, **kwargs) 

401 return self._simple_new(data, mask) 

402 else: 

403 from pandas.core.arrays import IntegerArray 

404 

405 return IntegerArray(data.astype(int), mask)._accumulate( 

406 name, skipna=skipna, **kwargs 

407 )