Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/boolean.py: 28%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

169 statements  

1from __future__ import annotations 

2 

3import numbers 

4from typing import ( 

5 TYPE_CHECKING, 

6 cast, 

7) 

8 

9import numpy as np 

10 

11from pandas._libs import ( 

12 lib, 

13 missing as libmissing, 

14) 

15from pandas._typing import ( 

16 Dtype, 

17 DtypeObj, 

18 type_t, 

19) 

20 

21from pandas.core.dtypes.common import ( 

22 is_list_like, 

23 is_numeric_dtype, 

24) 

25from pandas.core.dtypes.dtypes import register_extension_dtype 

26from pandas.core.dtypes.missing import isna 

27 

28from pandas.core import ops 

29from pandas.core.array_algos import masked_accumulations 

30from pandas.core.arrays.masked import ( 

31 BaseMaskedArray, 

32 BaseMaskedDtype, 

33) 

34 

35if TYPE_CHECKING: 

36 import pyarrow 

37 

38 from pandas._typing import npt 

39 

40 

@register_extension_dtype
class BooleanDtype(BaseMaskedDtype):
    """
    Extension dtype for boolean data.

    .. warning::

       BooleanDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.BooleanDtype()
    BooleanDtype
    """

    name = "boolean"

    # https://github.com/python/mypy/issues/4125
    # error: Signature of "type" incompatible with supertype "BaseMaskedDtype"
    @property
    def type(self) -> type:  # type: ignore[override]
        """Return the scalar type of the elements, ``numpy.bool_``."""
        return np.bool_

    @property
    def kind(self) -> str:
        """Return the one-character kind code for this dtype, ``"b"``."""
        return "b"

    @property
    def numpy_dtype(self) -> np.dtype:
        """Return the NumPy dtype used to store the data, ``dtype("bool")``."""
        return np.dtype("bool")

    @classmethod
    def construct_array_type(cls) -> type_t[BooleanArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return BooleanArray

    def __repr__(self) -> str:
        return "BooleanDtype"

    @property
    def _is_boolean(self) -> bool:
        """Return True: this dtype holds boolean data."""
        return True

    @property
    def _is_numeric(self) -> bool:
        """Return True: this dtype is flagged as numeric."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BooleanArray:
        """
        Construct BooleanArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Arrow data of boolean type, or of null type (every entry
            missing).

        Returns
        -------
        BooleanArray

        Raises
        ------
        TypeError
            If ``array`` is neither of boolean nor of null type.
        """
        import pyarrow

        is_null_type = pyarrow.types.is_null(array.type)
        if array.type != pyarrow.bool_() and not is_null_type:
            raise TypeError(f"Expected array of boolean type, got {array.type} instead")

        if is_null_type:
            # A null-typed array has no data buffer to read; every entry is
            # missing, so build an all-masked result of the same length.
            length = len(array)
            return BooleanArray(
                np.zeros(length, dtype=np.bool_), np.ones(length, dtype=np.bool_)
            )

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        results = []
        for arr in chunks:
            buflist = arr.buffers()
            # buflist[1] is the data bitmap; reinterpret it as a boolean array
            # without a validity buffer to obtain the raw values.
            data = pyarrow.BooleanArray.from_buffers(
                arr.type, len(arr), [None, buflist[1]], offset=arr.offset
            ).to_numpy(zero_copy_only=False)
            if arr.null_count != 0:
                # buflist[0] is the validity bitmap (set bit == valid), so it
                # is inverted to match the mask convention (True == missing).
                mask = pyarrow.BooleanArray.from_buffers(
                    arr.type, len(arr), [None, buflist[0]], offset=arr.offset
                ).to_numpy(zero_copy_only=False)
                mask = ~mask
            else:
                mask = np.zeros(len(arr), dtype=bool)

            results.append(BooleanArray(data, mask))

        if not results:
            # A ChunkedArray with zero chunks: return an empty array.
            return BooleanArray(
                np.array([], dtype=np.bool_), np.array([], dtype=np.bool_)
            )
        return BooleanArray._concat_same_type(results)

143 

144 

def coerce_to_array(
    values, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
    mask : bool 1D array, optional
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)

    Raises
    ------
    TypeError
        If ``values`` holds something other than booleans, missing values
        or numbers equal to 0/1.
    ValueError
        If ``mask`` is passed together with a BooleanArray, or if the
        resulting values and mask differ in shape.
    """
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        values, mask = values._data, values._mask
        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    # Missing-value mask inferred from ``values`` itself; stays None when the
    # input is already a boolean ndarray.
    mask_values = None
    if isinstance(values, np.ndarray) and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
        mask_values = isna(values)
        valid = ~mask_values

        values_bool = np.zeros(len(values), dtype=bool)
        values_bool[valid] = values[valid].astype(bool)

        # Round-trip through bool: anything other than 0/1 does not survive.
        if not np.all(values_bool[valid].astype(values.dtype) == values[valid]):
            raise TypeError("Need to pass bool-like values")

        values = values_bool
    else:
        values_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        # mypy does not narrow the type of mask_values to npt.NDArray[np.bool_]
        # within this branch, it assumes it can also be None
        mask_values = cast("npt.NDArray[np.bool_]", isna(values_object))
        valid = ~mask_values
        values = np.zeros(len(values), dtype=bool)
        values[valid] = values_object[valid].astype(bool)

        # if the values were integer-like, validate they were actually 0/1's
        if (inferred_dtype in integer_like) and not (
            np.all(values[valid].astype(float) == values_object[valid].astype(float))
        ):
            raise TypeError("Need to pass bool-like values")

    if mask is None:
        if mask_values is None:
            mask = np.zeros(values.shape, dtype=bool)
        else:
            mask = mask_values
    else:
        mask_is_bool_ndarray = isinstance(mask, np.ndarray) and mask.dtype == np.bool_
        if not mask_is_bool_ndarray:
            mask = np.array(mask, dtype=bool)
        if mask_values is not None:
            # Combine the caller's mask with the one inferred from the values;
            # ``|`` allocates a new array, so no explicit copy is needed.
            mask = mask | mask_values
        elif mask_is_bool_ndarray and copy:
            mask = mask.copy()

    if values.shape != mask.shape:
        raise ValueError("values.shape and mask.shape must match")

    return values, mask

230 

231 

class BooleanArray(BaseMaskedArray):
    """
    Array of boolean (True/False) data with missing values.

    This is a pandas Extension array for boolean data, under the hood
    represented by 2 numpy arrays: a boolean array with the data and
    a boolean array with the mask (True indicating missing).

    BooleanArray implements Kleene logic (sometimes called three-value
    logic) for logical operations. See :ref:`boolean.kleene` for more.

    To construct a BooleanArray from generic array-like input, use
    :func:`pandas.array` specifying ``dtype="boolean"`` (see examples
    below).

    .. warning::

       BooleanArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d boolean-dtype array with the data.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values (True
        indicates missing).
    copy : bool, default False
        Whether to copy the `values` and `mask` arrays.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    BooleanArray

    Examples
    --------
    Create a BooleanArray with :func:`pandas.array`:

    >>> pd.array([True, False, None], dtype="boolean")
    <BooleanArray>
    [True, False, <NA>]
    Length: 3, dtype: boolean
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = False
    # Fill values used for any/all
    # Incompatible types in assignment (expression has type "bool", base class
    # "BaseMaskedArray" defined the type as "<typing special form>")
    _truthy_value = True  # type: ignore[assignment]
    _falsey_value = False  # type: ignore[assignment]
    # Strings recognised as True / False by _from_sequence_of_strings.
    _TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
    _FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}

    def __init__(
        self, values: np.ndarray, mask: np.ndarray, copy: bool = False
    ) -> None:
        """
        Validate that ``values`` is a boolean ndarray and store data and mask.

        Raises
        ------
        TypeError
            If ``values`` is not a numpy array of boolean dtype.
        """
        if not (isinstance(values, np.ndarray) and values.dtype == np.bool_):
            raise TypeError(
                "values should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        self._dtype = BooleanDtype()
        super().__init__(values, mask, copy=copy)

    @property
    def dtype(self) -> BooleanDtype:
        """Return the BooleanDtype instance created for this array."""
        return self._dtype

    @classmethod
    def _from_sequence_of_strings(
        cls,
        strings: list[str],
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
        true_values: list[str] | None = None,
        false_values: list[str] | None = None,
    ) -> BooleanArray:
        """
        Build a BooleanArray by parsing string representations of booleans.

        Parameters
        ----------
        strings : list of str
            Entries to parse; missing entries (as detected by ``isna``) are
            left untouched and end up masked.
        dtype : Dtype, optional
            Forwarded to ``_from_sequence``.
        copy : bool, default False
            Forwarded to ``_from_sequence``.
        true_values : list of str, optional
            Extra strings to accept as True, in addition to ``_TRUE_VALUES``.
        false_values : list of str, optional
            Extra strings to accept as False, in addition to ``_FALSE_VALUES``.

        Returns
        -------
        BooleanArray

        Raises
        ------
        ValueError
            If a non-missing entry is in neither set of accepted strings.
        """
        true_values_union = cls._TRUE_VALUES.union(true_values or [])
        false_values_union = cls._FALSE_VALUES.union(false_values or [])

        def map_string(s) -> bool:
            if s in true_values_union:
                return True
            elif s in false_values_union:
                return False
            else:
                raise ValueError(f"{s} cannot be cast to bool")

        scalars = np.array(strings, dtype=object)
        mask = isna(scalars)
        # Only the non-missing entries are parsed.
        scalars[~mask] = list(map(map_string, scalars[~mask]))
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_)

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """Coerce ``value`` to a ``(values, mask)`` pair via ``coerce_to_array``."""
        if dtype:
            assert dtype == "boolean"
        return coerce_to_array(value, copy=copy)

    def _logical_method(self, other, op):
        """
        Apply a logical operator (or/and/xor and their reflected forms).

        Parameters
        ----------
        other : BooleanArray, list-like, bool or pandas.NA
            Right-hand operand.
        op : callable
            Operator whose ``__name__`` is one of ``or_``, ``ror_``, ``and_``,
            ``rand_``, ``xor``, ``rxor``.

        Raises
        ------
        TypeError
            If ``other`` is a scalar that is neither pandas.NA nor a bool.
        ValueError
            If ``other`` is not a scalar and its length differs from ``self``.
        NotImplementedError
            If ``other`` is list-like with more than one dimension.
        """
        assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
        # Decided before ``other`` is unpacked or converted below.
        other_is_scalar = lib.is_scalar(other)
        mask = None

        if isinstance(other, BooleanArray):
            other, mask = other._data, other._mask
        elif is_list_like(other):
            other = np.asarray(other, dtype="bool")
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            other, mask = coerce_to_array(other, copy=False)
        elif isinstance(other, np.bool_):
            # Convert the numpy scalar with ``.item()`` before the bool check.
            other = other.item()

        if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other):
            raise TypeError(
                "'other' should be pandas.NA or a bool. "
                f"Got {type(other).__name__} instead."
            )

        if not other_is_scalar and len(self) != len(other):
            raise ValueError("Lengths must match")

        if op.__name__ in {"or_", "ror_"}:
            result, mask = ops.kleene_or(self._data, other, self._mask, mask)
        elif op.__name__ in {"and_", "rand_"}:
            result, mask = ops.kleene_and(self._data, other, self._mask, mask)
        else:
            # i.e. xor, rxor
            result, mask = ops.kleene_xor(self._data, other, self._mask, mask)

        # i.e. BooleanArray
        return self._maybe_mask_result(result, mask)

    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> BaseMaskedArray:
        """
        Compute a cumulative operation over the array.

        ``cummin`` and ``cummax`` are looked up by name in
        ``masked_accumulations`` and the result is wrapped in
        ``type(self)``. Any other ``name`` is delegated to an IntegerArray
        built from the data cast to ``int``.

        Parameters
        ----------
        name : str
            Name of the accumulation, e.g. ``"cummin"`` or ``"cumsum"``.
        skipna : bool, default True
            Forwarded to the underlying accumulation.
        **kwargs
            Forwarded to the underlying accumulation.
        """
        data = self._data
        mask = self._mask
        if name in ("cummin", "cummax"):
            op = getattr(masked_accumulations, name)
            data, mask = op(data, mask, skipna=skipna, **kwargs)
            return type(self)(data, mask, copy=False)
        else:
            # Imported here rather than at module level (as in the original).
            from pandas.core.arrays import IntegerArray

            return IntegerArray(data.astype(int), mask)._accumulate(
                name, skipna=skipna, **kwargs
            )