Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/util/_validators.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

122 statements  

1""" 

2Module that contains many useful utilities 

3for validating data or function arguments 

4""" 

5from __future__ import annotations 

6 

7from typing import ( 

8 Iterable, 

9 Sequence, 

10 TypeVar, 

11 overload, 

12) 

13 

14import numpy as np 

15 

16from pandas._libs import lib 

17 

18from pandas.core.dtypes.common import ( 

19 is_bool, 

20 is_integer, 

21) 

22 

# Type variable constrained to ``bool`` or ``int``; used in the
# ``validate_ascending`` signatures in this module.
23BoolishT = TypeVar("BoolishT", bool, int) 

# Same as above but additionally allowing ``None``; used in the
# ``validate_bool_kwarg`` signature in this module.
24BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None) 

25 

26 

27def _check_arg_length(fname, args, max_fname_arg_count, compat_args): 

28 """ 

29 Checks whether 'args' has length of at most 'compat_args'. Raises 

30 a TypeError if that is not the case, similar to in Python when a 

31 function is called with too many arguments. 

32 """ 

33 if max_fname_arg_count < 0: 

34 raise ValueError("'max_fname_arg_count' must be non-negative") 

35 

36 if len(args) > len(compat_args): 

37 max_arg_count = len(compat_args) + max_fname_arg_count 

38 actual_arg_count = len(args) + max_fname_arg_count 

39 argument = "argument" if max_arg_count == 1 else "arguments" 

40 

41 raise TypeError( 

42 f"{fname}() takes at most {max_arg_count} {argument} " 

43 f"({actual_arg_count} given)" 

44 ) 

45 

46 

47def _check_for_default_values(fname, arg_val_dict, compat_args): 

48 """ 

49 Check that the keys in `arg_val_dict` are mapped to their 

50 default values as specified in `compat_args`. 

51 

52 Note that this function is to be called only when it has been 

53 checked that arg_val_dict.keys() is a subset of compat_args 

54 """ 

55 for key in arg_val_dict: 

56 # try checking equality directly with '=' operator, 

57 # as comparison may have been overridden for the left 

58 # hand object 

59 try: 

60 v1 = arg_val_dict[key] 

61 v2 = compat_args[key] 

62 

63 # check for None-ness otherwise we could end up 

64 # comparing a numpy array vs None 

65 if (v1 is not None and v2 is None) or (v1 is None and v2 is not None): 

66 match = False 

67 else: 

68 match = v1 == v2 

69 

70 if not is_bool(match): 

71 raise ValueError("'match' is not a boolean") 

72 

73 # could not compare them directly, so try comparison 

74 # using the 'is' operator 

75 except ValueError: 

76 match = arg_val_dict[key] is compat_args[key] 

77 

78 if not match: 

79 raise ValueError( 

80 f"the '{key}' parameter is not supported in " 

81 f"the pandas implementation of {fname}()" 

82 ) 

83 

84 

def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed into a function: there may be at most
    `len(compat_args)` of them and each must equal its default value.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        Positional values are paired with these keys in order, so the
        ordering of the dict determines which default each positional
        value is compared against.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the compat key at the same
    # position so that a failure can name the unsupported parameter.
    named_args = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_args, compat_args)

124 

125 

126def _check_for_invalid_keys(fname, kwargs, compat_args): 

127 """ 

128 Checks whether 'kwargs' contains any keys that are not 

129 in 'compat_args' and raises a TypeError if there is one. 

130 """ 

131 # set(dict) --> set of the dictionary's keys 

132 diff = set(kwargs) - set(compat_args) 

133 

134 if diff: 

135 bad_arg = list(diff)[0] 

136 raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'") 

137 

138 

def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed into function `fname`: every key must
    be listed in `compat_args` and every value must equal its default.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value check runs on a shallow copy taken up front.
    kwargs_snapshot = kwargs.copy()

    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, kwargs_snapshot, compat_args)

164 

165 

def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The `kwargs` dict passed in is not modified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError if `args` contains more values than there are
    `compat_args`, `kwargs` contains keys not in `compat_args`, OR the
    same parameter is supplied both positionally and by keyword
    ValueError if `args` or `kwargs` contain values that do not map to
    the default value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy instead of calling kwargs.update(): the
    # caller's dict must not be changed by a validation helper. Key order
    # (keyword entries first, then positional ones) is the same as the
    # in-place update produced.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)

222 

223 

def validate_bool_kwarg(
    value: BoolishNoneT, arg_name, none_allowed: bool = True, int_allowed: bool = False
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )

264 

265 

def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method``; a given 'value' is optionally checked
    not to be a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are specified.
    TypeError
        If 'value' is a list or tuple while validation is enabled.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not (has_value or has_method):
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    # From here on exactly one of the two is set.
    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method

303 

304 

def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every entry is
    required to lie in the closed interval [0, 1].

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)

    if q_arr.ndim == 0:
        # Zero-dimensional input: a single comparison is enough.
        in_range = 0 <= q_arr <= 1
    else:
        in_range = all(0 <= qs <= 1 for qs in q_arr)

    if not in_range:
        # Don't change this to an f-string. The string formatting
        # is too expensive for cases where we don't need it.
        msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
        raise ValueError(msg.format(q_arr / 100.0))

    return q_arr

337 

338 

@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a new list;
    anything else is validated as a single value and returned as is.
    None is rejected and integers are accepted in both cases.
    """

    def _validated(item):
        # Same options for the single-value and the per-item case.
        return validate_bool_kwarg(
            item, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_validated(item) for item in ascending]

    return _validated(ascending)

358 

359 

def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both endpoints; "left" or "right" closes only
        the named one.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")

391 

392 

def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a (left, right) pair of flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Non-string input (including None) is rejected before any lookup.
    if not isinstance(inclusive, str) or inclusive not in flags_by_name:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )

    return flags_by_name[inclusive]

425 

426 

def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` must be an integer in [-length, length]. A negative `loc` is
    shifted by `length` so the returned position lies within [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` is outside [-length, length].
    """
    bounds_msg = f"loc must be an integer between -{length} and {length}"

    if not is_integer(loc):
        raise TypeError(bounds_msg)

    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(bounds_msg)

    return position

443 

444 

def check_dtype_backend(dtype_backend) -> None:
    """
    Check that `dtype_backend` is either left unset or a supported name.

    Parameters
    ----------
    dtype_backend : {"numpy_nullable", "pyarrow"} or ``lib.no_default``
        The value to check. ``lib.no_default`` is accepted without
        further checks.

    Raises
    ------
    ValueError
        If `dtype_backend` is neither ``lib.no_default`` nor one of the
        two allowed names.
    """
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ["numpy_nullable", "pyarrow"]:
        return

    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )