Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/util/_validators.py: 51%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

123 statements  

1""" 

2Module that contains many useful utilities 

3for validating data or function arguments 

4""" 

5from __future__ import annotations 

6 

7from collections.abc import ( 

8 Iterable, 

9 Sequence, 

10) 

11from typing import ( 

12 TypeVar, 

13 overload, 

14) 

15 

16import numpy as np 

17 

18from pandas._libs import lib 

19 

20from pandas.core.dtypes.common import ( 

21 is_bool, 

22 is_integer, 

23) 

24 

# Constrained type variable for values that are either ``bool`` or ``int``;
# used in the ``validate_ascending`` signatures.
BoolishT = TypeVar("BoolishT", bool, int)

# Same constraint set plus ``None``; used by ``validate_bool_kwarg`` so the
# return annotation mirrors the type of the value that was passed in.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)

27 

28 

def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Ensure 'args' holds no more entries than 'compat_args'.

    Mirrors what Python does when a function receives too many
    arguments: a TypeError is raised. 'max_fname_arg_count' is added
    to both the allowed and the received counts shown in the message.

    Raises
    ------
    ValueError
        If 'max_fname_arg_count' is negative.
    TypeError
        If ``len(args)`` exceeds ``len(compat_args)``.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)
    if n_given <= n_allowed:
        return

    # Offset both numbers by the arguments the function accepts on its own
    # so the message reflects the full call signature.
    limit = n_allowed + max_fname_arg_count
    received = n_given + max_fname_arg_count
    noun = "argument" if limit == 1 else "arguments"

    raise TypeError(
        f"{fname}() takes at most {limit} {noun} "
        f"({received} given)"
    )

47 

48 

def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
    """
    Verify that every entry of `arg_val_dict` equals the default value
    registered for the same key in `compat_args`.

    Callers must already have established that the keys of
    `arg_val_dict` are a subset of those in `compat_args`.

    Raises
    ------
    ValueError
        If any supplied value differs from its default.
    """
    for key in arg_val_dict:
        try:
            supplied = arg_val_dict[key]
            default = compat_args[key]

            # Exactly one side being None is an immediate mismatch; this
            # also avoids comparing e.g. a numpy array against None.
            if (supplied is None) != (default is None):
                match = False
            else:
                # Use '==' first, since the left-hand object may define
                # its own comparison.
                match = supplied == default

            # An element-wise comparison result (not a plain boolean) is
            # routed to the identity fallback below.
            if not is_bool(match):
                raise ValueError("'match' is not a boolean")

        except ValueError:
            # Direct comparison was not usable; fall back to identity.
            match = arg_val_dict[key] is compat_args[key]

        if not match:
            raise ValueError(
                f"the '{key}' parameter is not supported in "
                f"the pandas implementation of {fname}()"
            )

85 

86 

def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed into a function: there may be at most
    `len(compat_args)` of them, and each must equal its default value.

    Parameters
    ----------
    fname : str
        Name of the function that received the `*args`.
    args : tuple
        The `*args` that were passed in.
    max_fname_arg_count : int
        Maximum number of arguments `fname` accepts apart from those in
        `args`. Only used to build the error message. Must be
        non-negative.
    compat_args : dict
        Mapping of parameter names to their default values. Its ordering
        is significant: positional values are paired with the keys in
        order, which accommodates `numpy` versions that displayed keyword
        arguments in a signature but forwarded them **positionally**
        to downstream implementations.

    Raises
    ------
    TypeError
        If `args` holds more values than `compat_args` has entries.
    ValueError
        If a value in `args` differs from the matching default in
        `compat_args`.
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Attach a parameter name to every positional value so that the error
    # raised for an unsupported parameter can mention it by name.
    named_positionals = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_positionals, compat_args)

126 

127 

def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
    """
    Checks whether 'kwargs' contains any keys that are not
    in 'compat_args' and raises a TypeError if there is one.

    When several keys are invalid, the first one in the iteration
    order of 'kwargs' is reported, so the error message is
    deterministic from run to run.

    Raises
    ------
    TypeError
        If 'kwargs' has a key that is absent from 'compat_args'.
    """
    # Walk 'kwargs' in order instead of taking an arbitrary element from a
    # set difference, whose iteration order depends on string hashing.
    for key in kwargs:
        if key not in compat_args:
            raise TypeError(f"{fname}() got an unexpected keyword argument '{key}'")

139 

140 

def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed into `fname`: every key must appear in
    `compat_args`, and every value must equal the default stored there.

    Parameters
    ----------
    fname : str
        Name of the function that received the `**kwargs`.
    kwargs : dict
        The `**kwargs` that were passed into `fname`.
    compat_args : dict
        Permitted keys for `kwargs`, mapped to their default values.

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # Take the snapshot before any check runs; the default-value check
    # operates on this copy rather than on the dict that was passed in.
    kwargs_snapshot = kwargs.copy()

    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, kwargs_snapshot, compat_args)

166 

167 

def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The `kwargs` dict passed in is left unmodified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values. Positional values in
        `args` are paired with its keys in order.

    Raises
    ------
    TypeError if `args` and `kwargs` together contain more values than
    there are `compat_args`, if a parameter is supplied both positionally
    and by keyword, OR if `kwargs` contains keys not in `compat_args`
    ValueError if `args` or `kwargs` contain values that do not match the
    default value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Merge into a fresh dict (keyword entries first, then the named
    # positionals) so that validating does not mutate the caller's kwargs.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)

224 

225 

def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that the value supplied for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        The value to check.
    arg_name : str
        Argument name, used only in the error message.
    none_allowed : bool, default True
        If True, None is accepted as well.
    int_allowed : bool, default False
        If True, any ``int`` instance is accepted as well.

    Returns
    -------
    value
        The input value, unchanged.

    Raises
    ------
    ValueError
        If the value is not accepted under the rules above.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )

269 

270 

def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two must be given. A given 'method' is passed
    through ``clean_fill_method`` and the result is returned in its place.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        If True, reject a 'value' that is a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are given.
    TypeError
        If 'value' is a list or tuple while `validate_scalar_dict_value`
        is True.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not has_value and not has_method:
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    # From here on exactly one of the two was supplied.
    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method

308 

309 

def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    Converts the input with ``np.asarray`` and confirms that every
    percentile lies in the closed interval [0, 1].

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        The percentiles as an ndarray, if they are all valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    q_arr = np.asarray(q)

    if q_arr.ndim == 0:
        # Scalar input: a single chained comparison is enough.
        within_bounds = 0 <= q_arr <= 1
    else:
        within_bounds = all(0 <= item <= 1 for item in q_arr)

    if not within_bounds:
        # Keep this a constant string rather than an f-string so that no
        # formatting cost is involved.
        raise ValueError("percentiles should all be in the interval [0, 1]")
    return q_arr

342 

343 

@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a list; any
    other input is validated as a single value. None is rejected and
    integers are accepted.
    """

    def _checked(candidate):
        # Same rules for a lone value and for each sequence element.
        return validate_bool_kwarg(
            candidate, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_checked(item) for item in ascending]

    return _checked(ascending)

363 

364 

def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}
        None closes both sides; "left" or "right" closes only that side.

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")

396 

397 

def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into a (left, right) pair of flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Only strings are looked up; every other input (including None) is
    # rejected together with unrecognised strings.
    if not isinstance(inclusive, str) or inclusive not in flags_by_name:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )

    return flags_by_name[inclusive]

430 

431 

def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` must be an integer in [-length, length]. A negative `loc` is
    shifted by `length`, so the returned position lies in [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` is outside [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    # Count negative positions back from the end.
    position = loc + length if loc < 0 else loc

    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position  # pyright: ignore[reportGeneralTypeIssues]

448 

449 

def check_dtype_backend(dtype_backend) -> None:
    """
    Validate a ``dtype_backend`` argument.

    ``lib.no_default`` is accepted as-is; any other value must be
    "numpy_nullable" or "pyarrow".

    Raises
    ------
    ValueError
        If `dtype_backend` is neither ``lib.no_default`` nor one of the
        two allowed names.
    """
    # The sentinel means nothing was passed, so there is nothing to check.
    if dtype_backend is lib.no_default:
        return

    if dtype_backend in ["numpy_nullable", "pyarrow"]:
        return

    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )