Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/reshape/tile.py: 13%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

181 statements  

1""" 

2Quantilization functions and related stuff 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 Any, 

8 Callable, 

9 Literal, 

10) 

11 

12import numpy as np 

13 

14from pandas._libs import ( 

15 Timedelta, 

16 Timestamp, 

17) 

18from pandas._libs.lib import infer_dtype 

19from pandas._typing import IntervalLeftRight 

20 

21from pandas.core.dtypes.common import ( 

22 DT64NS_DTYPE, 

23 ensure_platform_int, 

24 is_bool_dtype, 

25 is_categorical_dtype, 

26 is_datetime64_dtype, 

27 is_datetime64tz_dtype, 

28 is_datetime_or_timedelta_dtype, 

29 is_extension_array_dtype, 

30 is_integer, 

31 is_list_like, 

32 is_numeric_dtype, 

33 is_scalar, 

34 is_timedelta64_dtype, 

35) 

36from pandas.core.dtypes.generic import ABCSeries 

37from pandas.core.dtypes.missing import isna 

38 

39from pandas import ( 

40 Categorical, 

41 Index, 

42 IntervalIndex, 

43 to_datetime, 

44 to_timedelta, 

45) 

46from pandas.core import nanops 

47import pandas.core.algorithms as algos 

48 

49 

def cut(
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    """
    Assign each value of a 1-dimensional input to a discrete interval.

    `cut` segments and sorts data values into bins, which makes it a
    convenient way to turn a continuous variable into a categorical one
    (for example, converting ages into age groups). The bins may be
    requested as a number of equal-width intervals or supplied explicitly.

    Parameters
    ----------
    x : array-like
        The values to bin. Must be 1-dimensional.
    bins : int, sequence of scalars, or IntervalIndex
        How the bins are defined.

        * int : the number of equal-width bins spanning the range of `x`.
          So that the extreme values of `x` fall inside a bin, the open-side
          edge is moved outwards by .1% of the range (the first edge when
          ``right=True``, the last edge otherwise). If all values of `x`
          are equal, both outer edges are moved outwards instead.
        * sequence of scalars : the bin edges, which allows non-uniform
          widths. The range of `x` is not extended.
        * IntervalIndex : the exact bins to use. The IntervalIndex must be
          non-overlapping.

    right : bool, default True
        Whether the bins include their rightmost edge. With
        ``right == True`` (the default), the `bins` ``[1, 2, 3, 4]``
        denote (1,2], (2,3], (3,4]. Ignored when `bins` is an
        IntervalIndex.
    labels : array or False, default None
        Labels for the returned bins. Must have the same length as the
        resulting bins. If False, only integer indicators of the bins are
        returned. This affects the type of the output container (see
        below). Ignored when `bins` is an IntervalIndex. Passing True
        raises an error. Labels are required when ``ordered=False``.
    retbins : bool, default False
        Whether to also return the bins. Useful when `bins` is given as a
        scalar.
    precision : int, default 3
        The precision at which to store and display the bin labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive.
    duplicates : {default 'raise', 'drop'}, optional
        What to do when bin edges are not unique: raise ValueError or drop
        the non-unique edges.
    ordered : bool, default True
        Whether the labels are ordered. Applies to the returned
        Categorical and Series (with Categorical dtype). If True, the
        resulting categorical is ordered. If False, it is unordered and
        labels must be provided.

        .. versionadded:: 1.1.0

    Returns
    -------
    out : Categorical, Series, or ndarray
        An array-like object holding the bin of each value of `x`. Its
        type depends on the value of `labels`.

        * None (default) : a Series for Series `x`, a Categorical for any
          other input. The stored values are of Interval dtype.

        * sequence of scalars : a Series for Series `x`, a Categorical for
          any other input. The stored values have whatever type the
          sequence has.

        * False : an ndarray of integers.

    bins : numpy.ndarray or IntervalIndex.
        The computed or specified bins. Only returned when `retbins=True`.
        For scalar or sequence `bins`, this is an ndarray with the computed
        bins; with ``duplicates='drop'`` the non-unique edges are removed.
        For an IntervalIndex `bins`, this is equal to `bins`.

    See Also
    --------
    qcut : Discretize variable into equal-sized buckets based on rank
        or based on sample quantiles.
    Categorical : Array type for storing data that come from a
        fixed set of values.
    Series : One-dimensional array with axis labels (including time series).
    IntervalIndex : Immutable Index implementing an ordered, sliceable set.

    Notes
    -----
    NA values stay NA in the result. Values outside the bins become NA in
    the resulting Series or Categorical object.

    Reference :ref:`the user guide <reshaping.tile.cut>` for more examples.

    Examples
    --------
    Discretize into three equal-sized bins.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
    ... # doctest: +ELLIPSIS
    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
    ... # doctest: +ELLIPSIS
    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
    array([0.994, 3.   , 5.   , 7.   ]))

    Find the same bins but give them specific labels. The categories of
    the returned Categorical are `labels`, and it is ordered.

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
    ...        3, labels=["bad", "medium", "good"])
    ['bad', 'good', 'medium', 'medium', 'good', 'bad']
    Categories (3, object): ['bad' < 'medium' < 'good']

    ``ordered=False`` yields unordered categories when labels are passed.
    It also allows non-unique labels:

    >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
    ...        labels=["B", "A", "B"], ordered=False)
    ['B', 'B', 'A', 'A', 'B', 'B']
    Categories (2, object): ['A', 'B']

    ``labels=False`` returns just the integer bin indicators.

    >>> pd.cut([0, 1, 1, 2], bins=4, labels=False)
    array([0, 1, 1, 3])

    A Series input gives a Series with categorical dtype:

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, 3)
    ... # doctest: +ELLIPSIS
    a    (1.992, 4.667]
    b    (1.992, 4.667]
    c    (4.667, 7.333]
    d     (7.333, 10.0]
    e     (7.333, 10.0]
    dtype: category
    Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ...

    A Series input with ``labels=False`` gives a Series of bin indicators,
    mapping each value numerically to an interval of `bins`.

    >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    4.0
     e    NaN
     dtype: float64,
     array([ 0,  2,  4,  6,  8, 10]))

    Use ``duplicates='drop'`` when the bin edges are not unique.

    >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
    ...        right=False, duplicates='drop')
    ... # doctest: +ELLIPSIS
    (a    1.0
     b    2.0
     c    3.0
     d    3.0
     e    NaN
     dtype: float64,
     array([ 0,  2,  4,  6, 10]))

    An IntervalIndex for `bins` produces exactly those categories. Values
    not covered by the IntervalIndex become NaN: 0 lies to the left of the
    first bin (which is closed on the right), and 1.5 falls between two
    bins.

    >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
    >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
    [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
    Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]]
    """
    # NOTE: this binning code is changed a bit from histogram for var(x) == 0

    original = x
    x = _preprocess_for_cut(x)
    x, dtype = _coerce_to_type(x)

    if isinstance(bins, IntervalIndex):
        # Explicit intervals are used as given; they only have to be disjoint.
        if bins.is_overlapping:
            raise ValueError("Overlapping IntervalIndex is not accepted.")

    elif np.iterable(bins):
        # Explicit edges: normalise to an ndarray in the same numeric
        # representation that `x` was coerced to.
        edge_dtype = DT64NS_DTYPE if is_datetime64tz_dtype(bins) else None
        bins = np.asarray(bins, dtype=edge_dtype)
        bins = _convert_bin_to_numeric_type(bins, dtype)

        # GH 26045: cast to float64 to avoid an overflow
        steps = np.diff(bins.astype("float64"))
        if (steps < 0).any():
            raise ValueError("bins must increase monotonically.")

    else:
        # A bin count: derive equal-width edges from the observed range.
        if is_scalar(bins) and bins < 1:
            raise ValueError("`bins` should be a positive integer.")

        try:  # for array-like
            n_values = x.size
        except AttributeError:
            x = np.asarray(x)
            n_values = x.size

        if n_values == 0:
            raise ValueError("Cannot cut empty array")

        raw_low, raw_high = nanops.nanmin(x), nanops.nanmax(x)
        # Adding 0.0 promotes integer extremes to float.
        low = raw_low + 0.0
        high = raw_high + 0.0

        if np.isinf(low) or np.isinf(high):
            # GH 24314
            raise ValueError(
                "cannot specify integer `bins` when input data contains infinity"
            )

        if low == high:
            # Degenerate range: widen both end points before binning.
            if low != 0:
                low -= 0.001 * abs(low)
            else:
                low -= 0.001
            if high != 0:
                high += 0.001 * abs(high)
            else:
                high += 0.001
            bins = np.linspace(low, high, bins + 1, endpoint=True)
        else:
            # Regular range: bin first, then push the open-side edge outwards.
            bins = np.linspace(low, high, bins + 1, endpoint=True)
            margin = (high - low) * 0.001  # 0.1% of the range
            if right:
                bins[0] -= margin
            else:
                bins[-1] += margin

    fac, bins = _bins_to_cuts(
        x,
        bins,
        right=right,
        labels=labels,
        precision=precision,
        include_lowest=include_lowest,
        dtype=dtype,
        duplicates=duplicates,
        ordered=ordered,
    )

    return _postprocess_for_cut(fac, bins, retbins, dtype, original)

306 

307 

def qcut(
    x,
    q,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    duplicates: str = "raise",
):
    """
    Discretize a variable into buckets delimited by sample quantiles.

    The buckets are equal-sized, based on rank or on sample quantiles. For
    example, 1000 values split into 10 quantiles yield a Categorical object
    indicating the quantile membership of each data point.

    Parameters
    ----------
    x : 1d ndarray or Series
    q : int or list-like of float
        Number of quantiles (10 for deciles, 4 for quartiles, etc.), or
        alternately an array of quantiles, e.g. [0, .25, .5, .75, 1.] for
        quartiles.
    labels : array or False, default None
        Labels for the resulting bins. Must have the same length as the
        resulting bins. If False, only integer indicators of the bins are
        returned. Passing True raises an error.
    retbins : bool, optional
        Whether to also return the bins. Can be useful when `q` is given
        as a scalar.
    precision : int, optional
        The precision at which to store and display the bin labels.
    duplicates : {default 'raise', 'drop'}, optional
        What to do when bin edges are not unique: raise ValueError or drop
        the non-unique edges.

    Returns
    -------
    out : Categorical or Series or array of integers if labels is False
        The return type (Categorical or Series) depends on the input: a
        Series of type category if the input is a Series, else a
        Categorical. Bins are represented as categories when categorical
        data is returned.
    bins : ndarray of floats
        Returned only if `retbins` is True.

    Notes
    -----
    Out of bounds values will be NA in the resulting Categorical object

    Examples
    --------
    >>> pd.qcut(range(5), 4)
    ... # doctest: +ELLIPSIS
    [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
    Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ...

    >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
    ... # doctest: +SKIP
    ['good', 'good', 'medium', 'bad', 'bad']
    Categories (3, object): ['good' < 'medium' < 'bad']

    >>> pd.qcut(range(5), 4, labels=False)
    array([0, 0, 1, 2, 3])
    """
    original = x
    x = _preprocess_for_cut(x)
    x, dtype = _coerce_to_type(x)

    # An integer `q` means that many evenly spaced quantiles over [0, 1];
    # anything else is taken to be the quantiles themselves.
    if is_integer(q):
        quantiles = np.linspace(0, 1, q + 1)
    else:
        quantiles = q

    # The edges are computed from the non-NaN observations only.
    values = np.asarray(x)
    observed = values[~np.isnan(values)]
    edges = np.quantile(observed, quantiles)

    fac, edges = _bins_to_cuts(
        x,
        edges,
        labels=labels,
        precision=precision,
        include_lowest=True,
        dtype=dtype,
        duplicates=duplicates,
    )

    return _postprocess_for_cut(fac, edges, retbins, dtype, original)

390 

391 

def _bins_to_cuts(
    x,
    bins: np.ndarray,
    right: bool = True,
    labels=None,
    precision: int = 3,
    include_lowest: bool = False,
    dtype=None,
    duplicates: str = "raise",
    ordered: bool = True,
):
    """
    Map the values of `x` onto `bins` and build the binned result.

    Returns a ``(result, bins)`` pair. ``result`` is a Categorical unless
    ``labels is False``, in which case it is an array of zero-based bin
    numbers (cast to float64, with NaN for unbinned values, when any value
    is missing or falls outside the bins). ``bins`` is the edge array
    actually used, i.e. with duplicate edges removed when
    ``duplicates="drop"`` applied.

    Raises
    ------
    ValueError
        If ``ordered`` is False without labels, ``duplicates`` is not one
        of "raise"/"drop", edges are duplicated under
        ``duplicates="raise"``, or `labels` is of the wrong kind, length
        or (for ordered output) not unique.
    """
    if labels is None and not ordered:
        raise ValueError("'labels' must be provided if 'ordered = False'")

    if duplicates not in ("raise", "drop"):
        raise ValueError(
            "invalid value for 'duplicates' parameter, valid options are: raise, drop"
        )

    if isinstance(bins, IntervalIndex):
        # we have a fast-path here
        codes = bins.get_indexer(x)
        return Categorical.from_codes(codes, categories=bins, ordered=True), bins

    deduplicated = algos.unique(bins)
    # Exactly two edges are let through even when they coincide.
    if len(deduplicated) < len(bins) and len(bins) != 2:
        if duplicates == "raise":
            raise ValueError(
                f"Bin edges must be unique: {bins!r}.\n"
                "You can drop duplicate edges by setting the 'duplicates' kwarg"
            )
        bins = deduplicated

    # Right-closed bins need the left insertion point, and vice versa.
    side: Literal["left", "right"] = "left" if right else "right"
    ids = ensure_platform_int(bins.searchsorted(x, side=side))

    if include_lowest:
        # Values equal to the first edge belong to the first bin.
        ids[np.asarray(x) == bins[0]] = 1

    # Position 0 is below the first edge, len(bins) is above the last one.
    na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
    has_nas = na_mask.any()

    if labels is False:
        result = ids - 1
        if has_nas:
            # Integers cannot hold NaN, so switch to float first.
            result = result.astype(np.float64)
            np.putmask(result, na_mask, np.nan)
        return result, bins

    if labels is not None and not is_list_like(labels):
        raise ValueError(
            "Bin labels must either be False, None or passed in as a "
            "list-like argument"
        )

    if labels is None:
        labels = _format_labels(
            bins, precision, right=right, include_lowest=include_lowest, dtype=dtype
        )
    elif ordered and len(set(labels)) != len(labels):
        raise ValueError(
            "labels must be unique if ordered=True; pass ordered=False "
            "for duplicate labels"
        )
    elif len(labels) != len(bins) - 1:
        raise ValueError("Bin labels must be one fewer than the number of bin edges")

    if not is_categorical_dtype(labels):
        labels_are_unique = len(set(labels)) == len(labels)
        labels = Categorical(
            labels,
            categories=labels if labels_are_unique else None,
            ordered=ordered,
        )
    # TODO: handle mismatch between categorical label order and pandas.cut order.

    # Unbinned positions become code -1 after the shift below.
    np.putmask(ids, na_mask, 0)
    result = algos.take_nd(labels, ids - 1)

    return result, bins

473 

474 

def _coerce_to_type(x):
    """
    Convert datetime/timedelta, bool or nullable numeric data to a plain
    numeric form that the cut and qcut machinery can handle.

    Returns ``(x, dtype)``. ``dtype`` is the datelike dtype of the input
    (needed to convert the bins back later) or None for any other input.
    """
    dtype = None
    x_dtype = x.dtype

    if is_datetime64tz_dtype(x_dtype):
        dtype = x_dtype
    elif is_datetime64_dtype(x_dtype):
        x = to_datetime(x).astype("datetime64[ns]", copy=False)
        dtype = np.dtype("datetime64[ns]")
    elif is_timedelta64_dtype(x_dtype):
        x = to_timedelta(x)
        dtype = np.dtype("timedelta64[ns]")
    elif is_bool_dtype(x_dtype):
        # GH 20303
        x = x.astype(np.int64)
    # To support cut and qcut for IntegerArray we convert to float dtype.
    # Will properly support in the future.
    # https://github.com/pandas-dev/pandas/pull/31290
    # https://github.com/pandas-dev/pandas/issues/31389
    elif is_extension_array_dtype(x_dtype) and is_numeric_dtype(x_dtype):
        x = x.to_numpy(dtype=np.float64, na_value=np.nan)

    if dtype is None:
        # Not datelike: nothing more to convert.
        return x, dtype

    # GH 19768: force NaT to NaN during integer conversion
    as_int64 = x.view(np.int64)
    x = np.where(x.notna(), as_int64, np.nan)
    return x, dtype

506 

507 

def _convert_bin_to_numeric_type(bins, dtype):
    """
    Convert datetime/timedelta bins to their int64 representation.

    Bins are returned untouched when `dtype` is not datelike.

    Parameters
    ----------
    bins : list-like of bins
    dtype : dtype of data

    Raises
    ------
    ValueError if bins are not of a compat dtype to dtype
    """
    inferred = infer_dtype(bins, skipna=False)

    if is_timedelta64_dtype(dtype):
        if inferred not in ("timedelta", "timedelta64"):
            raise ValueError("bins must be of timedelta64 dtype")
        return to_timedelta(bins).view(np.int64)

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        if inferred not in ("datetime", "datetime64"):
            raise ValueError("bins must be of datetime64 dtype")
        converted = to_datetime(bins)
        if is_datetime64_dtype(converted):
            # As of 2.0, to_datetime may give non-nano, so we need to convert
            # here until the rest of this file recognizes non-nano
            converted = converted.astype("datetime64[ns]", copy=False)
        return converted.view(np.int64)

    return bins

540 

541 

def _convert_bin_to_datelike_type(bins, dtype):
    """
    Turn numeric bins back into datelike bins when the data was datelike.

    Parameters
    ----------
    bins : list-like of bins
    dtype : dtype of data

    Returns
    -------
    bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is
           datelike
    """
    if is_datetime64tz_dtype(dtype):
        # Read the integers as UTC, then move them to the data's time zone.
        as_utc = to_datetime(bins.astype(np.int64), utc=True)
        return as_utc.tz_convert(dtype.tz)

    if is_datetime_or_timedelta_dtype(dtype):
        return Index(bins.astype(np.int64), dtype=dtype)

    return bins

562 

563 

def _format_labels(
    bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None
):
    """
    Build the IntervalIndex of bin labels appropriate for `dtype`.

    Datelike edges are wrapped as Timestamp/Timedelta; any other edge is
    rounded, at a precision raised as far as needed to keep the edges
    distinct.
    """
    closed: IntervalLeftRight = "right" if right else "left"

    def _one_nanosecond_earlier(value):
        return value - Timedelta("1ns")

    if is_datetime64tz_dtype(dtype):

        def formatter(value):
            return Timestamp(value, tz=dtype.tz)

        adjust = _one_nanosecond_earlier
    elif is_datetime64_dtype(dtype):
        formatter = Timestamp
        adjust = _one_nanosecond_earlier
    elif is_timedelta64_dtype(dtype):
        formatter = Timedelta
        adjust = _one_nanosecond_earlier
    else:
        digits = _infer_precision(precision, bins)

        def formatter(value):
            return _round_frac(value, digits)

        def adjust(value):
            return value - 10 ** (-digits)

    breaks = list(map(formatter, bins))
    if right and include_lowest:
        # adjust lhs of first interval by precision to account for being right closed
        breaks[0] = adjust(breaks[0])

    return IntervalIndex.from_breaks(breaks, closed=closed)

592 

593 

def _preprocess_for_cut(x):
    """
    Validate the input of cut/qcut and make sure it is array-like.

    Input without an ``ndim`` attribute is converted with ``np.asarray``;
    input that already has one is passed through unchanged. Anything that
    is not 1-dimensional raises ValueError.
    """
    # Check that the passed array is a Pandas or Numpy object
    # We don't want to strip away a Pandas data-type here (e.g. datetimetz)
    if getattr(x, "ndim", None) is None:
        x = np.asarray(x)

    if x.ndim == 1:
        return x

    raise ValueError("Input array must be 1 dimensional")

609 

610 

def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original):
    """
    Shape the final return value of cut/qcut.

    When the caller originally passed a Series, the binned values are
    wrapped back into a Series carrying that Series' index and name. When
    `retbins` is set, the bins (converted back to a datelike type if the
    data was datelike) are returned alongside.
    """
    if isinstance(original, ABCSeries):
        fac = original._constructor(fac, index=original.index, name=original.name)

    if retbins:
        return fac, _convert_bin_to_datelike_type(bins, dtype)

    return fac

626 

627 

def _round_frac(x, precision: int):
    """
    Round the fractional part of the given number.

    Zero and non-finite values are returned unchanged. For a number whose
    integer part is zero, `precision` counts digits after the leading
    zeros of the fraction; otherwise it counts decimal places.
    """
    if not np.isfinite(x) or x == 0:
        return x

    fractional, integral = np.modf(x)
    digits = precision
    if integral == 0:
        # Skip the zeros that sit between the decimal point and the first
        # non-zero digit.
        leading_zeros = -int(np.floor(np.log10(abs(fractional)))) - 1
        digits = leading_zeros + precision
    return np.around(x, digits)

641 

642 

def _infer_precision(base_precision: int, bins) -> int:
    """
    Infer an appropriate precision for _round_frac.

    Returns the smallest precision in ``[base_precision, 20)`` at which
    rounding yields as many distinct values as there are bins, falling
    back to `base_precision` when no such precision exists.
    """

    def _keeps_edges_distinct(candidate: int) -> bool:
        rounded = [_round_frac(edge, candidate) for edge in bins]
        return algos.unique(rounded).size == bins.size

    candidates = range(base_precision, 20)
    return next(
        (candidate for candidate in candidates if _keeps_edges_distinct(candidate)),
        base_precision,  # default
    )