Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scikit_learn-1.4.dev0-py3.8-linux-x86_64.egg/sklearn/utils/stats.py: 12%

24 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

import numpy as np

from .extmath import stable_cumsum

4 

5 

def _weighted_percentile(array, sample_weight, percentile=50):
    """Compute the weighted percentile.

    Computes the lower weighted percentile. If `array` is a 2D array, the
    `percentile` is computed along the axis 0.

    .. versionchanged:: 0.24
        Accepts 2D `array`.

    Parameters
    ----------
    array : 1D or 2D array
        Values to take the weighted percentile of. A 0-d array is returned
        unchanged as a scalar.

    sample_weight : 1D or 2D array
        Weights for each value in `array`. Must be same shape as `array` or
        of shape `(array.shape[0],)`.

    percentile : int or float, default=50
        Percentile to compute. Must be value between 0 and 100.

    Returns
    -------
    percentile : scalar if `array` is 0-d or 1D, ndarray if `array` is 2D
        Weighted percentile. The value is always an element of `array`
        (no interpolation is performed).
    """
    n_dim = array.ndim
    if n_dim == 0:
        # Nothing to rank: unwrap the single value.
        return array[()]
    if n_dim == 1:
        # Treat a 1D input as a single column so the 2D logic below applies.
        array = array.reshape((-1, 1))
    # When sample_weight is 1D, repeat it for each of the array.shape[1] columns.
    if array.shape != sample_weight.shape and array.shape[0] == sample_weight.shape[0]:
        sample_weight = np.tile(sample_weight, (array.shape[1], 1)).T
    sorted_idx = np.argsort(array, axis=0)
    sorted_weights = np.take_along_axis(sample_weight, sorted_idx, axis=0)

    # Per-column cumulative weight, and the cumulative weight that the
    # requested percentile corresponds to (last row holds the column totals).
    weight_cdf = stable_cumsum(sorted_weights, axis=0)
    adjusted_percentile = percentile / 100 * weight_cdf[-1]

    # For percentile=0, ignore leading observations with sample_weight=0. GH20528
    # Nudging the target just above zero makes searchsorted skip those rows.
    mask = adjusted_percentile == 0
    adjusted_percentile[mask] = np.nextafter(
        adjusted_percentile[mask], adjusted_percentile[mask] + 1
    )

    # Position, within each sorted column, of the first cumulative weight
    # reaching the target.
    percentile_idx = np.array(
        [
            np.searchsorted(weight_cdf[:, i], adjusted_percentile[i])
            for i in range(weight_cdf.shape[1])
        ]
    )
    # In rare cases, percentile_idx equals sorted_idx.shape[0]; clamp it to
    # the last valid row.
    max_idx = sorted_idx.shape[0] - 1
    percentile_idx = np.clip(percentile_idx, 0, max_idx)

    # Map positions in sorted order back to rows of the original array.
    col_index = np.arange(array.shape[1])
    percentile_in_sorted = sorted_idx[percentile_idx, col_index]
    result = array[percentile_in_sorted, col_index]
    return result[0] if n_dim == 1 else result