1import numpy as np
2
3from .extmath import stable_cumsum
4
5
def _weighted_percentile(array, sample_weight, percentile=50):
    """Compute weighted percentile

    Computes lower weighted percentile. If `array` is a 2D array, the
    `percentile` is computed along the axis 0.

    For each column, the values are sorted, the weights are accumulated in
    that order, and the returned value is the first one whose cumulative
    weight reaches ``percentile / 100`` of the column's total weight.

    .. versionchanged:: 0.24
        Accepts 2D `array`.

    Parameters
    ----------
    array : 1D or 2D array
        Values to take the weighted percentile of. A 0-dimensional array is
        returned as its scalar value without looking at `sample_weight`.

    sample_weight : 1D or 2D array
        Weights for each value in `array`. Must be same shape as `array` or
        of shape `(array.shape[0],)`.

    percentile : int or float, default=50
        Percentile to compute. Must be value between 0 and 100. The range is
        not validated here; the resulting index is only clipped to the valid
        row range.

    Returns
    -------
    percentile : scalar if `array` 1D, ndarray if `array` 2D
        Weighted percentile. Always an element of `array` (no interpolation
        is performed); for 2D input there is one entry per column.
    """
    n_dim = array.ndim
    if n_dim == 0:
        return array[()]
    if n_dim == 1:
        # Work on a single-column 2D view so the code below is uniform.
        array = array.reshape((-1, 1))
    # When sample_weight 1D, repeat for each array.shape[1]
    if array.shape != sample_weight.shape and array.shape[0] == sample_weight.shape[0]:
        sample_weight = np.tile(sample_weight, (array.shape[1], 1)).T
    sorted_idx = np.argsort(array, axis=0)
    sorted_weights = np.take_along_axis(sample_weight, sorted_idx, axis=0)

    # Per-column cumulative weights in sorted order; the last row holds the
    # total weight of each column, which scales the requested percentile.
    weight_cdf = stable_cumsum(sorted_weights, axis=0)
    adjusted_percentile = percentile / 100 * weight_cdf[-1]

    # For percentile=0, ignore leading observations with sample_weight=0. GH20528
    # A threshold of exactly 0 would match a leading zero cumulative weight,
    # so it is nudged to the next representable float above 0.
    mask = adjusted_percentile == 0
    adjusted_percentile[mask] = np.nextafter(
        adjusted_percentile[mask], adjusted_percentile[mask] + 1
    )

    # Row (in sorted order) where each column's cumulative weight first
    # reaches its threshold.
    percentile_idx = np.array(
        [
            np.searchsorted(weight_cdf[:, i], adjusted_percentile[i])
            for i in range(weight_cdf.shape[1])
        ]
    )
    # In rare cases, percentile_idx equals to sorted_idx.shape[0]
    max_idx = sorted_idx.shape[0] - 1
    percentile_idx = np.clip(percentile_idx, 0, max_idx)

    # Map positions in sorted order back to rows of the original array.
    col_index = np.arange(array.shape[1])
    percentile_in_sorted = sorted_idx[percentile_idx, col_index]
    result = array[percentile_in_sorted, col_index]
    return result[0] if n_dim == 1 else result