Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_binned

1import builtins

2import numpy as np

3from numpy.testing import suppress_warnings

4from operator import index

5from collections import namedtuple

7__all__ = ['binned_statistic',

8 'binned_statistic_2d',

9 'binned_statistic_dd']

12BinnedStatisticResult = namedtuple('BinnedStatisticResult',

13 ('statistic', 'bin_edges', 'binnumber'))

16def binned_statistic(x, values, statistic='mean',

17 bins=10, range=None):

18 """

19 Compute a binned statistic for one or more sets of data.

21 This is a generalization of a histogram function. A histogram divides

22 the space into bins, and returns the count of the number of points in

23 each bin. This function allows the computation of the sum, mean, median,

24 or other statistic of the values (or set of values) within each bin.

26 Parameters

27 ----------

28 x : (N,) array_like

29 A sequence of values to be binned.

30 values : (N,) array_like or list of (N,) array_like

31 The data on which the statistic will be computed. This must be

32 the same shape as `x`, or a set of sequences - each the same shape as

33 `x`. If `values` is a set of sequences, the statistic will be computed

34 on each independently.

35 statistic : string or callable, optional

36 The statistic to compute (default is 'mean').

37 The following statistics are available:

39 * 'mean' : compute the mean of values for points within each bin.

40 Empty bins will be represented by NaN.

41 * 'std' : compute the standard deviation within each bin. This

42 is implicitly calculated with ddof=0.

43 * 'median' : compute the median of values for points within each

44 bin. Empty bins will be represented by NaN.

45 * 'count' : compute the count of points within each bin. This is

46 identical to an unweighted histogram. `values` array is not

47 referenced.

48 * 'sum' : compute the sum of values for points within each bin.

49 This is identical to a weighted histogram.

50 * 'min' : compute the minimum of values for points within each bin.

51 Empty bins will be represented by NaN.

52 * 'max' : compute the maximum of values for point within each bin.

53 Empty bins will be represented by NaN.

54 * function : a user-defined function which takes a 1D array of

55 values, and outputs a single numerical statistic. This function

56 will be called on the values in each bin. Empty bins will be

57 represented by function([]), or NaN if this returns an error.

59 bins : int or sequence of scalars, optional

60 If `bins` is an int, it defines the number of equal-width bins in the

61 given range (10 by default). If `bins` is a sequence, it defines the

62 bin edges, including the rightmost edge, allowing for non-uniform bin

63 widths. Values in `x` that are smaller than lowest bin edge are

64 assigned to bin number 0, values beyond the highest bin are assigned to

65 ``bins[-1]``. If the bin edges are specified, the number of bins will

66 be, (nx = len(bins)-1).

67 range : (float, float) or [(float, float)], optional

68 The lower and upper range of the bins. If not provided, range

69 is simply ``(x.min(), x.max())``. Values outside the range are

70 ignored.

72 Returns

73 -------

74 statistic : array

75 The values of the selected statistic in each bin.

76 bin_edges : array of dtype float

77 Return the bin edges ``(length(statistic)+1)``.

78 binnumber: 1-D ndarray of ints

79 Indices of the bins (corresponding to `bin_edges`) in which each value

80 of `x` belongs. Same length as `values`. A binnumber of `i` means the

81 corresponding value is between (bin_edges[i-1], bin_edges[i]).

83 See Also

84 --------

85 numpy.digitize, numpy.histogram, binned_statistic_2d, binned_statistic_dd

87 Notes

88 -----

89 All but the last (righthand-most) bin is half-open. In other words, if

90 `bins` is ``[1, 2, 3, 4]``, then the first bin is ``[1, 2)`` (including 1,

91 but excluding 2) and the second ``[2, 3)``. The last bin, however, is

92 ``[3, 4]``, which *includes* 4.

94 .. versionadded:: 0.11.0

96 Examples

97 --------

98 >>> import numpy as np

99 >>> from scipy import stats

100 >>> import matplotlib.pyplot as plt

101

102 First some basic examples:

103

104 Create two evenly spaced bins in the range of the given sample, and sum the

105 corresponding values in each of those bins:

106

107 >>> values = [1.0, 1.0, 2.0, 1.5, 3.0]

108 >>> stats.binned_statistic([1, 1, 2, 5, 7], values, 'sum', bins=2)

109 BinnedStatisticResult(statistic=array([4. , 4.5]),

110 bin_edges=array([1., 4., 7.]), binnumber=array([1, 1, 1, 2, 2]))

111

112 Multiple arrays of values can also be passed. The statistic is calculated

113 on each set independently:

114

115 >>> values = [[1.0, 1.0, 2.0, 1.5, 3.0], [2.0, 2.0, 4.0, 3.0, 6.0]]

116 >>> stats.binned_statistic([1, 1, 2, 5, 7], values, 'sum', bins=2)

117 BinnedStatisticResult(statistic=array([[4. , 4.5],

118 [8. , 9. ]]), bin_edges=array([1., 4., 7.]),

119 binnumber=array([1, 1, 1, 2, 2]))

120

121 >>> stats.binned_statistic([1, 2, 1, 2, 4], np.arange(5), statistic='mean',

122 ... bins=3)

123 BinnedStatisticResult(statistic=array([1., 2., 4.]),

124 bin_edges=array([1., 2., 3., 4.]),

125 binnumber=array([1, 2, 1, 2, 3]))

126

127 As a second example, we now generate some random data of sailing boat speed

128 as a function of wind speed, and then determine how fast our boat is for

129 certain wind speeds:

130

131 >>> rng = np.random.default_rng()

132 >>> windspeed = 8 * rng.random(500)

133 >>> boatspeed = .3 * windspeed**.5 + .2 * rng.random(500)

134 >>> bin_means, bin_edges, binnumber = stats.binned_statistic(windspeed,

135 ... boatspeed, statistic='median', bins=[1,2,3,4,5,6,7])

136 >>> plt.figure()

137 >>> plt.plot(windspeed, boatspeed, 'b.', label='raw data')

138 >>> plt.hlines(bin_means, bin_edges[:-1], bin_edges[1:], colors='g', lw=5,

139 ... label='binned statistic of data')

140 >>> plt.legend()

141

142 Now we can use ``binnumber`` to select all datapoints with a windspeed

143 below 1:

144

145 >>> low_boatspeed = boatspeed[binnumber == 0]

146

147 As a final example, we will use ``bin_edges`` and ``binnumber`` to make a

148 plot of a distribution that shows the mean and distribution around that

149 mean per bin, on top of a regular histogram and the probability

150 distribution function:

151

152 >>> x = np.linspace(0, 5, num=500)

153 >>> x_pdf = stats.maxwell.pdf(x)

154 >>> samples = stats.maxwell.rvs(size=10000)

155

156 >>> bin_means, bin_edges, binnumber = stats.binned_statistic(x, x_pdf,

157 ... statistic='mean', bins=25)

158 >>> bin_width = (bin_edges[1] - bin_edges[0])

159 >>> bin_centers = bin_edges[1:] - bin_width/2

160

161 >>> plt.figure()

162 >>> plt.hist(samples, bins=50, density=True, histtype='stepfilled',

163 ... alpha=0.2, label='histogram of data')

164 >>> plt.plot(x, x_pdf, 'r-', label='analytical pdf')

165 >>> plt.hlines(bin_means, bin_edges[:-1], bin_edges[1:], colors='g', lw=2,

166 ... label='binned statistic of data')

167 >>> plt.plot((binnumber - 0.5) * bin_width, x_pdf, 'g.', alpha=0.5)

168 >>> plt.legend(fontsize=10)

169 >>> plt.show()

170

171 """

172 try:

173 N = len(bins)

174 except TypeError:

175 N = 1

176

177 if N != 1:

178 bins = [np.asarray(bins, float)]

179

180 if range is not None:

181 if len(range) == 2:

182 range = [range]

183

184 medians, edges, binnumbers = binned_statistic_dd(

185 [x], values, statistic, bins, range)

186

187 return BinnedStatisticResult(medians, edges[0], binnumbers)

188

189

190BinnedStatistic2dResult = namedtuple('BinnedStatistic2dResult',

191 ('statistic', 'x_edge', 'y_edge',

192 'binnumber'))

193

194

195def binned_statistic_2d(x, y, values, statistic='mean',

196 bins=10, range=None, expand_binnumbers=False):

197 """

198 Compute a bidimensional binned statistic for one or more sets of data.

199

200 This is a generalization of a histogram2d function. A histogram divides

201 the space into bins, and returns the count of the number of points in

202 each bin. This function allows the computation of the sum, mean, median,

203 or other statistic of the values (or set of values) within each bin.

204

205 Parameters

206 ----------

207 x : (N,) array_like

208 A sequence of values to be binned along the first dimension.

209 y : (N,) array_like

210 A sequence of values to be binned along the second dimension.

211 values : (N,) array_like or list of (N,) array_like

212 The data on which the statistic will be computed. This must be

213 the same shape as `x`, or a list of sequences - each with the same

214 shape as `x`. If `values` is such a list, the statistic will be

215 computed on each independently.

216 statistic : string or callable, optional

217 The statistic to compute (default is 'mean').

218 The following statistics are available:

219

220 * 'mean' : compute the mean of values for points within each bin.

221 Empty bins will be represented by NaN.

222 * 'std' : compute the standard deviation within each bin. This

223 is implicitly calculated with ddof=0.

224 * 'median' : compute the median of values for points within each

225 bin. Empty bins will be represented by NaN.

226 * 'count' : compute the count of points within each bin. This is

227 identical to an unweighted histogram. `values` array is not

228 referenced.

229 * 'sum' : compute the sum of values for points within each bin.

230 This is identical to a weighted histogram.

231 * 'min' : compute the minimum of values for points within each bin.

232 Empty bins will be represented by NaN.

233 * 'max' : compute the maximum of values for point within each bin.

234 Empty bins will be represented by NaN.

235 * function : a user-defined function which takes a 1D array of

236 values, and outputs a single numerical statistic. This function

237 will be called on the values in each bin. Empty bins will be

238 represented by function([]), or NaN if this returns an error.

239

240 bins : int or [int, int] or array_like or [array, array], optional

241 The bin specification:

242

243 * the number of bins for the two dimensions (nx = ny = bins),

244 * the number of bins in each dimension (nx, ny = bins),

245 * the bin edges for the two dimensions (x_edge = y_edge = bins),

246 * the bin edges in each dimension (x_edge, y_edge = bins).

247

248 If the bin edges are specified, the number of bins will be,

249 (nx = len(x_edge)-1, ny = len(y_edge)-1).

250

251 range : (2,2) array_like, optional

252 The leftmost and rightmost edges of the bins along each dimension

253 (if not specified explicitly in the `bins` parameters):

254 [[xmin, xmax], [ymin, ymax]]. All values outside of this range will be

255 considered outliers and not tallied in the histogram.

256 expand_binnumbers : bool, optional

257 'False' (default): the returned `binnumber` is a shape (N,) array of

258 linearized bin indices.

259 'True': the returned `binnumber` is 'unraveled' into a shape (2,N)

260 ndarray, where each row gives the bin numbers in the corresponding

261 dimension.

262 See the `binnumber` returned value, and the `Examples` section.

263

264 .. versionadded:: 0.17.0

265

266 Returns

267 -------

268 statistic : (nx, ny) ndarray

269 The values of the selected statistic in each two-dimensional bin.

270 x_edge : (nx + 1) ndarray

271 The bin edges along the first dimension.

272 y_edge : (ny + 1) ndarray

273 The bin edges along the second dimension.

274 binnumber : (N,) array of ints or (2,N) ndarray of ints

275 This assigns to each element of `sample` an integer that represents the

276 bin in which this observation falls. The representation depends on the

277 `expand_binnumbers` argument. See `Notes` for details.

278

279

280 See Also

281 --------

282 numpy.digitize, numpy.histogram2d, binned_statistic, binned_statistic_dd

283

284 Notes

285 -----

286 Binedges:

287 All but the last (righthand-most) bin is half-open. In other words, if

288 `bins` is ``[1, 2, 3, 4]``, then the first bin is ``[1, 2)`` (including 1,

289 but excluding 2) and the second ``[2, 3)``. The last bin, however, is

290 ``[3, 4]``, which *includes* 4.

291

292 `binnumber`:

293 This returned argument assigns to each element of `sample` an integer that

294 represents the bin in which it belongs. The representation depends on the

295 `expand_binnumbers` argument. If 'False' (default): The returned

296 `binnumber` is a shape (N,) array of linearized indices mapping each

297 element of `sample` to its corresponding bin (using row-major ordering).

298 Note that the returned linearized bin indices are used for an array with

299 extra bins on the outer binedges to capture values outside of the defined

300 bin bounds.

301 If 'True': The returned `binnumber` is a shape (2,N) ndarray where

302 each row indicates bin placements for each dimension respectively. In each

303 dimension, a binnumber of `i` means the corresponding value is between

304 (D_edge[i-1], D_edge[i]), where 'D' is either 'x' or 'y'.

305

306 .. versionadded:: 0.11.0

307

308 Examples

309 --------

310 >>> from scipy import stats

311

312 Calculate the counts with explicit bin-edges:

313

314 >>> x = [0.1, 0.1, 0.1, 0.6]

315 >>> y = [2.1, 2.6, 2.1, 2.1]

316 >>> binx = [0.0, 0.5, 1.0]

317 >>> biny = [2.0, 2.5, 3.0]

318 >>> ret = stats.binned_statistic_2d(x, y, None, 'count', bins=[binx, biny])

319 >>> ret.statistic

320 array([[2., 1.],

321 [1., 0.]])

322

323 The bin in which each sample is placed is given by the `binnumber`

324 returned parameter. By default, these are the linearized bin indices:

325

326 >>> ret.binnumber

327 array([5, 6, 5, 9])

328

329 The bin indices can also be expanded into separate entries for each

330 dimension using the `expand_binnumbers` parameter:

331

332 >>> ret = stats.binned_statistic_2d(x, y, None, 'count', bins=[binx, biny],

333 ... expand_binnumbers=True)

334 >>> ret.binnumber

335 array([[1, 1, 1, 2],

336 [1, 2, 1, 1]])

337

338 Which shows that the first three elements belong in the xbin 1, and the

339 fourth into xbin 2; and so on for y.

340

341 """

342

343 # This code is based on np.histogram2d

344 try:

345 N = len(bins)

346 except TypeError:

347 N = 1

348

349 if N != 1 and N != 2:

350 xedges = yedges = np.asarray(bins, float)

351 bins = [xedges, yedges]

352

353 medians, edges, binnumbers = binned_statistic_dd(

354 [x, y], values, statistic, bins, range,

355 expand_binnumbers=expand_binnumbers)

356

357 return BinnedStatistic2dResult(medians, edges[0], edges[1], binnumbers)

358

359

360BinnedStatisticddResult = namedtuple('BinnedStatisticddResult',

361 ('statistic', 'bin_edges',

362 'binnumber'))

363

364

365def _bincount(x, weights):

366 if np.iscomplexobj(weights):

367 a = np.bincount(x, np.real(weights))

368 b = np.bincount(x, np.imag(weights))

369 z = a + b*1j

370

371 else:

372 z = np.bincount(x, weights)

373 return z

374

375

376def binned_statistic_dd(sample, values, statistic='mean',

377 bins=10, range=None, expand_binnumbers=False,

378 binned_statistic_result=None):

379 """

380 Compute a multidimensional binned statistic for a set of data.

381

382 This is a generalization of a histogramdd function. A histogram divides

383 the space into bins, and returns the count of the number of points in

384 each bin. This function allows the computation of the sum, mean, median,

385 or other statistic of the values within each bin.

386

387 Parameters

388 ----------

389 sample : array_like

390 Data to histogram passed as a sequence of N arrays of length D, or

391 as an (N,D) array.

392 values : (N,) array_like or list of (N,) array_like

393 The data on which the statistic will be computed. This must be

394 the same shape as `sample`, or a list of sequences - each with the

395 same shape as `sample`. If `values` is such a list, the statistic

396 will be computed on each independently.

397 statistic : string or callable, optional

398 The statistic to compute (default is 'mean').

399 The following statistics are available:

400

401 * 'mean' : compute the mean of values for points within each bin.

402 Empty bins will be represented by NaN.

403 * 'median' : compute the median of values for points within each

404 bin. Empty bins will be represented by NaN.

405 * 'count' : compute the count of points within each bin. This is

406 identical to an unweighted histogram. `values` array is not

407 referenced.

408 * 'sum' : compute the sum of values for points within each bin.

409 This is identical to a weighted histogram.

410 * 'std' : compute the standard deviation within each bin. This

411 is implicitly calculated with ddof=0. If the number of values

412 within a given bin is 0 or 1, the computed standard deviation value

413 will be 0 for the bin.

414 * 'min' : compute the minimum of values for points within each bin.

415 Empty bins will be represented by NaN.

416 * 'max' : compute the maximum of values for point within each bin.

417 Empty bins will be represented by NaN.

418 * function : a user-defined function which takes a 1D array of

419 values, and outputs a single numerical statistic. This function

420 will be called on the values in each bin. Empty bins will be

421 represented by function([]), or NaN if this returns an error.

422

423 bins : sequence or positive int, optional

424 The bin specification must be in one of the following forms:

425

426 * A sequence of arrays describing the bin edges along each dimension.

427 * The number of bins for each dimension (nx, ny, ... = bins).

428 * The number of bins for all dimensions (nx = ny = ... = bins).

429 range : sequence, optional

430 A sequence of lower and upper bin edges to be used if the edges are

431 not given explicitly in `bins`. Defaults to the minimum and maximum

432 values along each dimension.

433 expand_binnumbers : bool, optional

434 'False' (default): the returned `binnumber` is a shape (N,) array of

435 linearized bin indices.

436 'True': the returned `binnumber` is 'unraveled' into a shape (D,N)

437 ndarray, where each row gives the bin numbers in the corresponding

438 dimension.

439 See the `binnumber` returned value, and the `Examples` section of

440 `binned_statistic_2d`.

441 binned_statistic_result : binnedStatisticddResult

442 Result of a previous call to the function in order to reuse bin edges

443 and bin numbers with new values and/or a different statistic.

444 To reuse bin numbers, `expand_binnumbers` must have been set to False

445 (the default)

446

447 .. versionadded:: 0.17.0

448

449 Returns

450 -------

451 statistic : ndarray, shape(nx1, nx2, nx3,...)

452 The values of the selected statistic in each two-dimensional bin.

453 bin_edges : list of ndarrays

454 A list of D arrays describing the (nxi + 1) bin edges for each

455 dimension.

456 binnumber : (N,) array of ints or (D,N) ndarray of ints

457 This assigns to each element of `sample` an integer that represents the

458 bin in which this observation falls. The representation depends on the

459 `expand_binnumbers` argument. See `Notes` for details.

460

461

462 See Also

463 --------

464 numpy.digitize, numpy.histogramdd, binned_statistic, binned_statistic_2d

465

466 Notes

467 -----

468 Binedges:

469 All but the last (righthand-most) bin is half-open in each dimension. In

470 other words, if `bins` is ``[1, 2, 3, 4]``, then the first bin is

471 ``[1, 2)`` (including 1, but excluding 2) and the second ``[2, 3)``. The

472 last bin, however, is ``[3, 4]``, which *includes* 4.

473

474 `binnumber`:

475 This returned argument assigns to each element of `sample` an integer that

476 represents the bin in which it belongs. The representation depends on the

477 `expand_binnumbers` argument. If 'False' (default): The returned

478 `binnumber` is a shape (N,) array of linearized indices mapping each

479 element of `sample` to its corresponding bin (using row-major ordering).

480 If 'True': The returned `binnumber` is a shape (D,N) ndarray where

481 each row indicates bin placements for each dimension respectively. In each

482 dimension, a binnumber of `i` means the corresponding value is between

483 (bin_edges[D][i-1], bin_edges[D][i]), for each dimension 'D'.

484

485 .. versionadded:: 0.11.0

486

487 Examples

488 --------

489 >>> import numpy as np

490 >>> from scipy import stats

491 >>> import matplotlib.pyplot as plt

492 >>> from mpl_toolkits.mplot3d import Axes3D

493

494 Take an array of 600 (x, y) coordinates as an example.

495 `binned_statistic_dd` can handle arrays of higher dimension `D`. But a plot

496 of dimension `D+1` is required.

497

498 >>> mu = np.array([0., 1.])

499 >>> sigma = np.array([[1., -0.5],[-0.5, 1.5]])

500 >>> multinormal = stats.multivariate_normal(mu, sigma)

501 >>> data = multinormal.rvs(size=600, random_state=235412)

502 >>> data.shape

503 (600, 2)

504

505 Create bins and count how many arrays fall in each bin:

506

507 >>> N = 60

508 >>> x = np.linspace(-3, 3, N)

509 >>> y = np.linspace(-3, 4, N)

510 >>> ret = stats.binned_statistic_dd(data, np.arange(600), bins=[x, y],

511 ... statistic='count')

512 >>> bincounts = ret.statistic

513

514 Set the volume and the location of bars:

515

516 >>> dx = x[1] - x[0]

517 >>> dy = y[1] - y[0]

518 >>> x, y = np.meshgrid(x[:-1]+dx/2, y[:-1]+dy/2)

519 >>> z = 0

520

521 >>> bincounts = bincounts.ravel()

522 >>> x = x.ravel()

523 >>> y = y.ravel()

524

525 >>> fig = plt.figure()

526 >>> ax = fig.add_subplot(111, projection='3d')

527 >>> with np.errstate(divide='ignore'): # silence random axes3d warning

528 ... ax.bar3d(x, y, z, dx, dy, bincounts)

529

530 Reuse bin numbers and bin edges with new values:

531

532 >>> ret2 = stats.binned_statistic_dd(data, -np.arange(600),

533 ... binned_statistic_result=ret,

534 ... statistic='mean')

535 """

536 known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max']

537 if not callable(statistic) and statistic not in known_stats:

538 raise ValueError('invalid statistic %r' % (statistic,))

539

540 try:

541 bins = index(bins)

542 except TypeError:

543 # bins is not an integer

544 pass

545 # If bins was an integer-like object, now it is an actual Python int.

546

547 # NOTE: for _bin_edges(), see e.g. gh-11365

548 if isinstance(bins, int) and not np.isfinite(sample).all():

549 raise ValueError('%r contains non-finite values.' % (sample,))

550

551 # `Ndim` is the number of dimensions (e.g. `2` for `binned_statistic_2d`)

552 # `Dlen` is the length of elements along each dimension.

553 # This code is based on np.histogramdd

554 try:

555 # `sample` is an ND-array.

556 Dlen, Ndim = sample.shape

557 except (AttributeError, ValueError):

558 # `sample` is a sequence of 1D arrays.

559 sample = np.atleast_2d(sample).T

560 Dlen, Ndim = sample.shape

561

562 # Store initial shape of `values` to preserve it in the output

563 values = np.asarray(values)

564 input_shape = list(values.shape)

565 # Make sure that `values` is 2D to iterate over rows

566 values = np.atleast_2d(values)

567 Vdim, Vlen = values.shape

568

569 # Make sure `values` match `sample`

570 if statistic != 'count' and Vlen != Dlen:

571 raise AttributeError('The number of `values` elements must match the '

572 'length of each `sample` dimension.')

573

574 try:

575 M = len(bins)

576 if M != Ndim:

577 raise AttributeError('The dimension of bins must be equal '

578 'to the dimension of the sample x.')

579 except TypeError:

580 bins = Ndim * [bins]

581

582 if binned_statistic_result is None:

583 nbin, edges, dedges = _bin_edges(sample, bins, range)

584 binnumbers = _bin_numbers(sample, nbin, edges, dedges)

585 else:

586 edges = binned_statistic_result.bin_edges

587 nbin = np.array([len(edges[i]) + 1 for i in builtins.range(Ndim)])

588 # +1 for outlier bins

589 dedges = [np.diff(edges[i]) for i in builtins.range(Ndim)]

590 binnumbers = binned_statistic_result.binnumber

591

592 # Avoid overflow with double precision. Complex `values` -> `complex128`.

593 result_type = np.result_type(values, np.float64)

594 result = np.empty([Vdim, nbin.prod()], dtype=result_type)

595

596 if statistic in {'mean', np.mean}:

597 result.fill(np.nan)

598 flatcount = _bincount(binnumbers, None)

599 a = flatcount.nonzero()

600 for vv in builtins.range(Vdim):

601 flatsum = _bincount(binnumbers, values[vv])

602 result[vv, a] = flatsum[a] / flatcount[a]

603 elif statistic in {'std', np.std}:

604 result.fill(np.nan)

605 flatcount = _bincount(binnumbers, None)

606 a = flatcount.nonzero()

607 for vv in builtins.range(Vdim):

608 flatsum = _bincount(binnumbers, values[vv])

609 delta = values[vv] - flatsum[binnumbers] / flatcount[binnumbers]

610 std = np.sqrt(

611 _bincount(binnumbers, delta*np.conj(delta))[a] / flatcount[a]

612 )

613 result[vv, a] = std

614 result = np.real(result)

615 elif statistic == 'count':

616 result = np.empty([Vdim, nbin.prod()], dtype=np.float64)

617 result.fill(0)

618 flatcount = _bincount(binnumbers, None)

619 a = np.arange(len(flatcount))

620 result[:, a] = flatcount[np.newaxis, :]

621 elif statistic in {'sum', np.sum}:

622 result.fill(0)

623 for vv in builtins.range(Vdim):

624 flatsum = _bincount(binnumbers, values[vv])

625 a = np.arange(len(flatsum))

626 result[vv, a] = flatsum

627 elif statistic in {'median', np.median}:

628 result.fill(np.nan)

629 for vv in builtins.range(Vdim):

630 i = np.lexsort((values[vv], binnumbers))

631 _, j, counts = np.unique(binnumbers[i],

632 return_index=True, return_counts=True)

633 mid = j + (counts - 1) / 2

634 mid_a = values[vv, i][np.floor(mid).astype(int)]

635 mid_b = values[vv, i][np.ceil(mid).astype(int)]

636 medians = (mid_a + mid_b) / 2

637 result[vv, binnumbers[i][j]] = medians

638 elif statistic in {'min', np.min}:

639 result.fill(np.nan)

640 for vv in builtins.range(Vdim):

641 i = np.argsort(values[vv])[::-1] # Reversed so the min is last

642 result[vv, binnumbers[i]] = values[vv, i]

643 elif statistic in {'max', np.max}:

644 result.fill(np.nan)

645 for vv in builtins.range(Vdim):

646 i = np.argsort(values[vv])

647 result[vv, binnumbers[i]] = values[vv, i]

648 elif callable(statistic):

649 with np.errstate(invalid='ignore'), suppress_warnings() as sup:

650 sup.filter(RuntimeWarning)

651 try:

652 null = statistic([])

653 except Exception:

654 null = np.nan

655 if np.iscomplexobj(null):

656 result = result.astype(np.complex128)

657 result.fill(null)

658 try:

659 _calc_binned_statistic(

660 Vdim, binnumbers, result, values, statistic

661 )

662 except ValueError:

663 result = result.astype(np.complex128)

664 _calc_binned_statistic(

665 Vdim, binnumbers, result, values, statistic

666 )

667

668 # Shape into a proper matrix

669 result = result.reshape(np.append(Vdim, nbin))

670

671 # Remove outliers (indices 0 and -1 for each bin-dimension).

672 core = tuple([slice(None)] + Ndim * [slice(1, -1)])

673 result = result[core]

674

675 # Unravel binnumbers into an ndarray, each row the bins for each dimension

676 if expand_binnumbers and Ndim > 1:

677 binnumbers = np.asarray(np.unravel_index(binnumbers, nbin))

678

679 if np.any(result.shape[1:] != nbin - 2):

680 raise RuntimeError('Internal Shape Error')

681

682 # Reshape to have output (`result`) match input (`values`) shape

683 result = result.reshape(input_shape[:-1] + list(nbin-2))

684

685 return BinnedStatisticddResult(result, edges, binnumbers)

686

687

688def _calc_binned_statistic(Vdim, bin_numbers, result, values, stat_func):

689 unique_bin_numbers = np.unique(bin_numbers)

690 for vv in builtins.range(Vdim):

691 bin_map = _create_binned_data(bin_numbers, unique_bin_numbers,

692 values, vv)

693 for i in unique_bin_numbers:

694 stat = stat_func(np.array(bin_map[i]))

695 if np.iscomplexobj(stat) and not np.iscomplexobj(result):

696 raise ValueError("The statistic function returns complex ")

697 result[vv, i] = stat

698

699

700def _create_binned_data(bin_numbers, unique_bin_numbers, values, vv):

701 """ Create hashmap of bin ids to values in bins

702 key: bin number

703 value: list of binned data

704 """

705 bin_map = dict()

706 for i in unique_bin_numbers:

707 bin_map[i] = []

708 for i in builtins.range(len(bin_numbers)):

709 bin_map[bin_numbers[i]].append(values[vv, i])

710 return bin_map

711

712

713def _bin_edges(sample, bins=None, range=None):

714 """ Create edge arrays

715 """

716 Dlen, Ndim = sample.shape

717

718 nbin = np.empty(Ndim, int) # Number of bins in each dimension

719 edges = Ndim * [None] # Bin edges for each dim (will be 2D array)

720 dedges = Ndim * [None] # Spacing between edges (will be 2D array)

721

722 # Select range for each dimension

723 # Used only if number of bins is given.

724 if range is None:

725 smin = np.atleast_1d(np.array(sample.min(axis=0), float))

726 smax = np.atleast_1d(np.array(sample.max(axis=0), float))

727 else:

728 if len(range) != Ndim:

729 raise ValueError(

730 f"range given for {len(range)} dimensions; {Ndim} required")

731 smin = np.empty(Ndim)

732 smax = np.empty(Ndim)

733 for i in builtins.range(Ndim):

734 if range[i][1] < range[i][0]:

735 raise ValueError(

736 "In {}range, start must be <= stop".format(

737 f"dimension {i + 1} of " if Ndim > 1 else ""))

738 smin[i], smax[i] = range[i]

739

740 # Make sure the bins have a finite width.

741 for i in builtins.range(len(smin)):

742 if smin[i] == smax[i]:

743 smin[i] = smin[i] - .5

744 smax[i] = smax[i] + .5

745

746 # Preserve sample floating point precision in bin edges

747 edges_dtype = (sample.dtype if np.issubdtype(sample.dtype, np.floating)

748 else float)

749

750 # Create edge arrays

751 for i in builtins.range(Ndim):

752 if np.isscalar(bins[i]):

753 nbin[i] = bins[i] + 2 # +2 for outlier bins

754 edges[i] = np.linspace(smin[i], smax[i], nbin[i] - 1,

755 dtype=edges_dtype)

756 else:

757 edges[i] = np.asarray(bins[i], edges_dtype)

758 nbin[i] = len(edges[i]) + 1 # +1 for outlier bins

759 dedges[i] = np.diff(edges[i])

760

761 nbin = np.asarray(nbin)

762

763 return nbin, edges, dedges

764

765

766def _bin_numbers(sample, nbin, edges, dedges):

767 """Compute the bin number each sample falls into, in each dimension

768 """

769 Dlen, Ndim = sample.shape

770

771 sampBin = [

772 np.digitize(sample[:, i], edges[i])

773 for i in range(Ndim)

774 ]

775

776 # Using `digitize`, values that fall on an edge are put in the right bin.

777 # For the rightmost bin, we want values equal to the right

778 # edge to be counted in the last bin, and not as an outlier.

779 for i in range(Ndim):

780 # Find the rounding precision

781 dedges_min = dedges[i].min()

782 if dedges_min == 0:

783 raise ValueError('The smallest edge difference is numerically 0.')

784 decimal = int(-np.log10(dedges_min)) + 6

785 # Find which points are on the rightmost edge.

786 on_edge = np.where((sample[:, i] >= edges[i][-1]) &

787 (np.around(sample[:, i], decimal) ==

788 np.around(edges[i][-1], decimal)))[0]

789 # Shift these points one bin to the left.

790 sampBin[i][on_edge] -= 1

791

792 # Compute the sample indices in the flattened statistic matrix.

793 binnumbers = np.ravel_multi_index(sampBin, nbin)

794

795 return binnumbers

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/stats/_binned_statistic.py: 8%

205 statements