# Copyright Anne M. Archibald 2008
# Released under the scipy license
import numpy as np
from ._ckdtree import cKDTree, cKDTreeNode

__all__ = ['minkowski_distance_p', 'minkowski_distance',
           'distance_matrix',
           'Rectangle', 'KDTree']


def minkowski_distance_p(x, y, p=2):
    """Compute the pth power of the L**p distance between two arrays.

    For efficiency, this function computes the L**p distance but does
    not extract the pth root. If `p` is 1 or infinity, this is equal to
    the actual L**p distance.

    The last dimensions of `x` and `y` must be the same length. Any
    other dimensions must be compatible for broadcasting.

    Parameters
    ----------
    x : (..., K) array_like
        Input array.
    y : (..., K) array_like
        Input array.
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use.

    Returns
    -------
    dist : ndarray
        pth power of the distance between the input arrays.

    Examples
    --------
    >>> from scipy.spatial import minkowski_distance_p
    >>> minkowski_distance_p([[0, 0], [0, 0]], [[1, 1], [0, 1]])
    array([2., 1.])
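
    With ``p=np.inf`` no root extraction is needed, so (an illustrative
    check that follows from the code below) the result already equals
    the true Chebyshev distance:

    >>> import numpy as np
    >>> minkowski_distance_p([[0, 0], [0, 0]], [[1, 1], [0, 1]], p=np.inf)
    array([1., 1.])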
41 """
42 x = np.asarray(x)
43 y = np.asarray(y)
45 # Find smallest common datatype with float64 (return type of this
46 # function) - addresses #10262.
47 # Don't just cast to float64 for complex input case.
48 common_datatype = np.promote_types(np.promote_types(x.dtype, y.dtype),
49 'float64')
51 # Make sure x and y are NumPy arrays of correct datatype.
52 x = x.astype(common_datatype)
53 y = y.astype(common_datatype)
55 if p == np.inf:
56 return np.amax(np.abs(y-x), axis=-1)
57 elif p == 1:
58 return np.sum(np.abs(y-x), axis=-1)
59 else:
60 return np.sum(np.abs(y-x)**p, axis=-1)


def minkowski_distance(x, y, p=2):
    """Compute the L**p distance between two arrays.

    The last dimensions of `x` and `y` must be the same length. Any
    other dimensions must be compatible for broadcasting.

    Parameters
    ----------
    x : (..., K) array_like
        Input array.
    y : (..., K) array_like
        Input array.
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use.

    Returns
    -------
    dist : ndarray
        Distance between the input arrays.

    Examples
    --------
    >>> from scipy.spatial import minkowski_distance
    >>> minkowski_distance([[0, 0], [0, 0]], [[1, 1], [0, 1]])
    array([ 1.41421356,  1.        ])
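
    The arrays broadcast against each other, so, for instance, the
    distances from one point to several points can be computed in a
    single call:

    >>> minkowski_distance([0, 0], [[1, 1], [0, 1]])
    array([1.41421356, 1.        ])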
89 """
90 x = np.asarray(x)
91 y = np.asarray(y)
92 if p == np.inf or p == 1:
93 return minkowski_distance_p(x, y, p)
94 else:
95 return minkowski_distance_p(x, y, p)**(1./p)


class Rectangle:
    """Hyperrectangle class.

    Represents a Cartesian product of intervals.
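
    Examples
    --------
    For instance, the unit square can be built from its two corners:

    >>> from scipy.spatial import Rectangle
    >>> rect = Rectangle([1, 1], [0, 0])
    >>> rect.volume()
    1.0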
102 """
103 def __init__(self, maxes, mins):
104 """Construct a hyperrectangle."""
105 self.maxes = np.maximum(maxes,mins).astype(float)
106 self.mins = np.minimum(maxes,mins).astype(float)
107 self.m, = self.maxes.shape
109 def __repr__(self):
110 return "<Rectangle %s>" % list(zip(self.mins, self.maxes))
112 def volume(self):
113 """Total volume."""
114 return np.prod(self.maxes-self.mins)
116 def split(self, d, split):
117 """Produce two hyperrectangles by splitting.
119 In general, if you need to compute maximum and minimum
120 distances to the children, it can be done more efficiently
121 by updating the maximum and minimum distances to the parent.
123 Parameters
124 ----------
125 d : int
126 Axis to split hyperrectangle along.
127 split : float
128 Position along axis `d` to split at.
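
        Examples
        --------
        Splitting the unit square along axis 0 at 0.5, for example,
        yields the two expected halves:

        >>> from scipy.spatial import Rectangle
        >>> less, greater = Rectangle([1, 1], [0, 0]).split(0, 0.5)
        >>> less
        <Rectangle [(0.0, 0.5), (0.0, 1.0)]>
        >>> greater
        <Rectangle [(0.5, 1.0), (0.0, 1.0)]>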
130 """
131 mid = np.copy(self.maxes)
132 mid[d] = split
133 less = Rectangle(self.mins, mid)
134 mid = np.copy(self.mins)
135 mid[d] = split
136 greater = Rectangle(mid, self.maxes)
137 return less, greater
139 def min_distance_point(self, x, p=2.):
140 """
141 Return the minimum distance between input and points in the
142 hyperrectangle.
144 Parameters
145 ----------
146 x : array_like
147 Input.
148 p : float, optional
149 Input.
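
        Examples
        --------
        For a point inside the rectangle the minimum distance is zero;
        outside, it is the distance to the nearest face or corner:

        >>> from scipy.spatial import Rectangle
        >>> rect = Rectangle([1, 1], [0, 0])
        >>> rect.min_distance_point([0.5, 0.5])
        0.0
        >>> rect.min_distance_point([2, 0.5])
        1.0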
151 """
152 return minkowski_distance(
153 0, np.maximum(0, np.maximum(self.mins-x, x-self.maxes)),
154 p
155 )
157 def max_distance_point(self, x, p=2.):
158 """
159 Return the maximum distance between input and points in the hyperrectangle.
161 Parameters
162 ----------
163 x : array_like
164 Input array.
165 p : float, optional
166 Input.
168 """
169 return minkowski_distance(0, np.maximum(self.maxes-x, x-self.mins), p)
171 def min_distance_rectangle(self, other, p=2.):
172 """
173 Compute the minimum distance between points in the two hyperrectangles.
175 Parameters
176 ----------
177 other : hyperrectangle
178 Input.
179 p : float
180 Input.
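
        Examples
        --------
        For example, two unit squares separated by a gap of 1 along
        axis 0 are at minimum distance 1:

        >>> from scipy.spatial import Rectangle
        >>> a = Rectangle([1, 1], [0, 0])
        >>> b = Rectangle([3, 1], [2, 0])
        >>> a.min_distance_rectangle(b)
        1.0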
182 """
183 return minkowski_distance(
184 0,
185 np.maximum(0, np.maximum(self.mins-other.maxes,
186 other.mins-self.maxes)),
187 p
188 )
190 def max_distance_rectangle(self, other, p=2.):
191 """
192 Compute the maximum distance between points in the two hyperrectangles.
194 Parameters
195 ----------
196 other : hyperrectangle
197 Input.
198 p : float, optional
199 Input.
201 """
202 return minkowski_distance(
203 0, np.maximum(self.maxes-other.mins, other.maxes-self.mins), p)


class KDTree(cKDTree):
    """kd-tree for quick nearest-neighbor lookup.

    This class provides an index into a set of k-dimensional points
    which can be used to rapidly look up the nearest neighbors of any
    point.

    Parameters
    ----------
    data : array_like, shape (n,m)
        The n data points of dimension m to be indexed. This array is
        not copied unless this is necessary to produce a contiguous
        array of doubles, and so modifying this data will result in
        bogus results. The data are also copied if the kd-tree is built
        with copy_data=True.
    leafsize : positive int, optional
        The number of points at which the algorithm switches over to
        brute-force. Default: 10.
    compact_nodes : bool, optional
        If True, the kd-tree is built to shrink the hyperrectangles to
        the actual data range. This usually gives a more compact tree that
        is robust against degenerated input data and gives faster queries
        at the expense of longer build time. Default: True.
    copy_data : bool, optional
        If True the data is always copied to protect the kd-tree against
        data corruption. Default: False.
    balanced_tree : bool, optional
        If True, the median is used to split the hyperrectangles instead of
        the midpoint. This usually gives a more compact tree and
        faster queries at the expense of longer build time. Default: True.
    boxsize : array_like or scalar, optional
        Apply an m-d toroidal topology to the KDTree. The topology is
        generated by :math:`x_i + n_i L_i` where :math:`n_i` are integers
        and :math:`L_i` is the boxsize along the i-th dimension. The input
        data must be wrapped into :math:`[0, L_i)`. A ValueError is raised
        if any of the data is outside of this bound.

    Notes
    -----
    The algorithm used is described in Maneewongvatana and Mount 1999.
    The general idea is that the kd-tree is a binary tree, each of whose
    nodes represents an axis-aligned hyperrectangle. Each node specifies
    an axis and splits the set of points based on whether their coordinate
    along that axis is greater than or less than a particular value.

    During construction, the axis and splitting point are chosen by the
    "sliding midpoint" rule, which ensures that the cells do not all
    become long and thin.

    The tree can be queried for the r closest neighbors of any given point
    (optionally returning only those within some maximum distance of the
    point). It can also be queried, with a substantial gain in efficiency,
    for the r approximate closest neighbors.

    For large dimensions (20 is already large) do not expect this to run
    significantly faster than brute force. High-dimensional nearest-neighbor
    queries are a substantial open problem in computer science.

    Attributes
    ----------
    data : ndarray, shape (n,m)
        The n data points of dimension m to be indexed. This array is
        not copied unless this is necessary to produce a contiguous
        array of doubles. The data are also copied if the kd-tree is built
        with `copy_data=True`.
    leafsize : positive int
        The number of points at which the algorithm switches over to
        brute-force.
    m : int
        The dimension of a single data-point.
    n : int
        The number of data points.
    maxes : ndarray, shape (m,)
        The maximum value in each dimension of the n data points.
    mins : ndarray, shape (m,)
        The minimum value in each dimension of the n data points.
    size : int
        The number of nodes in the tree.
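
    Examples
    --------
    As a minimal illustration, build a tree over a small point set and
    inspect the attributes described above:

    >>> import numpy as np
    >>> from scipy.spatial import KDTree
    >>> tree = KDTree(np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]]))
    >>> tree.n, tree.m
    (4, 2)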
285 """
287 class node:
288 @staticmethod
289 def _create(ckdtree_node=None):
290 """Create either an inner or leaf node, wrapping a cKDTreeNode instance"""
291 if ckdtree_node is None:
292 return KDTree.node(ckdtree_node)
293 elif ckdtree_node.split_dim == -1:
294 return KDTree.leafnode(ckdtree_node)
295 else:
296 return KDTree.innernode(ckdtree_node)
298 def __init__(self, ckdtree_node=None):
299 if ckdtree_node is None:
300 ckdtree_node = cKDTreeNode()
301 self._node = ckdtree_node
303 def __lt__(self, other):
304 return id(self) < id(other)
306 def __gt__(self, other):
307 return id(self) > id(other)
309 def __le__(self, other):
310 return id(self) <= id(other)
312 def __ge__(self, other):
313 return id(self) >= id(other)
315 def __eq__(self, other):
316 return id(self) == id(other)
318 class leafnode(node):
319 @property
320 def idx(self):
321 return self._node.indices
323 @property
324 def children(self):
325 return self._node.children
327 class innernode(node):
328 def __init__(self, ckdtreenode):
329 assert isinstance(ckdtreenode, cKDTreeNode)
330 super().__init__(ckdtreenode)
331 self.less = KDTree.node._create(ckdtreenode.lesser)
332 self.greater = KDTree.node._create(ckdtreenode.greater)
334 @property
335 def split_dim(self):
336 return self._node.split_dim
338 @property
339 def split(self):
340 return self._node.split
342 @property
343 def children(self):
344 return self._node.children
346 @property
347 def tree(self):
348 if not hasattr(self, "_tree"):
349 self._tree = KDTree.node._create(super().tree)
351 return self._tree
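
    # Illustrative usage sketch for the wrapper classes above: the `tree`
    # property returns the root as a `KDTree.node`; inner nodes expose
    # `less`/`greater` subtrees and leaves expose `idx`, so the structure
    # can be walked recursively, e.g.:
    #
    #     def leaf_sizes(node):
    #         if isinstance(node, KDTree.leafnode):
    #             return [len(node.idx)]
    #         return leaf_sizes(node.less) + leaf_sizes(node.greater)
    #
    #     sizes = leaf_sizes(KDTree(points).tree)  # points: any (n, m) array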

    def __init__(self, data, leafsize=10, compact_nodes=True, copy_data=False,
                 balanced_tree=True, boxsize=None):
        data = np.asarray(data)
        if data.dtype.kind == 'c':
            raise TypeError("KDTree does not work with complex data")

        # Note KDTree has different default leafsize from cKDTree
        super().__init__(data, leafsize, compact_nodes, copy_data,
                         balanced_tree, boxsize)

    def query(
            self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf, workers=1):
        r"""Query the kd-tree for nearest neighbors.

        Parameters
        ----------
        x : array_like, last dimension self.m
            An array of points to query.
        k : int or Sequence[int], optional
            Either the number of nearest neighbors to return, or a list of the
            k-th nearest neighbors to return, starting from 1.
        eps : nonnegative float, optional
            Return approximate nearest neighbors; the kth returned value
            is guaranteed to be no further than (1+eps) times the
            distance to the real kth nearest neighbor.
        p : float, 1<=p<=infinity, optional
            Which Minkowski p-norm to use.
            1 is the sum-of-absolute-values distance ("Manhattan" distance).
            2 is the usual Euclidean distance.
            infinity is the maximum-coordinate-difference distance.
            A large, finite p may cause a ValueError if overflow can occur.
        distance_upper_bound : nonnegative float, optional
            Return only neighbors within this distance. This is used to prune
            tree searches, so if you are doing a series of nearest-neighbor
            queries, it may help to supply the distance to the nearest neighbor
            of the most recent point.
        workers : int, optional
            Number of workers to use for parallel processing. If -1 is given
            all CPU threads are used. Default: 1.

            .. versionadded:: 1.6.0

        Returns
        -------
        d : float or array of floats
            The distances to the nearest neighbors.
            If ``x`` has shape ``tuple+(self.m,)``, then ``d`` has shape
            ``tuple+(k,)``.
            When k == 1, the last dimension of the output is squeezed.
            Missing neighbors are indicated with infinite distances.
            Hits are sorted by distance (nearest first).

            .. versionchanged:: 1.9.0
               Previously if ``k=None``, then `d` was an object array of
               shape ``tuple``, containing lists of distances. This behavior
               has been removed, use `query_ball_point` instead.

        i : integer or array of integers
            The index of each neighbor in ``self.data``.
            ``i`` is the same shape as d.
            Missing neighbors are indicated with ``self.n``.

        Examples
        --------

        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> x, y = np.mgrid[0:5, 2:8]
        >>> tree = KDTree(np.c_[x.ravel(), y.ravel()])

        To query the nearest neighbors and return squeezed result, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=1)
        >>> print(dd, ii, sep='\n')
        [2.        0.2236068]
        [ 0 13]

        To query the nearest neighbors and return unsqueezed result, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1])
        >>> print(dd, ii, sep='\n')
        [[2.       ]
         [0.2236068]]
        [[ 0]
         [13]]

        To query the second nearest neighbors and return unsqueezed result,
        use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[2])
        >>> print(dd, ii, sep='\n')
        [[2.23606798]
         [0.80622577]]
        [[ 6]
         [19]]

        To query the first and second nearest neighbors, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=2)
        >>> print(dd, ii, sep='\n')
        [[2.         2.23606798]
         [0.2236068  0.80622577]]
        [[ 0  6]
         [13 19]]

        or, be more specific

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1, 2])
        >>> print(dd, ii, sep='\n')
        [[2.         2.23606798]
         [0.2236068  0.80622577]]
        [[ 0  6]
         [13 19]]

        """
        x = np.asarray(x)
        if x.dtype.kind == 'c':
            raise TypeError("KDTree does not work with complex data")

        if k is None:
            raise ValueError("k must be an integer or a sequence of integers")

        d, i = super().query(x, k, eps, p, distance_upper_bound, workers)
        if isinstance(i, int):
            i = np.intp(i)
        return d, i

    def query_ball_point(self, x, r, p=2., eps=0, workers=1,
                         return_sorted=None, return_length=False):
        """Find all points within distance r of point(s) x.

        Parameters
        ----------
        x : array_like, shape tuple + (self.m,)
            The point or points to search for neighbors of.
        r : array_like, float
            The radius of points to return, must broadcast to the length of x.
        p : float, optional
            Which Minkowski p-norm to use. Should be in the range [1, inf].
            A finite large p may cause a ValueError if overflow can occur.
        eps : nonnegative float, optional
            Approximate search. Branches of the tree are not explored if their
            nearest points are further than ``r / (1 + eps)``, and branches are
            added in bulk if their furthest points are nearer than
            ``r * (1 + eps)``.
        workers : int, optional
            Number of jobs to schedule for parallel processing. If -1 is given
            all processors are used. Default: 1.

            .. versionadded:: 1.6.0
        return_sorted : bool, optional
            Sorts returned indices if True and does not sort them if False. If
            None, does not sort single point queries, but does sort
            multi-point queries which was the behavior before this option
            was added.

            .. versionadded:: 1.6.0
        return_length : bool, optional
            Return the number of points inside the radius instead of a list
            of the indices.

            .. versionadded:: 1.6.0

        Returns
        -------
        results : list or array of lists
            If `x` is a single point, returns a list of the indices of the
            neighbors of `x`. If `x` is an array of points, returns an object
            array of shape tuple containing lists of neighbors.

        Notes
        -----
        If you have many points whose neighbors you want to find, you may save
        substantial amounts of time by putting them in a KDTree and using
        query_ball_tree.

        Examples
        --------
        >>> import numpy as np
        >>> from scipy import spatial
        >>> x, y = np.mgrid[0:5, 0:5]
        >>> points = np.c_[x.ravel(), y.ravel()]
        >>> tree = spatial.KDTree(points)
        >>> sorted(tree.query_ball_point([2, 0], 1))
        [5, 10, 11, 15]

        Query multiple points and plot the results:

        >>> import matplotlib.pyplot as plt
        >>> points = np.asarray(points)
        >>> plt.plot(points[:,0], points[:,1], '.')
        >>> for results in tree.query_ball_point(([2, 0], [3, 3]), 1):
        ...     nearby_points = points[results]
        ...     plt.plot(nearby_points[:,0], nearby_points[:,1], 'o')
        >>> plt.margins(0.1, 0.1)
        >>> plt.show()
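
        With ``return_length=True`` the same query, for instance, returns
        just the neighbor count:

        >>> tree.query_ball_point([2, 0], 1, return_length=True)
        4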
550 """
551 x = np.asarray(x)
552 if x.dtype.kind == 'c':
553 raise TypeError("KDTree does not work with complex data")
554 return super().query_ball_point(
555 x, r, p, eps, workers, return_sorted, return_length)
557 def query_ball_tree(self, other, r, p=2., eps=0):
558 """
559 Find all pairs of points between `self` and `other` whose distance is
560 at most r.
562 Parameters
563 ----------
564 other : KDTree instance
565 The tree containing points to search against.
566 r : float
567 The maximum distance, has to be positive.
568 p : float, optional
569 Which Minkowski norm to use. `p` has to meet the condition
570 ``1 <= p <= infinity``.
571 eps : float, optional
572 Approximate search. Branches of the tree are not explored
573 if their nearest points are further than ``r/(1+eps)``, and
574 branches are added in bulk if their furthest points are nearer
575 than ``r * (1+eps)``. `eps` has to be non-negative.
577 Returns
578 -------
579 results : list of lists
580 For each element ``self.data[i]`` of this tree, ``results[i]`` is a
581 list of the indices of its neighbors in ``other.data``.
583 Examples
584 --------
585 You can search all pairs of points between two kd-trees within a distance:
587 >>> import matplotlib.pyplot as plt
588 >>> import numpy as np
589 >>> from scipy.spatial import KDTree
590 >>> rng = np.random.default_rng()
591 >>> points1 = rng.random((15, 2))
592 >>> points2 = rng.random((15, 2))
593 >>> plt.figure(figsize=(6, 6))
594 >>> plt.plot(points1[:, 0], points1[:, 1], "xk", markersize=14)
595 >>> plt.plot(points2[:, 0], points2[:, 1], "og", markersize=14)
596 >>> kd_tree1 = KDTree(points1)
597 >>> kd_tree2 = KDTree(points2)
598 >>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
599 >>> for i in range(len(indexes)):
600 ... for j in indexes[i]:
601 ... plt.plot([points1[i, 0], points2[j, 0]],
602 ... [points1[i, 1], points2[j, 1]], "-r")
603 >>> plt.show()
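
        A small deterministic case, for illustration: the first point of
        the first tree has one neighbor in the second tree, the other
        has none:

        >>> tree_a = KDTree([[0., 0.], [1., 1.]])
        >>> tree_b = KDTree([[0., 0.1], [2., 2.]])
        >>> tree_a.query_ball_tree(tree_b, r=0.5)
        [[0], []]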
605 """
606 return super().query_ball_tree(other, r, p, eps)
608 def query_pairs(self, r, p=2., eps=0, output_type='set'):
609 """Find all pairs of points in `self` whose distance is at most r.
611 Parameters
612 ----------
613 r : positive float
614 The maximum distance.
615 p : float, optional
616 Which Minkowski norm to use. `p` has to meet the condition
617 ``1 <= p <= infinity``.
618 eps : float, optional
619 Approximate search. Branches of the tree are not explored
620 if their nearest points are further than ``r/(1+eps)``, and
621 branches are added in bulk if their furthest points are nearer
622 than ``r * (1+eps)``. `eps` has to be non-negative.
623 output_type : string, optional
624 Choose the output container, 'set' or 'ndarray'. Default: 'set'
626 .. versionadded:: 1.6.0
628 Returns
629 -------
630 results : set or ndarray
631 Set of pairs ``(i,j)``, with ``i < j``, for which the corresponding
632 positions are close. If output_type is 'ndarray', an ndarry is
633 returned instead of a set.
635 Examples
636 --------
637 You can search all pairs of points in a kd-tree within a distance:
639 >>> import matplotlib.pyplot as plt
640 >>> import numpy as np
641 >>> from scipy.spatial import KDTree
642 >>> rng = np.random.default_rng()
643 >>> points = rng.random((20, 2))
644 >>> plt.figure(figsize=(6, 6))
645 >>> plt.plot(points[:, 0], points[:, 1], "xk", markersize=14)
646 >>> kd_tree = KDTree(points)
647 >>> pairs = kd_tree.query_pairs(r=0.2)
648 >>> for (i, j) in pairs:
649 ... plt.plot([points[i, 0], points[j, 0]],
650 ... [points[i, 1], points[j, 1]], "-r")
651 >>> plt.show()
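
        With ``output_type='ndarray'`` the pairs come back as an array of
        index rows instead; a small deterministic case, for illustration:

        >>> tiny = KDTree([[0., 0.], [0., 1.], [2., 0.]])
        >>> tiny.query_pairs(r=1.5, output_type='ndarray')
        array([[0, 1]])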
653 """
654 return super().query_pairs(r, p, eps, output_type)
656 def count_neighbors(self, other, r, p=2., weights=None, cumulative=True):
657 """Count how many nearby pairs can be formed.
659 Count the number of pairs ``(x1,x2)`` can be formed, with ``x1`` drawn
660 from ``self`` and ``x2`` drawn from ``other``, and where
661 ``distance(x1, x2, p) <= r``.
663 Data points on ``self`` and ``other`` are optionally weighted by the
664 ``weights`` argument. (See below)
666 This is adapted from the "two-point correlation" algorithm described by
667 Gray and Moore [1]_. See notes for further discussion.
669 Parameters
670 ----------
671 other : KDTree
672 The other tree to draw points from, can be the same tree as self.
673 r : float or one-dimensional array of floats
674 The radius to produce a count for. Multiple radii are searched with
675 a single tree traversal.
676 If the count is non-cumulative(``cumulative=False``), ``r`` defines
677 the edges of the bins, and must be non-decreasing.
678 p : float, optional
679 1<=p<=infinity.
680 Which Minkowski p-norm to use.
681 Default 2.0.
682 A finite large p may cause a ValueError if overflow can occur.
683 weights : tuple, array_like, or None, optional
684 If None, the pair-counting is unweighted.
685 If given as a tuple, weights[0] is the weights of points in
686 ``self``, and weights[1] is the weights of points in ``other``;
687 either can be None to indicate the points are unweighted.
688 If given as an array_like, weights is the weights of points in
689 ``self`` and ``other``. For this to make sense, ``self`` and
690 ``other`` must be the same tree. If ``self`` and ``other`` are two
691 different trees, a ``ValueError`` is raised.
692 Default: None
694 .. versionadded:: 1.6.0
695 cumulative : bool, optional
696 Whether the returned counts are cumulative. When cumulative is set
697 to ``False`` the algorithm is optimized to work with a large number
698 of bins (>10) specified by ``r``. When ``cumulative`` is set to
699 True, the algorithm is optimized to work with a small number of
700 ``r``. Default: True
702 .. versionadded:: 1.6.0
704 Returns
705 -------
706 result : scalar or 1-D array
707 The number of pairs. For unweighted counts, the result is integer.
708 For weighted counts, the result is float.
709 If cumulative is False, ``result[i]`` contains the counts with
710 ``(-inf if i == 0 else r[i-1]) < R <= r[i]``
712 Notes
713 -----
714 Pair-counting is the basic operation used to calculate the two point
715 correlation functions from a data set composed of position of objects.
717 Two point correlation function measures the clustering of objects and
718 is widely used in cosmology to quantify the large scale structure
719 in our Universe, but it may be useful for data analysis in other fields
720 where self-similar assembly of objects also occur.
722 The Landy-Szalay estimator for the two point correlation function of
723 ``D`` measures the clustering signal in ``D``. [2]_
725 For example, given the position of two sets of objects,
727 - objects ``D`` (data) contains the clustering signal, and
729 - objects ``R`` (random) that contains no signal,
731 .. math::
733 \\xi(r) = \\frac{<D, D> - 2 f <D, R> + f^2<R, R>}{f^2<R, R>},
735 where the brackets represents counting pairs between two data sets
736 in a finite bin around ``r`` (distance), corresponding to setting
737 `cumulative=False`, and ``f = float(len(D)) / float(len(R))`` is the
738 ratio between number of objects from data and random.
740 The algorithm implemented here is loosely based on the dual-tree
741 algorithm described in [1]_. We switch between two different
742 pair-cumulation scheme depending on the setting of ``cumulative``.
743 The computing time of the method we use when for
744 ``cumulative == False`` does not scale with the total number of bins.
745 The algorithm for ``cumulative == True`` scales linearly with the
746 number of bins, though it is slightly faster when only
747 1 or 2 bins are used. [5]_.
749 As an extension to the naive pair-counting,
750 weighted pair-counting counts the product of weights instead
751 of number of pairs.
752 Weighted pair-counting is used to estimate marked correlation functions
753 ([3]_, section 2.2),
754 or to properly calculate the average of data per distance bin
755 (e.g. [4]_, section 2.1 on redshift).
757 .. [1] Gray and Moore,
758 "N-body problems in statistical learning",
759 Mining the sky, 2000,
760 https://arxiv.org/abs/astro-ph/0012333
762 .. [2] Landy and Szalay,
763 "Bias and variance of angular correlation functions",
764 The Astrophysical Journal, 1993,
765 http://adsabs.harvard.edu/abs/1993ApJ...412...64L
767 .. [3] Sheth, Connolly and Skibba,
768 "Marked correlations in galaxy formation models",
769 Arxiv e-print, 2005,
770 https://arxiv.org/abs/astro-ph/0511773
772 .. [4] Hawkins, et al.,
773 "The 2dF Galaxy Redshift Survey: correlation functions,
774 peculiar velocities and the matter density of the Universe",
775 Monthly Notices of the Royal Astronomical Society, 2002,
776 http://adsabs.harvard.edu/abs/2003MNRAS.346...78H
778 .. [5] https://github.com/scipy/scipy/pull/5647#issuecomment-168474926
780 Examples
781 --------
782 You can count neighbors number between two kd-trees within a distance:
784 >>> import numpy as np
785 >>> from scipy.spatial import KDTree
786 >>> rng = np.random.default_rng()
787 >>> points1 = rng.random((5, 2))
788 >>> points2 = rng.random((5, 2))
789 >>> kd_tree1 = KDTree(points1)
790 >>> kd_tree2 = KDTree(points2)
791 >>> kd_tree1.count_neighbors(kd_tree2, 0.2)
792 1
794 This number is same as the total pair number calculated by
795 `query_ball_tree`:
797 >>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
798 >>> sum([len(i) for i in indexes])
799 1
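
        A deterministic check of the non-cumulative binning, for
        illustration: three points whose pairwise distances are 1, 1 and
        sqrt(2), counting ordered pairs (including self-pairs at distance
        0), give 7 pairs in the bin ``(-inf, 1.0]`` and 2 in
        ``(1.0, 1.5]``:

        >>> tree = KDTree([[0., 0.], [0., 1.], [1., 0.]])
        >>> tree.count_neighbors(tree, r=[1.0, 1.5], cumulative=False)
        array([7, 2])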
801 """
802 return super().count_neighbors(other, r, p, weights, cumulative)
804 def sparse_distance_matrix(
805 self, other, max_distance, p=2., output_type='dok_matrix'):
806 """Compute a sparse distance matrix.
808 Computes a distance matrix between two KDTrees, leaving as zero
809 any distance greater than max_distance.
811 Parameters
812 ----------
813 other : KDTree
815 max_distance : positive float
817 p : float, 1<=p<=infinity
818 Which Minkowski p-norm to use.
819 A finite large p may cause a ValueError if overflow can occur.
821 output_type : string, optional
822 Which container to use for output data. Options: 'dok_matrix',
823 'coo_matrix', 'dict', or 'ndarray'. Default: 'dok_matrix'.
825 .. versionadded:: 1.6.0
827 Returns
828 -------
829 result : dok_matrix, coo_matrix, dict or ndarray
830 Sparse matrix representing the results in "dictionary of keys"
831 format. If a dict is returned the keys are (i,j) tuples of indices.
832 If output_type is 'ndarray' a record array with fields 'i', 'j',
833 and 'v' is returned,
835 Examples
836 --------
837 You can compute a sparse distance matrix between two kd-trees:
839 >>> import numpy as np
840 >>> from scipy.spatial import KDTree
841 >>> rng = np.random.default_rng()
842 >>> points1 = rng.random((5, 2))
843 >>> points2 = rng.random((5, 2))
844 >>> kd_tree1 = KDTree(points1)
845 >>> kd_tree2 = KDTree(points2)
846 >>> sdm = kd_tree1.sparse_distance_matrix(kd_tree2, 0.3)
847 >>> sdm.toarray()
848 array([[0. , 0. , 0.12295571, 0. , 0. ],
849 [0. , 0. , 0. , 0. , 0. ],
850 [0.28942611, 0. , 0. , 0.2333084 , 0. ],
851 [0. , 0. , 0. , 0. , 0. ],
852 [0.24617575, 0.29571802, 0.26836782, 0. , 0. ]])
854 You can check distances above the `max_distance` are zeros:
856 >>> from scipy.spatial import distance_matrix
857 >>> distance_matrix(points1, points2)
858 array([[0.56906522, 0.39923701, 0.12295571, 0.8658745 , 0.79428925],
859 [0.37327919, 0.7225693 , 0.87665969, 0.32580855, 0.75679479],
860 [0.28942611, 0.30088013, 0.6395831 , 0.2333084 , 0.33630734],
861 [0.31994999, 0.72658602, 0.71124834, 0.55396483, 0.90785663],
862 [0.24617575, 0.29571802, 0.26836782, 0.57714465, 0.6473269 ]])
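
        A small deterministic case with ``output_type='dict'``, for
        illustration; only the one pair within `max_distance` appears:

        >>> tiny1 = KDTree([[0., 0.], [1., 0.]])
        >>> tiny2 = KDTree([[0., 0.5]])
        >>> tiny1.sparse_distance_matrix(tiny2, 1.0, output_type='dict')
        {(0, 0): 0.5}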
864 """
865 return super().sparse_distance_matrix(
866 other, max_distance, p, output_type)
869def distance_matrix(x, y, p=2, threshold=1000000):
870 """Compute the distance matrix.
872 Returns the matrix of all pair-wise distances.
874 Parameters
875 ----------
876 x : (M, K) array_like
877 Matrix of M vectors in K dimensions.
878 y : (N, K) array_like
879 Matrix of N vectors in K dimensions.
880 p : float, 1 <= p <= infinity
881 Which Minkowski p-norm to use.
882 threshold : positive int
883 If ``M * N * K`` > `threshold`, algorithm uses a Python loop instead
884 of large temporary arrays.
886 Returns
887 -------
888 result : (M, N) ndarray
889 Matrix containing the distance from every vector in `x` to every vector
890 in `y`.
892 Examples
893 --------
894 >>> from scipy.spatial import distance_matrix
895 >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]])
896 array([[ 1. , 1.41421356],
897 [ 1.41421356, 1. ]])
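
    The `threshold` parameter only changes how the result is computed, not
    its value; forcing the loop-based path, for instance, gives the same
    matrix:

    >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]], threshold=1)
    array([[ 1.        ,  1.41421356],
           [ 1.41421356,  1.        ]])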
899 """
901 x = np.asarray(x)
902 m, k = x.shape
903 y = np.asarray(y)
904 n, kk = y.shape
906 if k != kk:
907 raise ValueError(f"x contains {k}-dimensional vectors but y contains "
908 f"{kk}-dimensional vectors")
910 if m*n*k <= threshold:
911 return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
912 else:
913 result = np.empty((m,n),dtype=float) # FIXME: figure out the best dtype
914 if m < n:
915 for i in range(m):
916 result[i,:] = minkowski_distance(x[i],y,p)
917 else:
918 for j in range(n):
919 result[:,j] = minkowski_distance(x,y[j],p)
920 return result