Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/spatial/_kdtree.py: 34%

139 statements  

coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

# Copyright Anne M. Archibald 2008
# Released under the scipy license
import numpy as np
from ._ckdtree import cKDTree, cKDTreeNode

__all__ = ['minkowski_distance_p', 'minkowski_distance',
           'distance_matrix',
           'Rectangle', 'KDTree']


def minkowski_distance_p(x, y, p=2):
    """Compute the pth power of the L**p distance between two arrays.

    For efficiency, this function computes the L**p distance but does
    not extract the pth root. If `p` is 1 or infinity, this is equal to
    the actual L**p distance.

    The last dimensions of `x` and `y` must be the same length. Any
    other dimensions must be compatible for broadcasting.

    Parameters
    ----------
    x : (..., K) array_like
        Input array.
    y : (..., K) array_like
        Input array.
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use.

    Returns
    -------
    dist : ndarray
        pth power of the distance between the input arrays.

    Examples
    --------
    >>> from scipy.spatial import minkowski_distance_p
    >>> minkowski_distance_p([[0, 0], [0, 0]], [[1, 1], [0, 1]])
    array([2., 1.])

    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Find smallest common datatype with float64 (return type of this
    # function) - addresses #10262.
    # Don't just cast to float64 for complex input case.
    common_datatype = np.promote_types(np.promote_types(x.dtype, y.dtype),
                                       'float64')

    # Make sure x and y are NumPy arrays of correct datatype.
    x = x.astype(common_datatype)
    y = y.astype(common_datatype)

    if p == np.inf:
        return np.amax(np.abs(y-x), axis=-1)
    elif p == 1:
        return np.sum(np.abs(y-x), axis=-1)
    else:
        return np.sum(np.abs(y-x)**p, axis=-1)
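
# A minimal usage sketch (values chosen for illustration, not from the
# original docstring): with the default p=2 the result is the *squared*
# Euclidean distance, and integer inputs are promoted to float64.
# >>> minkowski_distance_p([1, 2], [3, 5])  # (3-1)**2 + (5-2)**2
# 13.0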



def minkowski_distance(x, y, p=2):
    """Compute the L**p distance between two arrays.

    The last dimensions of `x` and `y` must be the same length. Any
    other dimensions must be compatible for broadcasting.

    Parameters
    ----------
    x : (..., K) array_like
        Input array.
    y : (..., K) array_like
        Input array.
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use.

    Returns
    -------
    dist : ndarray
        Distance between the input arrays.

    Examples
    --------
    >>> from scipy.spatial import minkowski_distance
    >>> minkowski_distance([[0, 0], [0, 0]], [[1, 1], [0, 1]])
    array([ 1.41421356,  1.        ])

    """
    x = np.asarray(x)
    y = np.asarray(y)
    if p == np.inf or p == 1:
        return minkowski_distance_p(x, y, p)
    else:
        return minkowski_distance_p(x, y, p)**(1./p)
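
# A quick sketch of how the two helpers relate (illustrative values): for
# finite p, minkowski_distance is the pth root of minkowski_distance_p.
# >>> float(minkowski_distance([0, 0], [3, 4]))
# 5.0
# >>> float(minkowski_distance_p([0, 0], [3, 4]))
# 25.0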



class Rectangle:
    """Hyperrectangle class.

    Represents a Cartesian product of intervals.
    """
    def __init__(self, maxes, mins):
        """Construct a hyperrectangle."""
        self.maxes = np.maximum(maxes, mins).astype(float)
        self.mins = np.minimum(maxes, mins).astype(float)
        self.m, = self.maxes.shape

    def __repr__(self):
        return "<Rectangle %s>" % list(zip(self.mins, self.maxes))

    def volume(self):
        """Total volume."""
        return np.prod(self.maxes-self.mins)
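
    # A small sketch (hypothetical values): a 2 x 3 axis-aligned box has
    # volume 6, whatever order maxes and mins are passed in.
    # >>> float(Rectangle([2.0, 3.0], [0.0, 0.0]).volume())
    # 6.0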


    def split(self, d, split):
        """Produce two hyperrectangles by splitting.

        In general, if you need to compute maximum and minimum
        distances to the children, it can be done more efficiently
        by updating the maximum and minimum distances to the parent.

        Parameters
        ----------
        d : int
            Axis to split hyperrectangle along.
        split : float
            Position along axis `d` to split at.

        """
        mid = np.copy(self.maxes)
        mid[d] = split
        less = Rectangle(self.mins, mid)
        mid = np.copy(self.mins)
        mid[d] = split
        greater = Rectangle(mid, self.maxes)
        return less, greater
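
    # A hedged sketch of split (hypothetical values): cutting the unit square
    # along axis 0 at 0.3 yields [0, 0.3] x [0, 1] and [0.3, 1] x [0, 1].
    # >>> less, greater = Rectangle([1.0, 1.0], [0.0, 0.0]).split(0, 0.3)
    # >>> less, greater
    # (<Rectangle [(0.0, 0.3), (0.0, 1.0)]>, <Rectangle [(0.3, 1.0), (0.0, 1.0)]>)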


    def min_distance_point(self, x, p=2.):
        """
        Return the minimum distance between input and points in the
        hyperrectangle.

        Parameters
        ----------
        x : array_like
            Input point.
        p : float, optional
            Which Minkowski p-norm to use.

        """
        return minkowski_distance(
            0, np.maximum(0, np.maximum(self.mins-x, x-self.maxes)),
            p
        )

    def max_distance_point(self, x, p=2.):
        """
        Return the maximum distance between input and points in the hyperrectangle.

        Parameters
        ----------
        x : array_like
            Input point.
        p : float, optional
            Which Minkowski p-norm to use.

        """
        return minkowski_distance(0, np.maximum(self.maxes-x, x-self.mins), p)
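
    # A hedged sanity check for the point-distance helpers (hypothetical
    # values): for the unit square and the outside point (2, 0.5), the nearest
    # rectangle point is (1, 0.5) and the farthest is a corner at x = 0.
    # >>> rect = Rectangle([1.0, 1.0], [0.0, 0.0])
    # >>> float(rect.min_distance_point([2.0, 0.5]))
    # 1.0
    # >>> float(rect.max_distance_point([2.0, 0.5]))  # sqrt(2**2 + 0.5**2)
    # 2.0615528128088303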


    def min_distance_rectangle(self, other, p=2.):
        """
        Compute the minimum distance between points in the two hyperrectangles.

        Parameters
        ----------
        other : hyperrectangle
            Input rectangle.
        p : float, optional
            Which Minkowski p-norm to use.

        """
        return minkowski_distance(
            0,
            np.maximum(0, np.maximum(self.mins-other.maxes,
                                     other.mins-self.maxes)),
            p
        )

    def max_distance_rectangle(self, other, p=2.):
        """
        Compute the maximum distance between points in the two hyperrectangles.

        Parameters
        ----------
        other : hyperrectangle
            Input rectangle.
        p : float, optional
            Which Minkowski p-norm to use.

        """
        return minkowski_distance(
            0, np.maximum(self.maxes-other.mins, other.maxes-self.mins), p)
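
    # A hedged sketch for the rectangle-to-rectangle helpers (hypothetical
    # values): two unit squares separated by a gap of 1 along axis 0.
    # >>> r1 = Rectangle([1.0, 1.0], [0.0, 0.0])
    # >>> r2 = Rectangle([3.0, 1.0], [2.0, 0.0])
    # >>> float(r1.min_distance_rectangle(r2))  # width of the gap
    # 1.0
    # >>> float(r1.max_distance_rectangle(r2))  # sqrt(3**2 + 1**2)
    # 3.1622776601683795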



class KDTree(cKDTree):
    """kd-tree for quick nearest-neighbor lookup.

    This class provides an index into a set of k-dimensional points
    which can be used to rapidly look up the nearest neighbors of any
    point.

    Parameters
    ----------
    data : array_like, shape (n,m)
        The n data points of dimension m to be indexed. This array is
        not copied unless this is necessary to produce a contiguous
        array of doubles, and so modifying this data will result in
        bogus results. The data are also copied if the kd-tree is built
        with copy_data=True.
    leafsize : positive int, optional
        The number of points at which the algorithm switches over to
        brute-force. Default: 10.
    compact_nodes : bool, optional
        If True, the kd-tree is built to shrink the hyperrectangles to
        the actual data range. This usually gives a more compact tree that
        is robust against degenerate input data and gives faster queries
        at the expense of longer build time. Default: True.
    copy_data : bool, optional
        If True the data is always copied to protect the kd-tree against
        data corruption. Default: False.
    balanced_tree : bool, optional
        If True, the median is used to split the hyperrectangles instead of
        the midpoint. This usually gives a more compact tree and
        faster queries at the expense of longer build time. Default: True.
    boxsize : array_like or scalar, optional
        Apply an m-d toroidal topology to the KDTree. The topology is generated
        by :math:`x_i + n_i L_i` where :math:`n_i` are integers and :math:`L_i`
        is the boxsize along i-th dimension. The input data shall be wrapped
        into :math:`[0, L_i)`. A ValueError is raised if any of the data is
        outside of this bound.

    Notes
    -----
    The algorithm used is described in Maneewongvatana and Mount 1999.
    The general idea is that the kd-tree is a binary tree, each of whose
    nodes represents an axis-aligned hyperrectangle. Each node specifies
    an axis and splits the set of points based on whether their coordinate
    along that axis is greater than or less than a particular value.

    During construction, the axis and splitting point are chosen by the
    "sliding midpoint" rule, which ensures that the cells do not all
    become long and thin.

    The tree can be queried for the r closest neighbors of any given point
    (optionally returning only those within some maximum distance of the
    point). It can also be queried, with a substantial gain in efficiency,
    for the r approximate closest neighbors.

    For large dimensions (20 is already large) do not expect this to run
    significantly faster than brute force. High-dimensional nearest-neighbor
    queries are a substantial open problem in computer science.

    Attributes
    ----------
    data : ndarray, shape (n,m)
        The n data points of dimension m to be indexed. This array is
        not copied unless this is necessary to produce a contiguous
        array of doubles. The data are also copied if the kd-tree is built
        with `copy_data=True`.
    leafsize : positive int
        The number of points at which the algorithm switches over to
        brute-force.
    m : int
        The dimension of a single data-point.
    n : int
        The number of data points.
    maxes : ndarray, shape (m,)
        The maximum value in each dimension of the n data points.
    mins : ndarray, shape (m,)
        The minimum value in each dimension of the n data points.
    size : int
        The number of nodes in the tree.

    """

    class node:
        @staticmethod
        def _create(ckdtree_node=None):
            """Create either an inner or leaf node, wrapping a cKDTreeNode instance"""
            if ckdtree_node is None:
                return KDTree.node(ckdtree_node)
            elif ckdtree_node.split_dim == -1:
                return KDTree.leafnode(ckdtree_node)
            else:
                return KDTree.innernode(ckdtree_node)

        def __init__(self, ckdtree_node=None):
            if ckdtree_node is None:
                ckdtree_node = cKDTreeNode()
            self._node = ckdtree_node

        def __lt__(self, other):
            return id(self) < id(other)

        def __gt__(self, other):
            return id(self) > id(other)

        def __le__(self, other):
            return id(self) <= id(other)

        def __ge__(self, other):
            return id(self) >= id(other)

        def __eq__(self, other):
            return id(self) == id(other)

    class leafnode(node):
        @property
        def idx(self):
            return self._node.indices

        @property
        def children(self):
            return self._node.children

    class innernode(node):
        def __init__(self, ckdtreenode):
            assert isinstance(ckdtreenode, cKDTreeNode)
            super().__init__(ckdtreenode)
            self.less = KDTree.node._create(ckdtreenode.lesser)
            self.greater = KDTree.node._create(ckdtreenode.greater)

        @property
        def split_dim(self):
            return self._node.split_dim

        @property
        def split(self):
            return self._node.split

        @property
        def children(self):
            return self._node.children

    @property
    def tree(self):
        if not hasattr(self, "_tree"):
            self._tree = KDTree.node._create(super().tree)

        return self._tree

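    # A hedged traversal sketch (hypothetical data): the `tree` property wraps
    # the underlying cKDTree nodes, so a built tree can be walked via
    # `less`/`greater` down to leaf nodes, whose `idx` lists the indices of
    # the data points they hold.
    # >>> t = KDTree([[0, 0], [0, 1], [1, 0], [1, 1]], leafsize=2)
    # >>> root = t.tree  # an innernode, since 4 points > leafsize
    # >>> sorted(len(child.idx) for child in (root.less, root.greater))
    # [2, 2]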

    def __init__(self, data, leafsize=10, compact_nodes=True, copy_data=False,
                 balanced_tree=True, boxsize=None):
        data = np.asarray(data)
        if data.dtype.kind == 'c':
            raise TypeError("KDTree does not work with complex data")

        # Note KDTree has different default leafsize from cKDTree
        super().__init__(data, leafsize, compact_nodes, copy_data,
                         balanced_tree, boxsize)

    def query(
            self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf, workers=1):
        r"""Query the kd-tree for nearest neighbors.

        Parameters
        ----------
        x : array_like, last dimension self.m
            An array of points to query.
        k : int or Sequence[int], optional
            Either the number of nearest neighbors to return, or a list of the
            k-th nearest neighbors to return, starting from 1.
        eps : nonnegative float, optional
            Return approximate nearest neighbors; the kth returned value
            is guaranteed to be no further than (1+eps) times the
            distance to the real kth nearest neighbor.
        p : float, 1<=p<=infinity, optional
            Which Minkowski p-norm to use.
            1 is the sum-of-absolute-values distance ("Manhattan" distance).
            2 is the usual Euclidean distance.
            infinity is the maximum-coordinate-difference distance.
            A large, finite p may cause a ValueError if overflow can occur.
        distance_upper_bound : nonnegative float, optional
            Return only neighbors within this distance. This is used to prune
            tree searches, so if you are doing a series of nearest-neighbor
            queries, it may help to supply the distance to the nearest neighbor
            of the most recent point.
        workers : int, optional
            Number of workers to use for parallel processing. If -1 is given
            all CPU threads are used. Default: 1.

            .. versionadded:: 1.6.0

        Returns
        -------
        d : float or array of floats
            The distances to the nearest neighbors.
            If ``x`` has shape ``tuple+(self.m,)``, then ``d`` has shape
            ``tuple+(k,)``.
            When k == 1, the last dimension of the output is squeezed.
            Missing neighbors are indicated with infinite distances.
            Hits are sorted by distance (nearest first).

            .. versionchanged:: 1.9.0
               Previously if ``k=None``, then `d` was an object array of
               shape ``tuple``, containing lists of distances. This behavior
               has been removed, use `query_ball_point` instead.

        i : integer or array of integers
            The index of each neighbor in ``self.data``.
            ``i`` is the same shape as d.
            Missing neighbors are indicated with ``self.n``.

        Examples
        --------

        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> x, y = np.mgrid[0:5, 2:8]
        >>> tree = KDTree(np.c_[x.ravel(), y.ravel()])

        To query the nearest neighbours and return squeezed result, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=1)
        >>> print(dd, ii, sep='\n')
        [2.        0.2236068]
        [ 0 13]

        To query the nearest neighbours and return unsqueezed result, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1])
        >>> print(dd, ii, sep='\n')
        [[2.       ]
         [0.2236068]]
        [[ 0]
         [13]]

        To query the second nearest neighbours and return unsqueezed result,
        use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[2])
        >>> print(dd, ii, sep='\n')
        [[2.23606798]
         [0.80622577]]
        [[ 6]
         [19]]

        To query the first and second nearest neighbours, use

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=2)
        >>> print(dd, ii, sep='\n')
        [[2.         2.23606798]
         [0.2236068  0.80622577]]
        [[ 0  6]
         [13 19]]

        or, be more specific

        >>> dd, ii = tree.query([[0, 0], [2.2, 2.9]], k=[1, 2])
        >>> print(dd, ii, sep='\n')
        [[2.         2.23606798]
         [0.2236068  0.80622577]]
        [[ 0  6]
         [13 19]]

        """
        x = np.asarray(x)
        if x.dtype.kind == 'c':
            raise TypeError("KDTree does not work with complex data")

        if k is None:
            raise ValueError("k must be an integer or a sequence of integers")

        d, i = super().query(x, k, eps, p, distance_upper_bound, workers)
        if isinstance(i, int):
            i = np.intp(i)
        return d, i
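
    # A hedged sketch of distance_upper_bound (reusing the grid tree built in
    # the docstring examples above): neighbors farther than the bound are
    # reported as missing, with infinite distance and index self.n.
    # >>> d, i = tree.query([0, 0], k=1, distance_upper_bound=1.0)
    # >>> d, int(i) == tree.n  # nearest grid point (0, 2) is at distance 2
    # (inf, True)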


    def query_ball_point(self, x, r, p=2., eps=0, workers=1,
                         return_sorted=None, return_length=False):
        """Find all points within distance r of point(s) x.

        Parameters
        ----------
        x : array_like, shape tuple + (self.m,)
            The point or points to search for neighbors of.
        r : array_like, float
            The radius of points to return, must broadcast to the length of x.
        p : float, optional
            Which Minkowski p-norm to use. Should be in the range [1, inf].
            A finite large p may cause a ValueError if overflow can occur.
        eps : nonnegative float, optional
            Approximate search. Branches of the tree are not explored if their
            nearest points are further than ``r / (1 + eps)``, and branches are
            added in bulk if their furthest points are nearer than
            ``r * (1 + eps)``.
        workers : int, optional
            Number of jobs to schedule for parallel processing. If -1 is given
            all processors are used. Default: 1.

            .. versionadded:: 1.6.0
        return_sorted : bool, optional
            Sorts returned indices if True and does not sort them if False. If
            None, does not sort single point queries, but does sort
            multi-point queries which was the behavior before this option
            was added.

            .. versionadded:: 1.6.0
        return_length : bool, optional
            Return the number of points inside the radius instead of a list
            of the indices.

            .. versionadded:: 1.6.0

        Returns
        -------
        results : list or array of lists
            If `x` is a single point, returns a list of the indices of the
            neighbors of `x`. If `x` is an array of points, returns an object
            array of shape tuple containing lists of neighbors.

        Notes
        -----
        If you have many points whose neighbors you want to find, you may save
        substantial amounts of time by putting them in a KDTree and using
        query_ball_tree.

        Examples
        --------
        >>> import numpy as np
        >>> from scipy import spatial
        >>> x, y = np.mgrid[0:5, 0:5]
        >>> points = np.c_[x.ravel(), y.ravel()]
        >>> tree = spatial.KDTree(points)
        >>> sorted(tree.query_ball_point([2, 0], 1))
        [5, 10, 11, 15]

        Query multiple points and plot the results:

        >>> import matplotlib.pyplot as plt
        >>> points = np.asarray(points)
        >>> plt.plot(points[:,0], points[:,1], '.')
        >>> for results in tree.query_ball_point(([2, 0], [3, 3]), 1):
        ...     nearby_points = points[results]
        ...     plt.plot(nearby_points[:,0], nearby_points[:,1], 'o')
        >>> plt.margins(0.1, 0.1)
        >>> plt.show()

        """
        x = np.asarray(x)
        if x.dtype.kind == 'c':
            raise TypeError("KDTree does not work with complex data")
        return super().query_ball_point(
            x, r, p, eps, workers, return_sorted, return_length)
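
    # A hedged sketch of return_length (reusing the 5x5 grid from the
    # docstring example above): asking only for the count avoids building the
    # index lists.
    # >>> tree.query_ball_point([2, 0], 1, return_length=True)
    # 4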


    def query_ball_tree(self, other, r, p=2., eps=0):
        """
        Find all pairs of points between `self` and `other` whose distance is
        at most r.

        Parameters
        ----------
        other : KDTree instance
            The tree containing points to search against.
        r : float
            The maximum distance, has to be positive.
        p : float, optional
            Which Minkowski norm to use. `p` has to meet the condition
            ``1 <= p <= infinity``.
        eps : float, optional
            Approximate search. Branches of the tree are not explored
            if their nearest points are further than ``r/(1+eps)``, and
            branches are added in bulk if their furthest points are nearer
            than ``r * (1+eps)``. `eps` has to be non-negative.

        Returns
        -------
        results : list of lists
            For each element ``self.data[i]`` of this tree, ``results[i]`` is a
            list of the indices of its neighbors in ``other.data``.

        Examples
        --------
        You can search all pairs of points between two kd-trees within a distance:

        >>> import matplotlib.pyplot as plt
        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> rng = np.random.default_rng()
        >>> points1 = rng.random((15, 2))
        >>> points2 = rng.random((15, 2))
        >>> plt.figure(figsize=(6, 6))
        >>> plt.plot(points1[:, 0], points1[:, 1], "xk", markersize=14)
        >>> plt.plot(points2[:, 0], points2[:, 1], "og", markersize=14)
        >>> kd_tree1 = KDTree(points1)
        >>> kd_tree2 = KDTree(points2)
        >>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
        >>> for i in range(len(indexes)):
        ...     for j in indexes[i]:
        ...         plt.plot([points1[i, 0], points2[j, 0]],
        ...                  [points1[i, 1], points2[j, 1]], "-r")
        >>> plt.show()

        """
        return super().query_ball_tree(other, r, p, eps)


    def query_pairs(self, r, p=2., eps=0, output_type='set'):
        """Find all pairs of points in `self` whose distance is at most r.

        Parameters
        ----------
        r : positive float
            The maximum distance.
        p : float, optional
            Which Minkowski norm to use. `p` has to meet the condition
            ``1 <= p <= infinity``.
        eps : float, optional
            Approximate search. Branches of the tree are not explored
            if their nearest points are further than ``r/(1+eps)``, and
            branches are added in bulk if their furthest points are nearer
            than ``r * (1+eps)``. `eps` has to be non-negative.
        output_type : string, optional
            Choose the output container, 'set' or 'ndarray'. Default: 'set'

            .. versionadded:: 1.6.0

        Returns
        -------
        results : set or ndarray
            Set of pairs ``(i,j)``, with ``i < j``, for which the corresponding
            positions are close. If output_type is 'ndarray', an ndarray is
            returned instead of a set.

        Examples
        --------
        You can search all pairs of points in a kd-tree within a distance:

        >>> import matplotlib.pyplot as plt
        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> rng = np.random.default_rng()
        >>> points = rng.random((20, 2))
        >>> plt.figure(figsize=(6, 6))
        >>> plt.plot(points[:, 0], points[:, 1], "xk", markersize=14)
        >>> kd_tree = KDTree(points)
        >>> pairs = kd_tree.query_pairs(r=0.2)
        >>> for (i, j) in pairs:
        ...     plt.plot([points[i, 0], points[j, 0]],
        ...              [points[i, 1], points[j, 1]], "-r")
        >>> plt.show()

        """
        return super().query_pairs(r, p, eps, output_type)
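
    # A hedged sketch of output_type (hypothetical points): 'ndarray' returns
    # the same pairs as an integer array, which is convenient for vectorized
    # post-processing.
    # >>> kd_tree = KDTree([[0.0, 0.0], [0.05, 0.0], [1.0, 1.0]])
    # >>> kd_tree.query_pairs(r=0.1)
    # {(0, 1)}
    # >>> kd_tree.query_pairs(r=0.1, output_type='ndarray')
    # array([[0, 1]])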


    def count_neighbors(self, other, r, p=2., weights=None, cumulative=True):
        """Count how many nearby pairs can be formed.

        Count the number of pairs ``(x1, x2)`` that can be formed, with ``x1``
        drawn from ``self`` and ``x2`` drawn from ``other``, and where
        ``distance(x1, x2, p) <= r``.

        Data points on ``self`` and ``other`` are optionally weighted by the
        ``weights`` argument. (See below)

        This is adapted from the "two-point correlation" algorithm described by
        Gray and Moore [1]_. See notes for further discussion.

        Parameters
        ----------
        other : KDTree
            The other tree to draw points from, can be the same tree as self.
        r : float or one-dimensional array of floats
            The radius to produce a count for. Multiple radii are searched with
            a single tree traversal.
            If the count is non-cumulative (``cumulative=False``), ``r`` defines
            the edges of the bins, and must be non-decreasing.
        p : float, optional
            1<=p<=infinity.
            Which Minkowski p-norm to use.
            Default 2.0.
            A finite large p may cause a ValueError if overflow can occur.
        weights : tuple, array_like, or None, optional
            If None, the pair-counting is unweighted.
            If given as a tuple, weights[0] is the weights of points in
            ``self``, and weights[1] is the weights of points in ``other``;
            either can be None to indicate the points are unweighted.
            If given as an array_like, weights is the weights of points in
            ``self`` and ``other``. For this to make sense, ``self`` and
            ``other`` must be the same tree. If ``self`` and ``other`` are two
            different trees, a ``ValueError`` is raised.
            Default: None

            .. versionadded:: 1.6.0
        cumulative : bool, optional
            Whether the returned counts are cumulative. When cumulative is set
            to ``False`` the algorithm is optimized to work with a large number
            of bins (>10) specified by ``r``. When ``cumulative`` is set to
            True, the algorithm is optimized to work with a small number of
            ``r``. Default: True

            .. versionadded:: 1.6.0

        Returns
        -------
        result : scalar or 1-D array
            The number of pairs. For unweighted counts, the result is integer.
            For weighted counts, the result is float.
            If cumulative is False, ``result[i]`` contains the counts with
            ``(-inf if i == 0 else r[i-1]) < R <= r[i]``

        Notes
        -----
        Pair-counting is the basic operation used to calculate the two point
        correlation functions from a data set composed of position of objects.

        The two point correlation function measures the clustering of objects
        and is widely used in cosmology to quantify the large scale structure
        in our Universe, but it may be useful for data analysis in other fields
        where self-similar assembly of objects also occurs.

        The Landy-Szalay estimator for the two point correlation function of
        ``D`` measures the clustering signal in ``D``. [2]_

        For example, given the position of two sets of objects,

        - objects ``D`` (data) contains the clustering signal, and

        - objects ``R`` (random) that contains no signal,

        .. math::

            \\xi(r) = \\frac{<D, D> - 2 f <D, R> + f^2<R, R>}{f^2<R, R>},

        where the brackets represent counting pairs between two data sets
        in a finite bin around ``r`` (distance), corresponding to setting
        `cumulative=False`, and ``f = float(len(D)) / float(len(R))`` is the
        ratio between the number of objects from data and random.

        The algorithm implemented here is loosely based on the dual-tree
        algorithm described in [1]_. We switch between two different
        pair-cumulation schemes depending on the setting of ``cumulative``.
        The computing time of the method we use when ``cumulative == False``
        does not scale with the total number of bins. The algorithm for
        ``cumulative == True`` scales linearly with the number of bins,
        though it is slightly faster when only 1 or 2 bins are used. [5]_.

        As an extension to the naive pair-counting,
        weighted pair-counting counts the product of weights instead
        of number of pairs.
        Weighted pair-counting is used to estimate marked correlation functions
        ([3]_, section 2.2),
        or to properly calculate the average of data per distance bin
        (e.g. [4]_, section 2.1 on redshift).

        .. [1] Gray and Moore,
               "N-body problems in statistical learning",
               Mining the sky, 2000,
               https://arxiv.org/abs/astro-ph/0012333

        .. [2] Landy and Szalay,
               "Bias and variance of angular correlation functions",
               The Astrophysical Journal, 1993,
               http://adsabs.harvard.edu/abs/1993ApJ...412...64L

        .. [3] Sheth, Connolly and Skibba,
               "Marked correlations in galaxy formation models",
               Arxiv e-print, 2005,
               https://arxiv.org/abs/astro-ph/0511773

        .. [4] Hawkins, et al.,
               "The 2dF Galaxy Redshift Survey: correlation functions,
               peculiar velocities and the matter density of the Universe",
               Monthly Notices of the Royal Astronomical Society, 2002,
               http://adsabs.harvard.edu/abs/2003MNRAS.346...78H

        .. [5] https://github.com/scipy/scipy/pull/5647#issuecomment-168474926

        Examples
        --------
        You can count the number of neighbor pairs between two kd-trees within
        a distance:

        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> rng = np.random.default_rng()
        >>> points1 = rng.random((5, 2))
        >>> points2 = rng.random((5, 2))
        >>> kd_tree1 = KDTree(points1)
        >>> kd_tree2 = KDTree(points2)
        >>> kd_tree1.count_neighbors(kd_tree2, 0.2)
        1

        This number is the same as the total pair number calculated by
        `query_ball_tree`:

        >>> indexes = kd_tree1.query_ball_tree(kd_tree2, r=0.2)
        >>> sum([len(i) for i in indexes])
        1

        """
        return super().count_neighbors(other, r, p, weights, cumulative)
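
    # A hedged sketch of non-cumulative binning (hypothetical 1-D points):
    # with cumulative=False, r gives the bin edges and result[i] counts pairs
    # at distances in (r[i-1], r[i]], the first bin being (-inf, r[0]].
    # >>> t1 = KDTree([[0.0], [1.0]]); t2 = KDTree([[0.25], [2.0]])
    # >>> t1.count_neighbors(t2, r=[0.5, 1.5], cumulative=False)
    # array([1, 2])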


    def sparse_distance_matrix(
            self, other, max_distance, p=2., output_type='dok_matrix'):
        """Compute a sparse distance matrix.

        Computes a distance matrix between two KDTrees, leaving as zero
        any distance greater than max_distance.

        Parameters
        ----------
        other : KDTree

        max_distance : positive float

        p : float, 1<=p<=infinity
            Which Minkowski p-norm to use.
            A finite large p may cause a ValueError if overflow can occur.

        output_type : string, optional
            Which container to use for output data. Options: 'dok_matrix',
            'coo_matrix', 'dict', or 'ndarray'. Default: 'dok_matrix'.

            .. versionadded:: 1.6.0

        Returns
        -------
        result : dok_matrix, coo_matrix, dict or ndarray
            Sparse matrix representing the results in "dictionary of keys"
            format. If a dict is returned the keys are (i,j) tuples of indices.
            If output_type is 'ndarray' a record array with fields 'i', 'j',
            and 'v' is returned.

        Examples
        --------
        You can compute a sparse distance matrix between two kd-trees:

        >>> import numpy as np
        >>> from scipy.spatial import KDTree
        >>> rng = np.random.default_rng()
        >>> points1 = rng.random((5, 2))
        >>> points2 = rng.random((5, 2))
        >>> kd_tree1 = KDTree(points1)
        >>> kd_tree2 = KDTree(points2)
        >>> sdm = kd_tree1.sparse_distance_matrix(kd_tree2, 0.3)
        >>> sdm.toarray()
        array([[0.        , 0.        , 0.12295571, 0.        , 0.        ],
               [0.        , 0.        , 0.        , 0.        , 0.        ],
               [0.28942611, 0.        , 0.        , 0.2333084 , 0.        ],
               [0.        , 0.        , 0.        , 0.        , 0.        ],
               [0.24617575, 0.29571802, 0.26836782, 0.        , 0.        ]])

        You can check that distances above `max_distance` are zero:

        >>> from scipy.spatial import distance_matrix
        >>> distance_matrix(points1, points2)
        array([[0.56906522, 0.39923701, 0.12295571, 0.8658745 , 0.79428925],
               [0.37327919, 0.7225693 , 0.87665969, 0.32580855, 0.75679479],
               [0.28942611, 0.30088013, 0.6395831 , 0.2333084 , 0.33630734],
               [0.31994999, 0.72658602, 0.71124834, 0.55396483, 0.90785663],
               [0.24617575, 0.29571802, 0.26836782, 0.57714465, 0.6473269 ]])

        """
        return super().sparse_distance_matrix(
            other, max_distance, p, output_type)
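
    # A hedged sketch of output_type='dict' (hypothetical points): keys are
    # (i, j) index tuples and values are the distances within max_distance.
    # >>> t1 = KDTree([[0.0, 0.0]]); t2 = KDTree([[0.25, 0.0], [5.0, 5.0]])
    # >>> t1.sparse_distance_matrix(t2, 0.5, output_type='dict')
    # {(0, 0): 0.25}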



def distance_matrix(x, y, p=2, threshold=1000000):
    """Compute the distance matrix.

    Returns the matrix of all pair-wise distances.

    Parameters
    ----------
    x : (M, K) array_like
        Matrix of M vectors in K dimensions.
    y : (N, K) array_like
        Matrix of N vectors in K dimensions.
    p : float, 1 <= p <= infinity
        Which Minkowski p-norm to use.
    threshold : positive int
        If ``M * N * K`` > `threshold`, algorithm uses a Python loop instead
        of large temporary arrays.

    Returns
    -------
    result : (M, N) ndarray
        Matrix containing the distance from every vector in `x` to every vector
        in `y`.

    Examples
    --------
    >>> from scipy.spatial import distance_matrix
    >>> distance_matrix([[0,0],[0,1]], [[1,0],[1,1]])
    array([[ 1.        ,  1.41421356],
           [ 1.41421356,  1.        ]])

    """

    x = np.asarray(x)
    m, k = x.shape
    y = np.asarray(y)
    n, kk = y.shape

    if k != kk:
        raise ValueError(f"x contains {k}-dimensional vectors but y contains "
                         f"{kk}-dimensional vectors")

    if m*n*k <= threshold:
        return minkowski_distance(x[:, np.newaxis, :], y[np.newaxis, :, :], p)
    else:
        result = np.empty((m, n), dtype=float)  # FIXME: figure out the best dtype
        if m < n:
            for i in range(m):
                result[i, :] = minkowski_distance(x[i], y, p)
        else:
            for j in range(n):
                result[:, j] = minkowski_distance(x, y[j], p)
        return result
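
# A hedged consistency check for the threshold branch (hypothetical values):
# forcing the loop-based path with a tiny threshold should agree with the
# broadcasting path.
# >>> a = [[0.0, 0.0], [0.0, 1.0]]; b = [[1.0, 0.0], [1.0, 1.0]]
# >>> bool(np.allclose(distance_matrix(a, b), distance_matrix(a, b, threshold=1)))
# True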