/src/s2geometry/src/s2/s2closest_point_query_base.h
Line | Count | Source |
1 | | // Copyright 2017 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS-IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | |
16 | | // Author: ericv@google.com (Eric Veach) |
17 | | // |
18 | | // See S2ClosestPointQueryBase (defined below) for an overview. |
19 | | |
20 | | #ifndef S2_S2CLOSEST_POINT_QUERY_BASE_H_ |
21 | | #define S2_S2CLOSEST_POINT_QUERY_BASE_H_ |
22 | | |
23 | | #include <algorithm> |
24 | | #include <iterator> |
25 | | #include <limits> |
26 | | #include <queue> |
27 | | #include <vector> |
28 | | |
29 | | #include "absl/container/inlined_vector.h" |
30 | | #include "absl/log/absl_check.h" |
31 | | #include "absl/log/absl_log.h" |
32 | | #include "s2/_fp_contract_off.h" // IWYU pragma: keep |
33 | | #include "s2/s1chord_angle.h" |
34 | | #include "s2/s2cap.h" |
35 | | #include "s2/s2cell.h" |
36 | | #include "s2/s2cell_id.h" |
37 | | #include "s2/s2cell_union.h" |
38 | | #include "s2/s2distance_target.h" |
39 | | #include "s2/s2edge_distances.h" |
40 | | #include "s2/s2point.h" |
41 | | #include "s2/s2point_index.h" |
42 | | #include "s2/s2region.h" |
43 | | #include "s2/s2region_coverer.h" |
44 | | |
45 | | // Options that control the set of points returned. Note that by default |
46 | | // *all* points are returned, so you will always want to set either the |
47 | | // max_results() option or the max_distance() option (or both). |
48 | | // |
49 | | // This class is also available as S2ClosestPointQueryBase<Data>::Options. |
50 | | // (It is defined here to avoid depending on the "Data" template argument.) |
51 | | // |
52 | | // The Distance template argument is described below. |
53 | | template <class Distance> |
54 | | class S2ClosestPointQueryBaseOptions { |
55 | | public: |
56 | | using Delta = typename Distance::Delta; |
57 | | |
58 | | S2ClosestPointQueryBaseOptions(); |
59 | | |
60 | | // Specifies that at most "max_results" points should be returned. |
61 | | // |
62 | | // REQUIRES: max_results >= 1 |
63 | | // DEFAULT: numeric_limits<int>::max() |
64 | | int max_results() const; |
65 | | void set_max_results(int max_results); |
66 | | static constexpr int kMaxMaxResults = std::numeric_limits<int>::max(); |
67 | | |
68 | | // Specifies that only points whose distance to the target is less than |
69 | | // "max_distance" should be returned. |
70 | | // |
71 | | // Note that points whose distance is exactly equal to "max_distance" are |
72 | | // not returned. In most cases this doesn't matter (since distances are |
73 | | // not computed exactly in the first place), but if such points are needed |
74 | | // then you can retrieve them by specifying "max_distance" as the next |
75 | | // largest representable Distance. For example, if Distance is an |
76 | | // S1ChordAngle then you can specify max_distance.Successor(). |
77 | | // |
78 | | // DEFAULT: Distance::Infinity() |
79 | | Distance max_distance() const; |
80 | | void set_max_distance(Distance max_distance); |
81 | | |
82 | | // Specifies that points up to max_error() further away than the true |
83 | | // closest points may be substituted in the result set, as long as such |
84 | | // points satisfy all the remaining search criteria (such as max_distance). |
85 | | // This option only has an effect if max_results() is also specified; |
86 | | // otherwise all points closer than max_distance() will always be returned. |
87 | | // |
88 | | // Note that this does not affect how the distance between points is |
89 | | // computed; it simply gives the algorithm permission to stop the search |
90 | | // early as soon as the best possible improvement drops below max_error(). |
91 | | // |
92 | | // This can be used to implement distance predicates efficiently. For |
93 | | // example, to determine whether the minimum distance is less than D, the |
94 | | // IsDistanceLess() method sets max_results() == 1 and max_distance() == |
95 | | // max_error() == D. This causes the algorithm to terminate as soon as it |
96 | | // finds any point whose distance is less than D, rather than continuing to |
97 | | // search for a point that is even closer. |
98 | | // |
99 | | // DEFAULT: Distance::Delta::Zero() |
100 | | Delta max_error() const; |
101 | | void set_max_error(Delta max_error); |
102 | | |
103 | | // Specifies that points must be contained by the given S2Region. "region" |
104 | | // is owned by the caller and must persist during the lifetime of this |
105 | | // object. The value may be changed between calls to FindClosestPoints(), |
106 | | // or reset by calling set_region(nullptr). |
107 | | // |
108 | | // Note that if you want to set the region to a disc around a target point, |
109 | | // it is faster to use a PointTarget with set_max_distance() instead. You |
110 | | // can also call both methods, e.g. to set a maximum distance and also |
111 | | // require that points lie within a given rectangle. |
112 | | const S2Region* region() const; |
113 | | void set_region(const S2Region* region); |
114 | | |
115 | | // Specifies that distances should be computed by examining every point |
116 | | // rather than using the S2ShapeIndex. This is useful for testing, |
117 | | // benchmarking, and debugging. |
118 | | // |
119 | | // DEFAULT: false |
120 | | bool use_brute_force() const; |
121 | | void set_use_brute_force(bool use_brute_force); |
122 | | |
123 | | private: |
124 | | Distance max_distance_ = Distance::Infinity(); |
125 | | Delta max_error_ = Delta::Zero(); |
126 | | const S2Region* region_ = nullptr; |
127 | | int max_results_ = kMaxMaxResults; |
128 | | bool use_brute_force_ = false; |
129 | | }; |
130 | | |
131 | | // S2ClosestPointQueryBase is a templatized class for finding the closest |
132 | | // point(s) to a given target. It is not intended to be used directly, but |
133 | | // rather to serve as the implementation of various specialized classes with |
134 | | // more convenient APIs (such as S2ClosestPointQuery). It is flexible enough |
135 | | // so that it can be adapted to compute maximum distances and even potentially |
136 | | // Hausdorff distances. |
137 | | // |
138 | | // By using the appropriate options, this class can answer questions such as: |
139 | | // |
140 | | // - Find the minimum distance between a point collection A and a target B. |
141 | | // - Find all points in collection A that are within a distance D of target B. |
142 | | // - Find the k points of collection A that are closest to a given point P. |
143 | | // |
144 | | // The target is any class that implements the S2DistanceTarget interface. |
145 | | // There are predefined targets for points, edges, S2Cells, and S2ShapeIndexes |
146 | | // (arbitrary collctions of points, polylines, and polygons). |
147 | | // |
148 | | // The Distance template argument is used to represent distances. Usually it |
149 | | // is a thin wrapper around S1ChordAngle, but another distance type may be |
150 | | // used as long as it implements the Distance concept described in |
151 | | // s2distance_target.h. For example this can be used to measure maximum |
152 | | // distances, to get more accuracy, or to measure non-spheroidal distances. |
153 | | template <class Distance, class Data> |
154 | | class S2ClosestPointQueryBase { |
155 | | public: |
156 | | using Delta = typename Distance::Delta; |
157 | | using Index = S2PointIndex<Data>; |
158 | | using PointData = typename Index::PointData; |
159 | | using Options = S2ClosestPointQueryBaseOptions<Distance>; |
160 | | |
161 | | // The Target class represents the geometry to which the distance is |
162 | | // measured. For example, there can be subtypes for measuring the distance |
163 | | // to a point, an edge, or to an S2ShapeIndex (an arbitrary collection of |
164 | | // geometry). |
165 | | // |
166 | | // Implementations do *not* need to be thread-safe. They may cache data or |
167 | | // allocate temporary data structures in order to improve performance. |
168 | | using Target = S2DistanceTarget<Distance>; |
169 | | |
170 | | // Each "Result" object represents a closest point. |
171 | | class Result { |
172 | | public: |
173 | | // The default constructor creates an "empty" result, with a distance() of |
174 | | // Infinity() and non-dereferencable point() and data() values. |
175 | 0 | Result() = default; |
176 | | |
177 | | // Constructs a Result object for the given point. |
178 | | Result(Distance distance, const PointData* point_data) |
179 | 0 | : distance_(distance), point_data_(point_data) {} |
180 | | |
181 | | // Returns true if this Result object does not refer to any data point. |
182 | | // (The only case where an empty Result is returned is when the |
183 | | // FindClosestPoint() method does not find any points that meet the |
184 | | // specified criteria.) |
185 | 0 | bool is_empty() const { return point_data_ == nullptr; } |
186 | | |
187 | | // The distance from the target to this point. |
188 | 0 | Distance distance() const { return distance_; } |
189 | | |
190 | | // The point itself. |
191 | 0 | const S2Point& point() const { return point_data_->point(); } |
192 | | |
193 | | // The client-specified data associated with this point. |
194 | 0 | const Data& data() const { return point_data_->data(); } |
195 | | |
196 | | // Returns true if two Result objects are identical. |
197 | 0 | friend bool operator==(const Result& x, const Result& y) { |
198 | 0 | return (x.distance_ == y.distance_ && x.point_data_ == y.point_data_); |
199 | 0 | } |
200 | | |
201 | | // Compares two Result objects first by distance, then by point_data(). |
202 | 0 | friend bool operator<(const Result& x, const Result& y) { |
203 | 0 | if (x.distance_ < y.distance_) return true; |
204 | 0 | if (y.distance_ < x.distance_) return false; |
205 | 0 | return x.point_data_ < y.point_data_; |
206 | 0 | } |
207 | | |
208 | | private: |
209 | | Distance distance_ = Distance::Infinity(); |
210 | | const PointData* point_data_ = nullptr; |
211 | | }; |
212 | | |
213 | | // The minimum number of points that a cell must contain to enqueue it |
214 | | // rather than processing its contents immediately. |
215 | | static constexpr int kMinPointsToEnqueue = 13; |
216 | | |
217 | | // Default constructor; requires Init() to be called. |
218 | | S2ClosestPointQueryBase(); |
219 | | ~S2ClosestPointQueryBase(); |
220 | | |
221 | | // Convenience constructor that calls Init(). |
222 | | explicit S2ClosestPointQueryBase(const Index* index); |
223 | | |
224 | | // S2ClosestPointQueryBase is not copyable. |
225 | | S2ClosestPointQueryBase(const S2ClosestPointQueryBase&) = delete; |
226 | | void operator=(const S2ClosestPointQueryBase&) = delete; |
227 | | |
228 | | // Initializes the query. |
229 | | // REQUIRES: ReInit() must be called if "index" is modified. |
230 | | void Init(const Index* index); |
231 | | |
232 | | // Reinitializes the query. This method must be called whenever the |
233 | | // underlying index is modified. |
234 | | void ReInit(); |
235 | | |
236 | | // Return a reference to the underlying S2PointIndex. |
237 | | const Index& index() const; |
238 | | |
239 | | // Returns the closest points to the given target that satisfy the given |
240 | | // options. This method may be called multiple times. |
241 | | std::vector<Result> FindClosestPoints(Target* target, const Options& options); |
242 | | |
243 | | // This version can be more efficient when this method is called many times, |
244 | | // since it does not require allocating a new vector on each call. |
245 | | void FindClosestPoints(Target* target, const Options& options, |
246 | | std::vector<Result>* results); |
247 | | |
248 | | // Convenience method that returns exactly one point. If no points satisfy |
249 | | // the given search criteria, then a Result with distance() == Infinity() |
250 | | // and is_empty() == true is returned. |
251 | | // |
252 | | // REQUIRES: options.max_results() == 1 |
253 | | Result FindClosestPoint(Target* target, const Options& options); |
254 | | |
255 | | private: |
256 | | using Iterator = typename Index::Iterator; |
257 | | |
258 | 0 | const Options& options() const { return *options_; } |
259 | | void FindClosestPointsInternal(Target* target, const Options& options); |
260 | | void FindClosestPointsBruteForce(); |
261 | | void FindClosestPointsOptimized(); |
262 | | void InitQueue(); |
263 | | void InitCovering(); |
264 | | void AddInitialRange(S2CellId first_id, S2CellId last_id); |
265 | | void MaybeAddResult(const PointData* point_data); |
266 | | bool ProcessOrEnqueue(S2CellId id, Iterator* iter, bool seek); |
267 | | |
268 | | const Index* index_; |
269 | | const Options* options_; |
270 | | Target* target_; |
271 | | |
272 | | // True if max_error() must be subtracted from priority queue cell distances |
273 | | // in order to ensure that such distances are measured conservatively. This |
274 | | // is true only if the target takes advantage of max_error() in order to |
275 | | // return faster results, and 0 < max_error() < distance_limit_. |
276 | | bool use_conservative_cell_distance_; |
277 | | |
278 | | // For the optimized algorithm we precompute the top-level S2CellIds that |
279 | | // will be added to the priority queue. There can be at most 6 of these |
280 | | // cells. Essentially this is just a covering of the indexed points. |
281 | | std::vector<S2CellId> index_covering_; |
282 | | |
283 | | // The distance beyond which we can safely ignore further candidate points. |
284 | | // (Candidates that are exactly at the limit are ignored; this is more |
285 | | // efficient for UpdateMinDistance() and should not affect clients since |
286 | | // distance measurements have a small amount of error anyway.) |
287 | | // |
288 | | // Initially this is the same as the maximum distance specified by the user, |
289 | | // but it can also be updated by the algorithm (see MaybeAddResult). |
290 | | Distance distance_limit_; |
291 | | |
292 | | // The current result set is stored in one of three ways: |
293 | | // |
294 | | // - If max_results() == 1, the best result is kept in result_singleton_. |
295 | | // |
296 | | // - If max_results() == "infinity", results are appended to result_vector_ |
297 | | // and sorted/uniqued at the end. |
298 | | // |
299 | | // - Otherwise results are kept in a priority queue so that we can |
300 | | // progressively reduce the distance limit once max_results() results |
301 | | // have been found. |
302 | | Result result_singleton_; |
303 | | std::vector<Result> result_vector_; |
304 | | std::priority_queue<Result, absl::InlinedVector<Result, 16>> result_set_; |
305 | | |
306 | | // The algorithm maintains a priority queue of unprocessed S2CellIds, sorted |
307 | | // in increasing order of distance from the target. |
308 | | struct QueueEntry { |
309 | | // A lower bound on the distance from the target to "id". This is the key |
310 | | // of the priority queue. |
311 | | Distance distance; |
312 | | |
313 | | // The cell being queued. |
314 | | S2CellId id; |
315 | | |
316 | | QueueEntry(Distance _distance, S2CellId _id) |
317 | 0 | : distance(_distance), id(_id) {} |
318 | | |
319 | 0 | bool operator<(const QueueEntry& other) const { |
320 | | // The priority queue returns the largest elements first, so we want the |
321 | | // "largest" entry to have the smallest distance. |
322 | 0 | return other.distance < distance; |
323 | 0 | } |
324 | | }; |
325 | | using CellQueue = |
326 | | std::priority_queue<QueueEntry, absl::InlinedVector<QueueEntry, 16>>; |
327 | | CellQueue queue_; |
328 | | |
329 | | // Temporaries, defined here to avoid multiple allocations / initializations. |
330 | | |
331 | | Iterator iter_; |
332 | | std::vector<S2CellId> region_covering_; |
333 | | std::vector<S2CellId> max_distance_covering_; |
334 | | std::vector<S2CellId> intersection_with_region_; |
335 | | std::vector<S2CellId> intersection_with_max_distance_; |
336 | | const PointData* tmp_point_data_[kMinPointsToEnqueue - 1]; |
337 | | }; |
338 | | |
339 | | |
340 | | ////////////////// Implementation details follow //////////////////// |
341 | | |
342 | | template <class Distance> |
343 | | inline S2ClosestPointQueryBaseOptions< |
344 | 0 | Distance>::S2ClosestPointQueryBaseOptions() = default; |
345 | | |
346 | | template <class Distance> |
347 | 0 | inline int S2ClosestPointQueryBaseOptions<Distance>::max_results() const { |
348 | 0 | return max_results_; |
349 | 0 | } |
350 | | |
351 | | template <class Distance> |
352 | | inline void S2ClosestPointQueryBaseOptions<Distance>::set_max_results( |
353 | | int max_results) { |
354 | | ABSL_DCHECK_GE(max_results, 1); |
355 | | max_results_ = max_results; |
356 | | } |
357 | | |
358 | | template <class Distance> |
359 | | inline Distance S2ClosestPointQueryBaseOptions<Distance>::max_distance() |
360 | 0 | const { |
361 | 0 | return max_distance_; |
362 | 0 | } |
363 | | |
364 | | template <class Distance> |
365 | | inline void S2ClosestPointQueryBaseOptions<Distance>::set_max_distance( |
366 | 0 | Distance max_distance) { |
367 | 0 | max_distance_ = max_distance; |
368 | 0 | } |
369 | | |
370 | | template <class Distance> |
371 | | inline typename Distance::Delta |
372 | 0 | S2ClosestPointQueryBaseOptions<Distance>::max_error() const { |
373 | 0 | return max_error_; |
374 | 0 | } |
375 | | |
376 | | template <class Distance> |
377 | | inline void S2ClosestPointQueryBaseOptions<Distance>::set_max_error( |
378 | 0 | Delta max_error) { |
379 | 0 | max_error_ = max_error; |
380 | 0 | } |
381 | | |
382 | | template <class Distance> |
383 | | inline const S2Region* S2ClosestPointQueryBaseOptions<Distance>::region() |
384 | 0 | const { |
385 | 0 | return region_; |
386 | 0 | } |
387 | | |
388 | | template <class Distance> |
389 | | inline void S2ClosestPointQueryBaseOptions<Distance>::set_region( |
390 | | const S2Region* region) { |
391 | | region_ = region; |
392 | | } |
393 | | |
394 | | template <class Distance> |
395 | 0 | inline bool S2ClosestPointQueryBaseOptions<Distance>::use_brute_force() const { |
396 | 0 | return use_brute_force_; |
397 | 0 | } |
398 | | |
399 | | template <class Distance> |
400 | | inline void S2ClosestPointQueryBaseOptions<Distance>::set_use_brute_force( |
401 | | bool use_brute_force) { |
402 | | use_brute_force_ = use_brute_force; |
403 | | } |
404 | | |
405 | | template <class Distance, class Data> |
406 | 0 | S2ClosestPointQueryBase<Distance, Data>::S2ClosestPointQueryBase() = default; |
407 | | |
408 | | template <class Distance, class Data> |
409 | 0 | S2ClosestPointQueryBase<Distance, Data>::~S2ClosestPointQueryBase() { |
410 | | // Prevent inline destructor bloat by providing a definition. |
411 | 0 | } |
412 | | |
413 | | template <class Distance, class Data> |
414 | | inline S2ClosestPointQueryBase<Distance, Data>::S2ClosestPointQueryBase( |
415 | | const S2PointIndex<Data>* index) : S2ClosestPointQueryBase() { |
416 | | Init(index); |
417 | | } |
418 | | |
419 | | template <class Distance, class Data> |
420 | | void S2ClosestPointQueryBase<Distance, Data>::Init( |
421 | 0 | const S2PointIndex<Data>* index) { |
422 | 0 | index_ = index; |
423 | 0 | ReInit(); |
424 | 0 | } |
425 | | |
426 | | template <class Distance, class Data> |
427 | 0 | void S2ClosestPointQueryBase<Distance, Data>::ReInit() { |
428 | 0 | iter_.Init(index_); |
429 | 0 | index_covering_.clear(); |
430 | 0 | } |
431 | | |
432 | | template <class Distance, class Data> |
433 | | inline const S2PointIndex<Data>& |
434 | | S2ClosestPointQueryBase<Distance, Data>::index() const { |
435 | | return *index_; |
436 | | } |
437 | | |
438 | | template <class Distance, class Data> |
439 | | inline std::vector<typename S2ClosestPointQueryBase<Distance, Data>::Result> |
440 | | S2ClosestPointQueryBase<Distance, Data>::FindClosestPoints( |
441 | | Target* target, const Options& options) { |
442 | | std::vector<Result> results; |
443 | | FindClosestPoints(target, options, &results); |
444 | | return results; |
445 | | } |
446 | | |
447 | | template <class Distance, class Data> |
448 | | typename S2ClosestPointQueryBase<Distance, Data>::Result |
449 | | S2ClosestPointQueryBase<Distance, Data>::FindClosestPoint( |
450 | | Target* target, const Options& options) { |
451 | | ABSL_DCHECK_EQ(options.max_results(), 1); |
452 | | FindClosestPointsInternal(target, options); |
453 | | return result_singleton_; |
454 | | } |
455 | | |
456 | | template <class Distance, class Data> |
457 | | void S2ClosestPointQueryBase<Distance, Data>::FindClosestPoints( |
458 | 0 | Target* target, const Options& options, std::vector<Result>* results) { |
459 | 0 | FindClosestPointsInternal(target, options); |
460 | 0 | results->clear(); |
461 | 0 | if (options.max_results() == 1) { |
462 | 0 | if (!result_singleton_.is_empty()) { |
463 | 0 | results->push_back(result_singleton_); |
464 | 0 | } |
465 | 0 | } else if (options.max_results() == Options::kMaxMaxResults) { |
466 | 0 | std::sort(result_vector_.begin(), result_vector_.end()); |
467 | 0 | std::unique_copy(result_vector_.begin(), result_vector_.end(), |
468 | 0 | std::back_inserter(*results)); |
469 | 0 | result_vector_.clear(); |
470 | 0 | } else { |
471 | 0 | results->reserve(result_set_.size()); |
472 | 0 | for (; !result_set_.empty(); result_set_.pop()) { |
473 | 0 | results->push_back(result_set_.top()); |
474 | 0 | } |
475 | | // The priority queue returns the largest elements first. |
476 | 0 | std::reverse(results->begin(), results->end()); |
477 | 0 | ABSL_DCHECK(std::is_sorted(results->begin(), results->end())); |
478 | 0 | } |
479 | 0 | } |
480 | | |
481 | | template <class Distance, class Data> |
482 | | void S2ClosestPointQueryBase<Distance, Data>::FindClosestPointsInternal( |
483 | 0 | Target* target, const Options& options) { |
484 | 0 | target_ = target; |
485 | 0 | options_ = &options; |
486 | |
|
487 | 0 | distance_limit_ = options.max_distance(); |
488 | 0 | result_singleton_ = Result(); |
489 | 0 | ABSL_DCHECK(result_vector_.empty()); |
490 | 0 | ABSL_DCHECK(result_set_.empty()); |
491 | 0 | ABSL_DCHECK_GE(target->max_brute_force_index_size(), 0); |
492 | 0 | if (distance_limit_ == Distance::Zero()) return; |
493 | | |
494 | 0 | if (options.max_results() == Options::kMaxMaxResults && |
495 | 0 | options.max_distance() == Distance::Infinity() && |
496 | 0 | options.region() == nullptr) { |
497 | 0 | ABSL_LOG(WARNING) << "Returning all points " |
498 | 0 | "(max_results/max_distance/region not set)"; |
499 | 0 | } |
500 | | |
501 | | // If max_error() > 0 and the target takes advantage of this, then we may |
502 | | // need to adjust the distance estimates to the priority queue cells to |
503 | | // ensure that they are always a lower bound on the true distance. For |
504 | | // example, suppose max_distance == 100, max_error == 30, and we compute the |
505 | | // distance to the target from some cell C0 as d(C0) == 80. Then because |
506 | | // the target takes advantage of max_error(), the true distance could be as |
507 | | // low as 50. In order not to miss edges contained by such cells, we need |
508 | | // to subtract max_error() from the distance estimates. This behavior is |
509 | | // controlled by the use_conservative_cell_distance_ flag. |
510 | | // |
511 | | // However there is one important case where this adjustment is not |
512 | | // necessary, namely when max_distance() < max_error(). This is because |
513 | | // max_error() only affects the algorithm once at least max_results() edges |
514 | | // have been found that satisfy the given distance limit. At that point, |
515 | | // max_error() is subtracted from distance_limit_ in order to ensure that |
516 | | // any further matches are closer by at least that amount. But when |
517 | | // max_distance() < max_error(), this reduces the distance limit to 0, |
518 | | // i.e. all remaining candidate cells and edges can safely be discarded. |
519 | | // (Note that this is how IsDistanceLess() and friends are implemented.) |
520 | | // |
521 | | // Note that Distance::Delta only supports operator==. |
522 | 0 | bool target_uses_max_error = (!(options.max_error() == Delta::Zero()) && |
523 | 0 | target_->set_max_error(options.max_error())); |
524 | | |
525 | | // Note that we can't compare max_error() and distance_limit_ directly |
526 | | // because one is a Delta and one is a Distance. Instead we subtract them. |
527 | 0 | use_conservative_cell_distance_ = target_uses_max_error && |
528 | 0 | (distance_limit_ == Distance::Infinity() || |
529 | 0 | Distance::Zero() < distance_limit_ - options.max_error()); |
530 | | |
531 | | // Note that given point is processed only once (unlike S2ClosestEdgeQuery), |
532 | | // and therefore we don't need to worry about the possibility of having |
533 | | // duplicate points in the results. |
534 | 0 | if (options.use_brute_force() || |
535 | 0 | index_->num_points() <= target_->max_brute_force_index_size()) { |
536 | 0 | FindClosestPointsBruteForce(); |
537 | 0 | } else { |
538 | 0 | FindClosestPointsOptimized(); |
539 | 0 | } |
540 | 0 | } |
541 | | |
542 | | template <class Distance, class Data> |
543 | 0 | void S2ClosestPointQueryBase<Distance, Data>::FindClosestPointsBruteForce() { |
544 | 0 | for (iter_.Begin(); !iter_.done(); iter_.Next()) { |
545 | 0 | MaybeAddResult(&iter_.point_data()); |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | | template <class Distance, class Data> |
550 | 0 | void S2ClosestPointQueryBase<Distance, Data>::FindClosestPointsOptimized() { |
551 | 0 | InitQueue(); |
552 | 0 | while (!queue_.empty()) { |
553 | | // We need to copy the top entry before removing it, and we need to remove |
554 | | // it before adding any new entries to the queue. |
555 | 0 | QueueEntry entry = queue_.top(); |
556 | 0 | queue_.pop(); |
557 | 0 | if (!(entry.distance < distance_limit_)) { |
558 | 0 | queue_ = CellQueue(); // Clear any remaining entries. |
559 | 0 | break; |
560 | 0 | } |
561 | 0 | S2CellId child = entry.id.child_begin(); |
562 | | // We already know that it has too many points, so process its children. |
563 | | // Each child may either be processed directly or enqueued again. The |
564 | | // loop is optimized so that we don't seek unnecessarily. |
565 | 0 | bool seek = true; |
566 | 0 | for (int i = 0; i < 4; ++i, child = child.next()) { |
567 | 0 | seek = ProcessOrEnqueue(child, &iter_, seek); |
568 | 0 | } |
569 | 0 | } |
570 | 0 | } |
571 | | |
572 | | template <class Distance, class Data> |
573 | 0 | void S2ClosestPointQueryBase<Distance, Data>::InitQueue() { |
574 | 0 | ABSL_DCHECK(queue_.empty()); |
575 | | |
576 | | // Optimization: rather than starting with the entire index, see if we can |
577 | | // limit the search region to a small disc. Then we can find a covering for |
578 | | // that disc and intersect it with the covering for the index. This can |
579 | | // save a lot of work when the search region is small. |
580 | 0 | S2Cap cap = target_->GetCapBound(); |
581 | 0 | if (cap.is_empty()) return; // Empty target. |
582 | 0 | if (options().max_results() == 1) { |
583 | | // If the user is searching for just the closest point, we can compute an |
584 | | // upper bound on search radius by seeking to the center of the target's |
585 | | // bounding cap and looking at the adjacent index points (in S2CellId |
586 | | // order). The minimum distance to either of these points is an upper |
587 | | // bound on the search radius. |
588 | | // |
589 | | // TODO(ericv): The same strategy would also work for small values of |
590 | | // max_results() > 1, e.g. max_results() == 20, except that we would need to |
591 | | // examine more neighbors (at least 20, and preferably 20 in each |
592 | | // direction). It's not clear whether this is a common case, though, and |
593 | | // also this would require extending MaybeAddResult() so that it can |
594 | | // remove duplicate entries. (The points added here may be re-added by |
595 | | // ProcessOrEnqueue(), but this is okay when max_results() == 1.) |
596 | 0 | iter_.Seek(S2CellId(cap.center())); |
597 | 0 | if (!iter_.done()) { |
598 | 0 | MaybeAddResult(&iter_.point_data()); |
599 | 0 | } |
600 | 0 | if (iter_.Prev()) { |
601 | 0 | MaybeAddResult(&iter_.point_data()); |
602 | 0 | } |
603 | | // Skip the rest of the algorithm if we found a matching point. |
604 | 0 | if (distance_limit_ == Distance::Zero()) return; |
605 | 0 | } |
606 | | // We start with a covering of the set of indexed points, then intersect it |
607 | | // with the given region (if any) and maximum search radius disc (if any). |
608 | 0 | if (index_covering_.empty()) InitCovering(); |
609 | 0 | const std::vector<S2CellId>* initial_cells = &index_covering_; |
610 | 0 | if (options().region()) { |
611 | 0 | S2RegionCoverer coverer; |
612 | 0 | coverer.mutable_options()->set_max_cells(4); |
613 | 0 | coverer.GetCovering(*options().region(), ®ion_covering_); |
614 | 0 | S2CellUnion::GetIntersection(index_covering_, region_covering_, |
615 | 0 | &intersection_with_region_); |
616 | 0 | initial_cells = &intersection_with_region_; |
617 | 0 | } |
618 | 0 | if (distance_limit_ < Distance::Infinity()) { |
619 | 0 | S2RegionCoverer coverer; |
620 | 0 | coverer.mutable_options()->set_max_cells(4); |
621 | 0 | S1ChordAngle radius = cap.radius() + distance_limit_.GetChordAngleBound(); |
622 | 0 | S2Cap search_cap(cap.center(), radius); |
623 | 0 | coverer.GetFastCovering(search_cap, &max_distance_covering_); |
624 | 0 | S2CellUnion::GetIntersection(*initial_cells, max_distance_covering_, |
625 | 0 | &intersection_with_max_distance_); |
626 | 0 | initial_cells = &intersection_with_max_distance_; |
627 | 0 | } |
628 | 0 | iter_.Begin(); |
629 | 0 | for (size_t i = 0; i < initial_cells->size() && !iter_.done(); ++i) { |
630 | 0 | S2CellId id = (*initial_cells)[i]; |
631 | 0 | ProcessOrEnqueue(id, &iter_, id.range_min() > iter_.id() /*seek*/); |
632 | 0 | } |
633 | 0 | } |
634 | | |
635 | | template <class Distance, class Data> |
636 | 0 | void S2ClosestPointQueryBase<Distance, Data>::InitCovering() { |
637 | | // Compute the "index covering", which is a small number of S2CellIds that |
638 | | // cover the indexed points. There are two cases: |
639 | | // |
640 | | // - If the index spans more than one face, then there is one covering cell |
641 | | // per spanned face, just big enough to cover the index cells on that face. |
642 | | // |
643 | | // - If the index spans only one face, then we find the smallest cell "C" |
644 | | // that covers the index cells on that face (just like the case above). |
645 | | // Then for each of the 4 children of "C", if the child contains any index |
646 | | // cells then we create a covering cell that is big enough to just fit |
647 | | // those index cells (i.e., shrinking the child as much as possible to fit |
648 | | // its contents). This essentially replicates what would happen if we |
649 | | // started with "C" as the covering cell, since "C" would immediately be |
650 | | // split, except that we take the time to prune the children further since |
651 | | // this will save work on every subsequent query. |
652 | 0 | index_covering_.reserve(6); |
653 | 0 | iter_.Finish(); |
654 | 0 | if (!iter_.Prev()) return; // Empty index. |
655 | 0 | S2CellId index_last_id = iter_.id(); |
656 | 0 | iter_.Begin(); |
657 | 0 | if (iter_.id() != index_last_id) { |
658 | | // The index has at least two cells. Choose a level such that the entire |
659 | | // index can be spanned with at most 6 cells (if the index spans multiple |
660 | | // faces) or 4 cells (it the index spans a single face). |
661 | 0 | int level = iter_.id().GetCommonAncestorLevel(index_last_id) + 1; |
662 | | |
663 | | // Visit each potential covering cell except the last (handled below). |
664 | 0 | S2CellId last_id = index_last_id.parent(level); |
665 | 0 | for (S2CellId id = iter_.id().parent(level); |
666 | 0 | id != last_id; id = id.next()) { |
667 | | // Skip any covering cells that don't contain any index cells. |
668 | 0 | if (id.range_max() < iter_.id()) continue; |
669 | | |
670 | | // Find the range of index cells contained by this covering cell and |
671 | | // then shrink the cell if necessary so that it just covers them. |
672 | 0 | S2CellId cell_first_id = iter_.id(); |
673 | 0 | iter_.Seek(id.range_max().next()); |
674 | 0 | iter_.Prev(); |
675 | 0 | S2CellId cell_last_id = iter_.id(); |
676 | 0 | iter_.Next(); |
677 | 0 | AddInitialRange(cell_first_id, cell_last_id); |
678 | 0 | } |
679 | 0 | } |
680 | 0 | AddInitialRange(iter_.id(), index_last_id); |
681 | 0 | } |
682 | | |
683 | | // Adds a cell to index_covering_ that covers the given inclusive range. |
684 | | // |
685 | | // REQUIRES: "first" and "last" have a common ancestor. |
686 | | template <class Distance, class Data> |
687 | | void S2ClosestPointQueryBase<Distance, Data>::AddInitialRange( |
688 | 0 | S2CellId first_id, S2CellId last_id) { |
689 | | // Add the lowest common ancestor of the given range. |
690 | 0 | int level = first_id.GetCommonAncestorLevel(last_id); |
691 | 0 | ABSL_DCHECK_GE(level, 0); |
692 | 0 | index_covering_.push_back(first_id.parent(level)); |
693 | 0 | } |
694 | | |
695 | | template <class Distance, class Data> |
696 | | void S2ClosestPointQueryBase<Distance, Data>::MaybeAddResult( |
697 | 0 | const PointData* point_data) { |
698 | 0 | Distance distance = distance_limit_; |
699 | 0 | if (!target_->UpdateMinDistance(point_data->point(), &distance)) return; |
700 | | |
701 | 0 | const S2Region* region = options().region(); |
702 | 0 | if (region && !region->Contains(point_data->point())) return; |
703 | | |
704 | 0 | Result result(distance, point_data); |
705 | 0 | if (options().max_results() == 1) { |
706 | | // Optimization for the common case where only the closest point is wanted. |
707 | 0 | result_singleton_ = result; |
708 | 0 | distance_limit_ = result.distance() - options().max_error(); |
709 | 0 | } else if (options().max_results() == Options::kMaxMaxResults) { |
710 | 0 | result_vector_.push_back(result); // Sort/unique at end. |
711 | 0 | } else { |
712 | | // Add this point to result_set_. Note that with the current algorithm |
713 | | // each candidate point is considered at most once (except for one special |
714 | | // case where max_results() == 1, see InitQueue for details), so we don't |
715 | | // need to worry about possibly adding a duplicate entry here. |
716 | 0 | if (result_set_.size() >= static_cast<size_t>(options().max_results())) { |
717 | 0 | result_set_.pop(); // Replace the furthest result point. |
718 | 0 | } |
719 | 0 | result_set_.push(result); |
720 | 0 | if (result_set_.size() >= static_cast<size_t>(options().max_results())) { |
721 | 0 | distance_limit_ = result_set_.top().distance() - options().max_error(); |
722 | 0 | } |
723 | 0 | } |
724 | 0 | } |
725 | | |
726 | | // Either process the contents of the given cell immediately, or add it to the |
727 | | // queue to be subdivided. If "seek" is false, then "iter" must already be |
728 | | // positioned at the first indexed point within or after this cell. |
729 | | // |
730 | | // Returns "true" if the cell was added to the queue, and "false" if it was |
731 | | // processed immediately, in which case "iter" is left positioned at the next |
732 | | // cell in S2CellId order. |
733 | | template <class Distance, class Data> |
734 | | bool S2ClosestPointQueryBase<Distance, Data>::ProcessOrEnqueue( |
735 | 0 | S2CellId id, Iterator* iter, bool seek) { |
736 | 0 | if (seek) iter->Seek(id.range_min()); |
737 | 0 | if (id.is_leaf()) { |
738 | | // Leaf cells can't be subdivided. |
739 | 0 | for (; !iter->done() && iter->id() == id; iter->Next()) { |
740 | 0 | MaybeAddResult(&iter->point_data()); |
741 | 0 | } |
742 | 0 | return false; // No need to seek to next child. |
743 | 0 | } |
744 | 0 | S2CellId last = id.range_max(); |
745 | 0 | int num_points = 0; |
746 | 0 | for (; !iter->done() && iter->id() <= last; iter->Next()) { |
747 | 0 | if (num_points == kMinPointsToEnqueue - 1) { |
748 | | // This cell has too many points (including this one), so enqueue it. |
749 | 0 | S2Cell cell(id); |
750 | 0 | Distance distance = distance_limit_; |
751 | | // We check "region_" second because it may be relatively expensive. |
752 | 0 | if (target_->UpdateMinDistance(cell, &distance) && |
753 | 0 | (!options().region() || options().region()->MayIntersect(cell))) { |
754 | 0 | if (use_conservative_cell_distance_) { |
755 | | // Ensure that "distance" is a lower bound on distance to the cell. |
756 | 0 | distance = distance - options().max_error(); |
757 | 0 | } |
758 | 0 | queue_.push(QueueEntry(distance, id)); |
759 | 0 | } |
760 | 0 | return true; // Seek to next child. |
761 | 0 | } |
762 | 0 | tmp_point_data_[num_points++] = &iter->point_data(); |
763 | 0 | } |
764 | | // There were few enough points that we might as well process them now. |
765 | 0 | for (int i = 0; i < num_points; ++i) { |
766 | 0 | MaybeAddResult(tmp_point_data_[i]); |
767 | 0 | } |
768 | 0 | return false; // No need to seek to next child. |
769 | 0 | } |
770 | | |
771 | | #endif // S2_S2CLOSEST_POINT_QUERY_BASE_H_ |