/src/s2geometry/src/s2/id_set_lexicon.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2016 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS-IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | |
16 | | // Author: ericv@google.com (Eric Veach) |
17 | | |
18 | | #ifndef S2_ID_SET_LEXICON_H_ |
19 | | #define S2_ID_SET_LEXICON_H_ |
20 | | |
21 | | #include <cstddef> |
22 | | |
23 | | #include <iterator> |
24 | | #include <limits> |
25 | | #include <vector> |
26 | | |
27 | | #include "s2/base/integral_types.h" |
28 | | #include "s2/base/logging.h" |
29 | | #include "s2/sequence_lexicon.h" |
30 | | |
31 | | // IdSetLexicon is a class for compactly representing sets of non-negative |
32 | | // integers such as array indices ("id sets"). It is especially suitable when |
33 | | // either (1) there are many duplicate sets, or (2) there are many singleton |
34 | | // or empty sets. See also ValueLexicon and SequenceLexicon. |
35 | | // |
36 | | // Each distinct id set is mapped to a 32-bit integer. Empty and singleton |
37 | | // sets take up no additional space whatsoever; the set itself is represented |
38 | | // by the unique id assigned to the set. Sets of size 2 or more occupy about |
39 | | // 11 bytes per set plus 4 bytes per element (as compared to 24 bytes per set |
40 | | // plus 4 bytes per element for std::vector). Duplicate sets are |
41 | | // automatically eliminated. Note also that id sets are referred to using |
42 | | // 32-bit integers rather than 64-bit pointers. |
43 | | // |
44 | | // This class is especially useful in conjunction with ValueLexicon<T>. For |
45 | | // example, suppose that you want to label objects with a set of strings. You |
46 | | // could use a ValueLexicon<string> to map the strings to "label ids" (32-bit |
47 | | // integers), and then use IdSetLexicon to map each set of labels to a "label |
48 | | // set id". Each reference to that label set then takes up only 4 bytes. |
49 | | // |
50 | | // Example usage: |
51 | | // |
52 | | // ValueLexicon<string> labels_; |
53 | | // IdSetLexicon label_sets_; |
54 | | // |
55 | | // int32 GetLabelSet(const vector<string>& label_strings) { |
56 | | // vector<int32> label_ids; |
57 | | // for (const auto& str : label_strings) { |
58 | | // label_ids.push_back(labels_.Add(str)); |
59 | | // } |
60 | | // return label_sets_.Add(label_ids); |
61 | | // } |
62 | | // |
63 | | // int label_set_id = GetLabelSet(...); |
64 | | // for (auto id : label_sets_.id_set(label_set_id)) { |
65 | | // S2_LOG(INFO) << id; |
66 | | // } |
67 | | // |
68 | | // This class is similar to SequenceLexicon, except: |
69 | | // |
70 | | // 1. Empty and singleton sets are represented implicitly; they use no space. |
71 | | // 2. Sets are represented rather than sequences; values are reordered to be in |
72 | | // sorted order, and duplicates are removed. |
73 | | // 3. The values must be 32-bit non-negative integers (only). |
74 | | class IdSetLexicon { |
75 | | public: |
76 | | IdSetLexicon(); |
77 | | ~IdSetLexicon(); |
78 | | |
79 | | // IdSetLexicon is movable and copyable. |
80 | | IdSetLexicon(const IdSetLexicon&); |
81 | | IdSetLexicon& operator=(const IdSetLexicon&); |
82 | | IdSetLexicon(IdSetLexicon&&); |
83 | | IdSetLexicon& operator=(IdSetLexicon&&); |
84 | | |
85 | | // Clears all data from the lexicon. |
86 | | void Clear(); |
87 | | |
88 | | // Add the given set of integers to the lexicon if it is not already |
89 | | // present, and return the unique id for this set. "begin" and "end" are |
90 | | // forward iterators over a sequence of values that can be converted to |
91 | | // non-negative 32-bit integers. The values are automatically sorted and |
92 | | // duplicates are removed. Returns a signed integer representing this set. |
93 | | // |
94 | | // REQUIRES: All values in [begin, end) are non-negative 32-bit integers. |
95 | | template <class FwdIterator> |
96 | | int32 Add(FwdIterator begin, FwdIterator end); |
97 | | |
98 | | // Add the given set of integers to the lexicon if it is not already |
99 | | // present, and return the unique id for this set. This is a convenience |
100 | | // method equivalent to Add(std::begin(container), std::end(container)). |
101 | | template <class Container> |
102 | | int32 Add(const Container& container); |
103 | | |
104 | | // Convenience method that returns the unique id for a singleton set. |
105 | | // Note that because singleton sets take up no space, this method is |
106 | | // const. Equivalent to calling Add(&id, &id + 1). |
107 | | int32 AddSingleton(int32 id) const; |
108 | | |
109 | | // Convenience method that returns the unique id for the empty set. Note |
110 | | // that because the empty set takes up no space and has a fixed id, this |
111 | | // method is static. Equivalent to calling Add() with an empty container. |
112 | | static int32 EmptySetId(); |
113 | | |
114 | | // Iterator type; please treat this as an opaque forward iterator. |
115 | | using Iterator = const int32*; |
116 | | |
117 | | // This class represents a set of integers stored in the IdSetLexicon. |
118 | | class IdSet { |
119 | | public: |
120 | | using value_type = const int32; |
121 | | |
122 | | Iterator begin() const; |
123 | | Iterator end() const; |
124 | | size_t size() const; |
125 | | |
126 | | private: |
127 | | friend class IdSetLexicon; |
128 | | IdSet(); |
129 | | IdSet(Iterator begin, Iterator end); |
130 | | explicit IdSet(int32 singleton_id); |
131 | | Iterator begin_, end_; |
132 | | int32 singleton_id_; |
133 | | }; |
134 | | // Return the set of integers corresponding to an id returned by Add(). |
135 | | IdSet id_set(int32 set_id) const; |
136 | | |
137 | | private: |
138 | | // Choose kEmptySetId to be the last id that will ever be generated. |
139 | | // (Non-negative ids are reserved for singleton sets.) |
140 | | static constexpr int32 kEmptySetId = std::numeric_limits<int32>::min(); |
141 | | int32 AddInternal(std::vector<int32>* ids); |
142 | | |
143 | | SequenceLexicon<int32> id_sets_; |
144 | | |
145 | | std::vector<int32> tmp_; // temporary storage used during Add() |
146 | | }; |
147 | | |
148 | | |
149 | | ////////////////// Implementation details follow //////////////////// |
150 | | |
151 | | |
152 | 0 | inline IdSetLexicon::Iterator IdSetLexicon::IdSet::begin() const { |
153 | 0 | return begin_; |
154 | 0 | } |
155 | | |
156 | 0 | inline IdSetLexicon::Iterator IdSetLexicon::IdSet::end() const { |
157 | 0 | return end_; |
158 | 0 | } |
159 | | |
160 | 0 | inline size_t IdSetLexicon::IdSet::size() const { |
161 | 0 | return end_ - begin_; |
162 | 0 | } |
163 | | |
164 | | inline IdSetLexicon::IdSet::IdSet() |
165 | 0 | : begin_(&singleton_id_), end_(begin_) { |
166 | 0 | } |
167 | | |
168 | | inline IdSetLexicon::IdSet::IdSet(Iterator begin, Iterator end) |
169 | 0 | : begin_(begin), end_(end) { |
170 | 0 | } |
171 | | |
172 | | inline IdSetLexicon::IdSet::IdSet(int32 singleton_id) |
173 | | : begin_(&singleton_id_), end_(&singleton_id_ + 1), |
174 | 0 | singleton_id_(singleton_id) { |
175 | 0 | } |
176 | | |
177 | 0 | inline int32 IdSetLexicon::AddSingleton(int32 id) const { |
178 | 0 | S2_DCHECK_GE(id, 0); |
179 | 0 | S2_DCHECK_LE(id, std::numeric_limits<int32>::max()); |
180 | | // Singleton sets are represented by their element. |
181 | 0 | return id; |
182 | 0 | } |
183 | | |
184 | 0 | /*static*/ inline int32 IdSetLexicon::EmptySetId() { |
185 | 0 | return kEmptySetId; |
186 | 0 | } |
187 | | |
188 | | template <class FwdIterator> |
189 | 0 | int32 IdSetLexicon::Add(FwdIterator begin, FwdIterator end) { |
190 | 0 | tmp_.clear(); |
191 | 0 | for (; begin != end; ++begin) { |
192 | 0 | S2_DCHECK_GE(*begin, 0); |
193 | 0 | S2_DCHECK_LE(*begin, std::numeric_limits<int32>::max()); |
194 | 0 | tmp_.push_back(*begin); |
195 | 0 | } |
196 | 0 | return AddInternal(&tmp_); |
197 | 0 | } |
198 | | |
199 | | template <class Container> |
200 | 0 | int32 IdSetLexicon::Add(const Container& container) { |
201 | 0 | return Add(std::begin(container), std::end(container)); |
202 | 0 | } |
203 | | |
204 | | #endif // S2_ID_SET_LEXICON_H_ |