Coverage Report

Created: 2023-09-25 06:27

/src/s2geometry/src/s2/id_set_lexicon.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2016 Google Inc. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS-IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
16
// Author: ericv@google.com (Eric Veach)
17
18
#ifndef S2_ID_SET_LEXICON_H_
19
#define S2_ID_SET_LEXICON_H_
20
21
#include <cstddef>
22
23
#include <iterator>
24
#include <limits>
25
#include <vector>
26
27
#include "s2/base/integral_types.h"
28
#include "s2/base/logging.h"
29
#include "s2/sequence_lexicon.h"
30
31
// IdSetLexicon is a class for compactly representing sets of non-negative
32
// integers such as array indices ("id sets").  It is especially suitable when
33
// either (1) there are many duplicate sets, or (2) there are many singleton
34
// or empty sets.  See also ValueLexicon and SequenceLexicon.
35
//
36
// Each distinct id set is mapped to a 32-bit integer.  Empty and singleton
37
// sets take up no additional space whatsoever; the set itself is represented
38
// by the unique id assigned to the set. Sets of size 2 or more occupy about
39
// 11 bytes per set plus 4 bytes per element (as compared to 24 bytes per set
40
// plus 4 bytes per element for std::vector).  Duplicate sets are
41
// automatically eliminated.  Note also that id sets are referred to using
42
// 32-bit integers rather than 64-bit pointers.
43
//
44
// This class is especially useful in conjunction with ValueLexicon<T>.  For
45
// example, suppose that you want to label objects with a set of strings.  You
46
// could use a ValueLexicon<string> to map the strings to "label ids" (32-bit
47
// integers), and then use IdSetLexicon to map each set of labels to a "label
48
// set id".  Each reference to that label set then takes up only 4 bytes.
49
//
50
// Example usage:
51
//
52
//   ValueLexicon<string> labels_;
53
//   IdSetLexicon label_sets_;
54
//
55
//   int32 GetLabelSet(const vector<string>& label_strings) {
56
//     vector<int32> label_ids;
57
//     for (const auto& str : label_strings) {
58
//       label_ids.push_back(labels_.Add(str));
59
//     }
60
//     return label_sets_.Add(label_ids);
61
//   }
62
//
63
//   int label_set_id = GetLabelSet(...);
64
//   for (auto id : label_sets_.id_set(label_set_id)) {
65
//     S2_LOG(INFO) << id;
66
//   }
67
//
68
// This class is similar to SequenceLexicon, except:
69
//
70
// 1. Empty and singleton sets are represented implicitly; they use no space.
71
// 2. Sets are represented rather than sequences; values are reordered to be in
72
//    sorted order, and duplicates are removed.
73
// 3. The values must be 32-bit non-negative integers (only).
74
class IdSetLexicon {
75
 public:
76
  IdSetLexicon();
77
  ~IdSetLexicon();
78
79
  // IdSetLexicon is movable and copyable.
80
  IdSetLexicon(const IdSetLexicon&);
81
  IdSetLexicon& operator=(const IdSetLexicon&);
82
  IdSetLexicon(IdSetLexicon&&);
83
  IdSetLexicon& operator=(IdSetLexicon&&);
84
85
  // Clears all data from the lexicon.
86
  void Clear();
87
88
  // Add the given set of integers to the lexicon if it is not already
89
  // present, and return the unique id for this set.  "begin" and "end" are
90
  // forward iterators over a sequence of values that can be converted to
91
  // non-negative 32-bit integers.  The values are automatically sorted and
92
  // duplicates are removed.  Returns a signed integer representing this set.
93
  //
94
  // REQUIRES: All values in [begin, end) are non-negative 32-bit integers.
95
  template <class FwdIterator>
96
  int32 Add(FwdIterator begin, FwdIterator end);
97
98
  // Add the given set of integers to the lexicon if it is not already
99
  // present, and return the unique id for this set.  This is a convenience
100
  // method equivalent to Add(std::begin(container), std::end(container)).
101
  template <class Container>
102
  int32 Add(const Container& container);
103
104
  // Convenience method that returns the unique id for a singleton set.
105
  // Note that because singleton sets take up no space, this method is
106
  // const.  Equivalent to calling Add(&id, &id + 1).
107
  int32 AddSingleton(int32 id) const;
108
109
  // Convenience method that returns the unique id for the empty set.  Note
110
  // that because the empty set takes up no space and has a fixed id, this
111
  // method is static.  Equivalent to calling Add() with an empty container.
112
  static int32 EmptySetId();
113
114
  // Iterator type; please treat this as an opaque forward iterator.
115
  using Iterator = const int32*;
116
117
  // This class represents a set of integers stored in the IdSetLexicon.
118
  class IdSet {
119
   public:
120
    using value_type = const int32;
121
122
    Iterator begin() const;
123
    Iterator end() const;
124
    size_t size() const;
125
126
   private:
127
    friend class IdSetLexicon;
128
    IdSet();
129
    IdSet(Iterator begin, Iterator end);
130
    explicit IdSet(int32 singleton_id);
131
    Iterator begin_, end_;
132
    int32 singleton_id_;
133
  };
134
  // Return the set of integers corresponding to an id returned by Add().
135
  IdSet id_set(int32 set_id) const;
136
137
 private:
138
  // Choose kEmptySetId to be the last id that will ever be generated.
139
  // (Non-negative ids are reserved for singleton sets.)
140
  static constexpr int32 kEmptySetId = std::numeric_limits<int32>::min();
141
  int32 AddInternal(std::vector<int32>* ids);
142
143
  SequenceLexicon<int32> id_sets_;
144
145
  std::vector<int32> tmp_;  // temporary storage used during Add()
146
};
147
148
149
//////////////////   Implementation details follow   ////////////////////
150
151
152
0
inline IdSetLexicon::Iterator IdSetLexicon::IdSet::begin() const {
153
0
  return begin_;
154
0
}
155
156
0
inline IdSetLexicon::Iterator IdSetLexicon::IdSet::end() const {
157
0
  return end_;
158
0
}
159
160
0
inline size_t IdSetLexicon::IdSet::size() const {
161
0
  return end_ - begin_;
162
0
}
163
164
inline IdSetLexicon::IdSet::IdSet()
165
0
    : begin_(&singleton_id_), end_(begin_) {
166
0
}
167
168
inline IdSetLexicon::IdSet::IdSet(Iterator begin, Iterator end)
169
0
    : begin_(begin), end_(end) {
170
0
}
171
172
inline IdSetLexicon::IdSet::IdSet(int32 singleton_id)
173
    : begin_(&singleton_id_), end_(&singleton_id_ + 1),
174
0
      singleton_id_(singleton_id) {
175
0
}
176
177
0
inline int32 IdSetLexicon::AddSingleton(int32 id) const {
178
0
  S2_DCHECK_GE(id, 0);
179
0
  S2_DCHECK_LE(id, std::numeric_limits<int32>::max());
180
  // Singleton sets are represented by their element.
181
0
  return id;
182
0
}
183
184
0
/*static*/ inline int32 IdSetLexicon::EmptySetId() {
185
0
  return kEmptySetId;
186
0
}
187
188
template <class FwdIterator>
189
0
int32 IdSetLexicon::Add(FwdIterator begin, FwdIterator end) {
190
0
  tmp_.clear();
191
0
  for (; begin != end; ++begin) {
192
0
    S2_DCHECK_GE(*begin, 0);
193
0
    S2_DCHECK_LE(*begin, std::numeric_limits<int32>::max());
194
0
    tmp_.push_back(*begin);
195
0
  }
196
0
  return AddInternal(&tmp_);
197
0
}
198
199
template <class Container>
200
0
int32 IdSetLexicon::Add(const Container& container) {
201
0
  return Add(std::begin(container), std::end(container));
202
0
}
203
204
#endif  // S2_ID_SET_LEXICON_H_