/src/s2geometry/src/s2/encoded_string_vector.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2018 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS-IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | // |
15 | | |
16 | | // Author: ericv@google.com (Eric Veach) |
17 | | |
18 | | #ifndef S2_ENCODED_STRING_VECTOR_H_ |
19 | | #define S2_ENCODED_STRING_VECTOR_H_ |
20 | | |
21 | | #include <cstddef> |
22 | | |
23 | | #include <cstdint> |
24 | | #include <memory> |
25 | | #include <string> |
26 | | #include <vector> |
27 | | |
28 | | #include "absl/strings/string_view.h" |
29 | | #include "absl/types/span.h" |
30 | | #include "s2/util/coding/coder.h" |
31 | | #include "s2/encoded_uint_vector.h" |
32 | | |
33 | | namespace s2coding { |
34 | | |
35 | | // This class allows an EncodedStringVector to be created by adding strings |
36 | | // incrementally. It also supports adding strings that are the output of |
37 | | // another Encoder. For example, to create a vector of encoded S2Polygons, |
38 | | // you can do this: |
39 | | // |
40 | | // void EncodePolygons(const vector<S2Polygon*>& polygons, Encoder* encoder) { |
41 | | // StringVectorEncoder encoded_polygons; |
42 | | // for (auto polygon : polygons) { |
43 | | // polygon->Encode(encoded_polygons.AddViaEncoder()); |
44 | | // } |
45 | | // encoded_polygons.Encode(encoder); |
46 | | // } |
47 | | class StringVectorEncoder { |
48 | | public: |
49 | | StringVectorEncoder(); |
50 | | |
51 | | // Adds a string to the encoded vector. |
52 | | void Add(absl::string_view str); |
53 | | |
54 | | // Adds a string to the encoded vector by means of the given Encoder. The |
55 | | // string consists of all output added to the encoder before the next call |
56 | | // to any method of this class (after which the encoder is no longer valid). |
57 | | Encoder* AddViaEncoder(); |
58 | | |
59 | | // Appends the EncodedStringVector representation to the given Encoder. |
60 | | // |
61 | | // REQUIRES: "encoder" uses the default constructor, so that its buffer |
62 | | // can be enlarged as necessary by calling Ensure(int). |
63 | | void Encode(Encoder* encoder); |
64 | | |
65 | | // Encodes a vector of strings in a format that can later be decoded as an |
66 | | // EncodedStringVector. |
67 | | // |
68 | | // REQUIRES: "encoder" uses the default constructor, so that its buffer |
69 | | // can be enlarged as necessary by calling Ensure(int). |
70 | | static void Encode(absl::Span<const std::string> v, Encoder* encoder); |
71 | | |
72 | | private: |
73 | | // A vector consisting of the starting offset of each string in the |
74 | | // encoder's data buffer, plus a final entry pointing just past the end of |
75 | | // the last string. |
76 | | std::vector<uint64_t> offsets_; |
77 | | Encoder data_; |
78 | | }; |
79 | | |
80 | | // This class represents an encoded vector of strings. Values are decoded |
81 | | // only when they are accessed. This allows for very fast initialization and |
82 | | // no additional memory use beyond the encoded data. The encoded data is not |
83 | | // owned by this class; typically it points into a large contiguous buffer |
84 | | // that contains other encoded data as well. |
85 | | // |
86 | | // This is one of several helper classes that allow complex data structures to |
87 | | // be initialized from an encoded format in constant time and then decoded on |
88 | | // demand. This can be a big performance advantage when only a small part of |
89 | | // the data structure is actually used. |
90 | | class EncodedStringVector { |
91 | | public: |
92 | | // Constructs an uninitialized object; requires Init() to be called. |
93 | | EncodedStringVector() = default; |
94 | | |
95 | | // Initializes the EncodedStringVector. Returns false on errors, leaving |
96 | | // the vector in an unspecified state. |
97 | | // |
98 | | // REQUIRES: The Decoder data buffer must outlive this object. |
99 | | bool Init(Decoder* decoder); |
100 | | |
101 | | // Resets the vector to be empty. |
102 | | void Clear(); |
103 | | |
104 | | // Returns the size of the original vector. |
105 | | size_t size() const; |
106 | | |
107 | | // Returns the string at the given index. |
108 | | absl::string_view operator[](size_t i) const; |
109 | | |
110 | | // Returns a Decoder initialized with the string at the given index. |
111 | | Decoder GetDecoder(size_t i) const; |
112 | | |
113 | | // Returns a pointer to the start of the string at the given index. This is |
114 | | // faster than operator[] but returns an unbounded string. |
115 | | const char* GetStart(size_t i) const; |
116 | | |
117 | | // Returns the entire vector of original strings. Requires that the |
118 | | // data buffer passed to the constructor persists until the result vector is |
119 | | // no longer needed. |
120 | | std::vector<absl::string_view> Decode() const; |
121 | | |
122 | | // Copies the encoded byte stream to a new encoder. |
123 | | void Encode(Encoder* encoder) const; |
124 | | |
125 | | private: |
126 | | EncodedUintVector<uint64_t> offsets_; |
127 | | const char* data_; |
128 | | }; |
129 | | |
130 | | |
131 | | ////////////////// Implementation details follow //////////////////// |
132 | | |
133 | 0 | inline void StringVectorEncoder::Add(absl::string_view str) { |
134 | 0 | offsets_.push_back(data_.length()); |
135 | 0 | data_.Ensure(str.size()); |
136 | 0 | data_.putn(str.data(), str.size()); |
137 | 0 | } |
138 | | |
139 | 0 | inline Encoder* StringVectorEncoder::AddViaEncoder() { |
140 | 0 | offsets_.push_back(data_.length()); |
141 | 0 | return &data_; |
142 | 0 | } |
143 | | |
144 | 0 | inline void EncodedStringVector::Clear() { |
145 | 0 | offsets_.Clear(); |
146 | 0 | data_ = nullptr; |
147 | 0 | } |
148 | | |
149 | 0 | inline size_t EncodedStringVector::size() const { |
150 | 0 | return offsets_.size(); |
151 | 0 | } |
152 | | |
153 | 0 | inline absl::string_view EncodedStringVector::operator[](size_t i) const { |
154 | 0 | uint64_t start = (i == 0) ? 0 : offsets_[i - 1]; |
155 | 0 | uint64_t limit = offsets_[i]; |
156 | 0 | return absl::string_view(data_ + start, limit - start); |
157 | 0 | } |
158 | | |
159 | 0 | inline Decoder EncodedStringVector::GetDecoder(size_t i) const { |
160 | | // Return an empty decoder if we don't have enough data. |
161 | 0 | if (i >= offsets_.size()) { |
162 | 0 | return Decoder(); |
163 | 0 | } |
164 | | |
165 | 0 | uint64_t start = (i == 0) ? 0 : offsets_[i - 1]; |
166 | 0 | uint64_t limit = offsets_[i]; |
167 | 0 | return Decoder(data_ + start, limit - start); |
168 | 0 | } |
169 | | |
170 | 0 | inline const char* EncodedStringVector::GetStart(size_t i) const { |
171 | 0 | uint64_t start = (i == 0) ? 0 : offsets_[i - 1]; |
172 | 0 | return data_ + start; |
173 | 0 | } |
174 | | |
175 | | } // namespace s2coding |
176 | | |
177 | | #endif // S2_ENCODED_STRING_VECTOR_H_ |