/src/abseil-cpp/absl/log/internal/proto.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2020 The Abseil Authors. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // ----------------------------------------------------------------------------- |
16 | | // File: internal/proto.h |
17 | | // ----------------------------------------------------------------------------- |
18 | | // |
19 | | // Declares functions for serializing and deserializing data to and from memory |
20 | | // buffers in protocol buffer wire format. This library takes no steps to |
21 | | // ensure that the encoded data matches with any message specification. |
22 | | |
23 | | #ifndef ABSL_LOG_INTERNAL_PROTO_H_ |
24 | | #define ABSL_LOG_INTERNAL_PROTO_H_ |
25 | | |
26 | | #include <cstddef> |
27 | | #include <cstdint> |
28 | | #include <limits> |
29 | | |
30 | | #include "absl/base/attributes.h" |
31 | | #include "absl/base/casts.h" |
32 | | #include "absl/base/config.h" |
33 | | #include "absl/strings/string_view.h" |
34 | | #include "absl/types/span.h" |
35 | | |
36 | | namespace absl { |
37 | | ABSL_NAMESPACE_BEGIN |
38 | | namespace log_internal { |
39 | | |
40 | | // absl::Span<char> represents a view into the available space in a mutable |
41 | | // buffer during encoding. Encoding functions shrink the span as they go so |
42 | | // that the same view can be passed to a series of Encode functions. If the |
43 | | // data do not fit, nothing is encoded, the view is set to size zero (so that |
44 | | // all subsequent encode calls fail), and false is returned. Otherwise true is |
45 | | // returned. |
46 | | |
47 | | // In particular, attempting to encode a series of data into an insufficient |
48 | | // buffer has consistent and efficient behavior without any caller-side error |
49 | | // checking. Individual values will be encoded in their entirety or not at all |
50 | | // (unless one of the `Truncate` functions is used). Once a value is omitted |
51 | | // because it does not fit, no subsequent values will be encoded to preserve |
52 | | // ordering; the decoded sequence will be a prefix of the original sequence. |
53 | | |
54 | | // There are two ways to encode a message-typed field: |
55 | | // |
56 | | // * Construct its contents in a separate buffer and use `EncodeBytes` to copy |
57 | | // it into the primary buffer with type, tag, and length. |
58 | | // * Use `EncodeMessageStart` to write type and tag fields and reserve space for |
59 | | // the length field, then encode the contents directly into the buffer, then |
60 | | // use `EncodeMessageLength` to write the actual length into the reserved |
61 | | // bytes. This works fine if the actual length takes fewer bytes to encode |
62 | | // than were reserved, although you don't get your extra bytes back. |
63 | | // This approach will always produce a valid encoding, but your protocol may |
64 | | // require that the whole message field be omitted if the buffer is too small |
65 | | // to contain all desired subfields. In this case, operate on a copy of the |
66 | | // buffer view and assign back only if everything fit, i.e. if the last |
67 | | // `Encode` call returned true. |
68 | | |
69 | | // Encodes the specified integer as a varint field and returns true if it fits. |
70 | | // Used for int32_t, int64_t, uint32_t, uint64_t, bool, and enum field types. |
71 | | // Consumes up to kMaxVarintSize * 2 bytes (20). |
72 | | bool EncodeVarint(uint64_t tag, uint64_t value, absl::Span<char> *buf); |
73 | 0 | inline bool EncodeVarint(uint64_t tag, int64_t value, absl::Span<char> *buf) { |
74 | 0 | return EncodeVarint(tag, static_cast<uint64_t>(value), buf); |
75 | 0 | } |
76 | 0 | inline bool EncodeVarint(uint64_t tag, uint32_t value, absl::Span<char> *buf) { |
77 | 0 | return EncodeVarint(tag, static_cast<uint64_t>(value), buf); |
78 | 0 | } |
79 | 0 | inline bool EncodeVarint(uint64_t tag, int32_t value, absl::Span<char> *buf) { |
80 | 0 | return EncodeVarint(tag, static_cast<uint64_t>(value), buf); |
81 | 0 | } |
82 | | |
83 | | // Encodes the specified integer as a varint field using ZigZag encoding and |
84 | | // returns true if it fits. |
85 | | // Used for sint32 and sint64 field types. |
86 | | // Consumes up to kMaxVarintSize * 2 bytes (20). |
87 | | inline bool EncodeVarintZigZag(uint64_t tag, int64_t value, |
88 | 0 | absl::Span<char> *buf) { |
89 | 0 | if (value < 0) |
90 | 0 | return EncodeVarint(tag, 2 * static_cast<uint64_t>(-(value + 1)) + 1, buf); |
91 | 0 | return EncodeVarint(tag, 2 * static_cast<uint64_t>(value), buf); |
92 | 0 | } |
93 | | |
94 | | // Encodes the specified integer as a 64-bit field and returns true if it fits. |
95 | | // Used for fixed64 and sfixed64 field types. |
96 | | // Consumes up to kMaxVarintSize + 8 bytes (18). |
97 | | bool Encode64Bit(uint64_t tag, uint64_t value, absl::Span<char> *buf); |
98 | 0 | inline bool Encode64Bit(uint64_t tag, int64_t value, absl::Span<char> *buf) { |
99 | 0 | return Encode64Bit(tag, static_cast<uint64_t>(value), buf); |
100 | 0 | } |
101 | 0 | inline bool Encode64Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf) { |
102 | 0 | return Encode64Bit(tag, static_cast<uint64_t>(value), buf); |
103 | 0 | } |
104 | 0 | inline bool Encode64Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) { |
105 | 0 | return Encode64Bit(tag, static_cast<uint64_t>(value), buf); |
106 | 0 | } |
107 | | |
108 | | // Encodes the specified double as a 64-bit field and returns true if it fits. |
109 | | // Used for double field type. |
110 | | // Consumes up to kMaxVarintSize + 8 bytes (18). |
111 | 0 | inline bool EncodeDouble(uint64_t tag, double value, absl::Span<char> *buf) { |
112 | 0 | return Encode64Bit(tag, absl::bit_cast<uint64_t>(value), buf); |
113 | 0 | } |
114 | | |
115 | | // Encodes the specified integer as a 32-bit field and returns true if it fits. |
116 | | // Used for fixed32 and sfixed32 field types. |
117 | | // Consumes up to kMaxVarintSize + 4 bytes (14). |
118 | | bool Encode32Bit(uint64_t tag, uint32_t value, absl::Span<char> *buf); |
119 | 0 | inline bool Encode32Bit(uint64_t tag, int32_t value, absl::Span<char> *buf) { |
120 | 0 | return Encode32Bit(tag, static_cast<uint32_t>(value), buf); |
121 | 0 | } |
122 | | |
123 | | // Encodes the specified float as a 32-bit field and returns true if it fits. |
124 | | // Used for float field type. |
125 | | // Consumes up to kMaxVarintSize + 4 bytes (14). |
126 | 0 | inline bool EncodeFloat(uint64_t tag, float value, absl::Span<char> *buf) { |
127 | 0 | return Encode32Bit(tag, absl::bit_cast<uint32_t>(value), buf); |
128 | 0 | } |
129 | | |
130 | | // Encodes the specified bytes as a length-delimited field and returns true if |
131 | | // they fit. |
132 | | // Used for string, bytes, message, and packed-repeated field type. |
133 | | // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). |
134 | | bool EncodeBytes(uint64_t tag, absl::Span<const char> value, |
135 | | absl::Span<char> *buf); |
136 | | |
137 | | // Encodes as many of the specified bytes as will fit as a length-delimited |
138 | | // field and returns true as long as the field header (`tag_type` and `length`) |
139 | | // fits. |
140 | | // Used for string, bytes, message, and packed-repeated field type. |
141 | | // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). |
142 | | bool EncodeBytesTruncate(uint64_t tag, absl::Span<const char> value, |
143 | | absl::Span<char> *buf); |
144 | | |
145 | | // Encodes the specified string as a length-delimited field and returns true if |
146 | | // it fits. |
147 | | // Used for string, bytes, message, and packed-repeated field type. |
148 | | // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). |
149 | | inline bool EncodeString(uint64_t tag, absl::string_view value, |
150 | 0 | absl::Span<char> *buf) { |
151 | 0 | return EncodeBytes(tag, value, buf); |
152 | 0 | } |
153 | | |
154 | | // Encodes as much of the specified string as will fit as a length-delimited |
155 | | // field and returns true as long as the field header (`tag_type` and `length`) |
156 | | // fits. |
157 | | // Used for string, bytes, message, and packed-repeated field type. |
158 | | // Consumes up to kMaxVarintSize * 2 + value.size() bytes (20 + value.size()). |
159 | | inline bool EncodeStringTruncate(uint64_t tag, absl::string_view value, |
160 | 3.24M | absl::Span<char> *buf) { |
161 | 3.24M | return EncodeBytesTruncate(tag, value, buf); |
162 | 3.24M | } |
163 | | |
164 | | // Encodes the header for a length-delimited field containing up to `max_size` |
165 | | // bytes or the number remaining in the buffer, whichever is less. If the |
166 | | // header fits, a non-nullptr `Span` is returned; this must be passed to |
167 | | // `EncodeMessageLength` after all contents are encoded to finalize the length |
168 | | // field. If the header does not fit, a nullptr `Span` is returned which is |
169 | | // safe to pass to `EncodeMessageLength` but need not be. |
170 | | // Used for string, bytes, message, and packed-repeated field type. |
171 | | // Consumes up to kMaxVarintSize * 2 bytes (20). |
172 | | ABSL_MUST_USE_RESULT absl::Span<char> EncodeMessageStart(uint64_t tag, |
173 | | uint64_t max_size, |
174 | | absl::Span<char> *buf); |
175 | | |
176 | | // Finalizes the length field in `msg` so that it encompasses all data encoded |
177 | | // since the call to `EncodeMessageStart` which returned `msg`. Does nothing if |
178 | | // `msg` is a `nullptr` `Span`. |
179 | | void EncodeMessageLength(absl::Span<char> msg, const absl::Span<char> *buf); |
180 | | |
// Identifies how a field's payload is laid out in the encoded buffer.
// The numeric values are fixed explicitly; 3 and 4 are not used here.
enum class WireType : uint64_t {
  // Payload is a single variable-length integer.
  kVarint = 0,
  // Payload is a fixed-width 8-byte value.
  k64Bit = 1,
  // Payload is a varint length followed by that many bytes.
  kLengthDelimited = 2,
  // Payload is a fixed-width 4-byte value.
  k32Bit = 5,
};
187 | | |
// Returns the number of bytes needed to encode `value` as a varint: one byte
// per 7 bits of payload, with a minimum of one byte.
constexpr size_t VarintSize(uint64_t value) {
  size_t bytes = 1;
  // Each additional byte carries the next 7 bits of the value.
  while (value >= 128) {
    value >>= 7;
    ++bytes;
  }
  return bytes;
}
191 | 0 | constexpr size_t MinVarintSize() { |
192 | 0 | return VarintSize((std::numeric_limits<uint64_t>::min)()); |
193 | 0 | } |
194 | 6.48M | constexpr size_t MaxVarintSize() { |
195 | 6.48M | return VarintSize((std::numeric_limits<uint64_t>::max)()); |
196 | 6.48M | } |
197 | | |
// Returns the largest value whose varint encoding occupies at most `size`
// bytes. Each byte contributes 7 bits, so the answer is 2^(7 * size) - 1; for
// `size` of 10 or more the full uint64_t range is representable.
constexpr uint64_t MaxVarintForSize(size_t size) {
  if (size >= 10) {
    return (std::numeric_limits<uint64_t>::max)();
  }
  // size <= 9 here, so the shift count is at most 63 and cannot overflow.
  const uint64_t one = 1;
  return (one << (size * 7)) - 1;
}
202 | | |
203 | | // `BufferSizeFor` returns a number of bytes guaranteed to be sufficient to |
204 | | // store encoded fields of the specified WireTypes regardless of tag numbers and |
205 | | // data values. This only makes sense for `WireType::kLengthDelimited` if you |
206 | | // add in the length of the contents yourself, e.g. for string and bytes fields |
207 | | // by adding the lengths of any encoded strings to the return value or for |
208 | | // submessage fields by enumerating the fields you may encode into their |
209 | | // contents. |
210 | 3.24M | constexpr size_t BufferSizeFor() { return 0; } |
211 | | template <typename... T> |
212 | 3.24M | constexpr size_t BufferSizeFor(WireType type, T... tail) { |
213 | | // tag_type + data + ... |
214 | 3.24M | return MaxVarintSize() + |
215 | 3.24M | (type == WireType::kVarint ? MaxVarintSize() : // |
216 | 3.24M | type == WireType::k64Bit ? 8 : // |
217 | 3.24M | type == WireType::k32Bit ? 4 : MaxVarintSize()) + // |
218 | 3.24M | BufferSizeFor(tail...); |
219 | 3.24M | } |
220 | | |
221 | | // absl::Span<const char> represents a view into the un-processed space in a |
222 | | // buffer during decoding. Decoding functions shrink the span as they go so |
223 | | // that the same view can be decoded iteratively until all data are processed. |
224 | | // In general, if the buffer is exhausted but additional bytes are expected by |
225 | | // the decoder, it will return values as if the additional bytes were zeros. |
226 | | // Length-delimited fields are an exception - if the encoded length field |
227 | | // indicates more data bytes than are available in the buffer, the `bytes_value` |
228 | | // and `string_value` accessors will return truncated views. |
229 | | |
230 | | class ProtoField final { |
231 | | public: |
232 | | // Consumes bytes from `data` and returns true if there were any bytes to |
233 | | // decode. |
234 | | bool DecodeFrom(absl::Span<const char> *data); |
235 | 12.9M | uint64_t tag() const { return tag_; } |
236 | 12.9M | WireType type() const { return type_; } |
237 | | |
238 | | // These value accessors will return nonsense if the data were not encoded in |
239 | | // the corresponding wiretype from the corresponding C++ (or other language) |
240 | | // type. |
241 | | |
242 | 0 | double double_value() const { return absl::bit_cast<double>(value_); } |
243 | 0 | float float_value() const { |
244 | 0 | return absl::bit_cast<float>(static_cast<uint32_t>(value_)); |
245 | 0 | } |
246 | 0 | int32_t int32_value() const { return static_cast<int32_t>(value_); } |
247 | 0 | int64_t int64_value() const { return static_cast<int64_t>(value_); } |
248 | 0 | int32_t sint32_value() const { |
249 | 0 | if (value_ % 2) return static_cast<int32_t>(0 - ((value_ - 1) / 2) - 1); |
250 | 0 | return static_cast<int32_t>(value_ / 2); |
251 | 0 | } |
252 | 0 | int64_t sint64_value() const { |
253 | 0 | if (value_ % 2) return 0 - ((value_ - 1) / 2) - 1; |
254 | 0 | return value_ / 2; |
255 | 0 | } |
256 | 0 | uint32_t uint32_value() const { return static_cast<uint32_t>(value_); } |
257 | 0 | uint64_t uint64_value() const { return value_; } |
258 | 0 | bool bool_value() const { return value_ != 0; } |
259 | | // To decode an enum, call int32_value() and cast to the appropriate type. |
260 | | // Note that the official C++ proto compiler treats enum fields with values |
261 | | // that do not correspond to a defined enumerator as unknown fields. |
262 | | |
263 | | // To decode fields within a submessage field, call |
264 | | // `DecodeNextField(field.BytesValue())`. |
265 | 19.4M | absl::Span<const char> bytes_value() const { return data_; } |
266 | 12.9M | absl::string_view string_value() const { |
267 | 12.9M | const auto data = bytes_value(); |
268 | 12.9M | return absl::string_view(data.data(), data.size()); |
269 | 12.9M | } |
270 | | // Returns the encoded length of a length-delimited field. This equals |
271 | | // `bytes_value().size()` except when the latter has been truncated due to |
272 | | // buffer underrun. |
273 | 0 | uint64_t encoded_length() const { return value_; } |
274 | | |
275 | | private: |
276 | | uint64_t tag_; |
277 | | WireType type_; |
278 | | // For `kTypeVarint`, `kType64Bit`, and `kType32Bit`, holds the decoded value. |
279 | | // For `kTypeLengthDelimited`, holds the decoded length. |
280 | | uint64_t value_; |
281 | | absl::Span<const char> data_; |
282 | | }; |
283 | | |
284 | | } // namespace log_internal |
285 | | ABSL_NAMESPACE_END |
286 | | } // namespace absl |
287 | | |
288 | | #endif // ABSL_LOG_INTERNAL_PROTO_H_ |