Line data Source code
1 : #if defined(ENVOY_ENABLE_FULL_PROTOS)
2 : #include "source/common/protobuf/deterministic_hash.h"
3 :
4 : #include "source/common/common/assert.h"
5 : #include "source/common/common/hash.h"
6 :
7 : namespace Envoy {
8 : namespace DeterministicProtoHash {
9 : namespace {
10 :
11 : // Get a scalar field from protobuf reflection field definition. The return
12 : // type must be specified by the caller. Every implementation is a specialization
13 : // because the reflection interface did separate named functions instead of a
14 : // template.
15 : template <typename T>
16 : T reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
17 : const Protobuf::FieldDescriptor& field);
18 :
19 : template <>
20 : uint32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
21 0 : const Protobuf::FieldDescriptor& field) {
22 0 : return reflection.GetUInt32(message, &field);
23 0 : }
24 :
25 : template <>
26 : int32_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
27 0 : const Protobuf::FieldDescriptor& field) {
28 0 : return reflection.GetInt32(message, &field);
29 0 : }
30 :
31 : template <>
32 : uint64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
33 0 : const Protobuf::FieldDescriptor& field) {
34 0 : return reflection.GetUInt64(message, &field);
35 0 : }
36 :
37 : template <>
38 : int64_t reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
39 0 : const Protobuf::FieldDescriptor& field) {
40 0 : return reflection.GetInt64(message, &field);
41 0 : }
42 :
43 : template <>
44 : float reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
45 0 : const Protobuf::FieldDescriptor& field) {
46 0 : return reflection.GetFloat(message, &field);
47 0 : }
48 :
49 : template <>
50 : double reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
51 0 : const Protobuf::FieldDescriptor& field) {
52 0 : return reflection.GetDouble(message, &field);
53 0 : }
54 :
55 : template <>
56 : bool reflectionGet(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
57 0 : const Protobuf::FieldDescriptor& field) {
58 0 : return reflection.GetBool(message, &field);
59 0 : }
60 :
61 : // Takes a field of scalar type, and hashes it. In case the field is a repeated field,
62 : // the function hashes each of its elements.
63 : template <typename T, std::enable_if_t<std::is_scalar_v<T>, bool> = true>
64 : uint64_t hashScalarField(const Protobuf::Reflection& reflection, const Protobuf::Message& message,
65 0 : const Protobuf::FieldDescriptor& field, uint64_t seed) {
66 0 : if (field.is_repeated()) {
67 0 : for (const T& scalar : reflection.GetRepeatedFieldRef<T>(message, &field)) {
68 0 : seed = HashUtil::xxHash64Value(scalar, seed);
69 0 : }
70 0 : } else {
71 0 : seed = HashUtil::xxHash64Value(reflectionGet<T>(reflection, message, field), seed);
72 0 : }
73 0 : return seed;
74 0 : }
75 :
76 : uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed = 0);
77 : uint64_t reflectionHashField(const Protobuf::Message& message,
78 : const Protobuf::FieldDescriptor& field, uint64_t seed);
79 :
80 : // To make a map serialize deterministically we need to ignore the order of
81 : // the map fields. To do that, we simply combine the hashes of each entry
82 : // using an unordered operator (addition), and then apply that combined hash to
83 : // the seed.
84 : uint64_t reflectionHashMapField(const Protobuf::Message& message,
85 0 : const Protobuf::FieldDescriptor& field, uint64_t seed) {
86 0 : const Protobuf::Reflection& reflection = *message.GetReflection();
87 0 : ASSERT(field.is_map());
88 0 : const auto& entries = reflection.GetRepeatedFieldRef<Protobuf::Message>(message, &field);
89 0 : ASSERT(!entries.empty());
90 0 : const Protobuf::Descriptor& map_descriptor = *entries.begin()->GetDescriptor();
91 0 : const Protobuf::FieldDescriptor& key_field = *map_descriptor.map_key();
92 0 : const Protobuf::FieldDescriptor& value_field = *map_descriptor.map_value();
93 0 : uint64_t combined_hash = 0;
94 0 : for (const Protobuf::Message& entry : entries) {
95 0 : uint64_t entry_hash = reflectionHashField(entry, key_field, 0);
96 0 : entry_hash = reflectionHashField(entry, value_field, entry_hash);
97 0 : combined_hash += entry_hash;
98 0 : }
99 0 : return HashUtil::xxHash64Value(combined_hash, seed);
100 0 : }
101 :
102 : uint64_t reflectionHashField(const Protobuf::Message& message,
103 0 : const Protobuf::FieldDescriptor& field, uint64_t seed) {
104 0 : using Protobuf::FieldDescriptor;
105 0 : const Protobuf::Reflection& reflection = *message.GetReflection();
106 0 : seed = HashUtil::xxHash64Value(field.number(), seed);
107 0 : switch (field.cpp_type()) {
108 0 : case FieldDescriptor::CPPTYPE_INT32:
109 0 : seed = hashScalarField<int32_t>(reflection, message, field, seed);
110 0 : break;
111 0 : case FieldDescriptor::CPPTYPE_UINT32:
112 0 : seed = hashScalarField<uint32_t>(reflection, message, field, seed);
113 0 : break;
114 0 : case FieldDescriptor::CPPTYPE_INT64:
115 0 : seed = hashScalarField<int64_t>(reflection, message, field, seed);
116 0 : break;
117 0 : case FieldDescriptor::CPPTYPE_UINT64:
118 0 : seed = hashScalarField<uint64_t>(reflection, message, field, seed);
119 0 : break;
120 0 : case FieldDescriptor::CPPTYPE_DOUBLE:
121 0 : seed = hashScalarField<double>(reflection, message, field, seed);
122 0 : break;
123 0 : case FieldDescriptor::CPPTYPE_FLOAT:
124 0 : seed = hashScalarField<float>(reflection, message, field, seed);
125 0 : break;
126 0 : case FieldDescriptor::CPPTYPE_BOOL:
127 0 : seed = hashScalarField<bool>(reflection, message, field, seed);
128 0 : break;
129 0 : case FieldDescriptor::CPPTYPE_ENUM:
130 0 : if (field.is_repeated()) {
131 0 : const int c = reflection.FieldSize(message, &field);
132 0 : for (int i = 0; i < c; i++) {
133 0 : seed = HashUtil::xxHash64Value(reflection.GetRepeatedEnumValue(message, &field, i), seed);
134 0 : }
135 0 : } else {
136 0 : seed = HashUtil::xxHash64Value(reflection.GetEnumValue(message, &field), seed);
137 0 : }
138 0 : break;
139 0 : case FieldDescriptor::CPPTYPE_STRING:
140 0 : if (field.is_repeated()) {
141 0 : for (const std::string& str : reflection.GetRepeatedFieldRef<std::string>(message, &field)) {
142 0 : seed = HashUtil::xxHash64(str, seed);
143 0 : }
144 0 : } else {
145 : // Scratch may be used by GetStringReference if the field is not already a std::string.
146 0 : std::string scratch;
147 0 : seed = HashUtil::xxHash64(reflection.GetStringReference(message, &field, &scratch), seed);
148 0 : }
149 0 : break;
150 0 : case FieldDescriptor::CPPTYPE_MESSAGE:
151 0 : if (field.is_map()) {
152 0 : seed = reflectionHashMapField(message, field, seed);
153 0 : } else if (field.is_repeated()) {
154 0 : for (const Protobuf::Message& submsg :
155 0 : reflection.GetRepeatedFieldRef<Protobuf::Message>(message, &field)) {
156 0 : seed = reflectionHashMessage(submsg, seed);
157 0 : }
158 0 : } else {
159 0 : seed = reflectionHashMessage(reflection.GetMessage(message, &field), seed);
160 0 : }
161 0 : break;
162 0 : }
163 0 : return seed;
164 0 : }
165 :
166 : // Converts from type urls OR descriptor full names to descriptor full names.
167 : // Type urls are as used in envoy yaml config, e.g.
168 : // "type.googleapis.com/envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig"
169 : // becomes
170 : // "envoy.extensions.filters.udp.udp_proxy.v3.UdpProxyConfig"
171 0 : absl::string_view typeUrlToDescriptorFullName(absl::string_view url) {
172 0 : const size_t pos = url.rfind('/');
173 0 : if (pos != absl::string_view::npos) {
174 0 : return url.substr(pos + 1);
175 0 : }
176 0 : return url;
177 0 : }
178 :
179 0 : std::unique_ptr<Protobuf::Message> unpackAnyForReflection(const ProtobufWkt::Any& any) {
180 0 : const Protobuf::Descriptor* descriptor =
181 0 : Protobuf::DescriptorPool::generated_pool()->FindMessageTypeByName(
182 0 : typeUrlToDescriptorFullName(any.type_url()));
183 : // If the type name refers to an unknown type, we treat it the same as other
184 : // unknown fields - not including its contents in the hash.
185 0 : if (descriptor == nullptr) {
186 0 : return nullptr;
187 0 : }
188 0 : const Protobuf::Message* prototype =
189 0 : Protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor);
190 0 : ASSERT(prototype != nullptr, "should be impossible since the descriptor is known");
191 0 : std::unique_ptr<Protobuf::Message> msg(prototype->New());
192 0 : any.UnpackTo(msg.get());
193 0 : return msg;
194 0 : }
195 :
196 : // This is intentionally ignoring unknown fields.
197 0 : uint64_t reflectionHashMessage(const Protobuf::Message& message, uint64_t seed) {
198 0 : using Protobuf::FieldDescriptor;
199 0 : std::string scratch;
200 0 : const Protobuf::Reflection* reflection = message.GetReflection();
201 0 : const Protobuf::Descriptor* descriptor = message.GetDescriptor();
202 0 : seed = HashUtil::xxHash64(descriptor->full_name(), seed);
203 0 : if (descriptor->well_known_type() == Protobuf::Descriptor::WELLKNOWNTYPE_ANY) {
204 0 : const ProtobufWkt::Any* any = Protobuf::DynamicCastToGenerated<ProtobufWkt::Any>(&message);
205 0 : ASSERT(any != nullptr, "casting to any should always work for WELLKNOWNTYPE_ANY");
206 0 : std::unique_ptr<Protobuf::Message> submsg = unpackAnyForReflection(*any);
207 0 : if (submsg == nullptr) {
208 : // If we wanted to handle unknown types in Any, this is where we'd have to do it.
209 : // Since we don't know the type to introspect it, we hash just its type name.
210 0 : return HashUtil::xxHash64(any->type_url(), seed);
211 0 : }
212 0 : return reflectionHashMessage(*submsg, seed);
213 0 : }
214 0 : std::vector<const FieldDescriptor*> fields;
215 : // ListFields returned the fields ordered by field number.
216 0 : reflection->ListFields(message, &fields);
217 : // If we wanted to handle unknown fields, we'd need to also GetUnknownFields here.
218 0 : for (const FieldDescriptor* field : fields) {
219 0 : seed = reflectionHashField(message, *field, seed);
220 0 : }
221 : // Hash one extra character to signify end of message, so that
222 : // msg{} field2=2
223 : // hashes differently from
224 : // msg{field2=2}
225 0 : return HashUtil::xxHash64("\x17", seed);
226 0 : }
227 : } // namespace
228 :
229 0 : uint64_t hash(const Protobuf::Message& message) { return reflectionHashMessage(message, 0); }
230 :
231 : } // namespace DeterministicProtoHash
232 : } // namespace Envoy
233 : #endif
|