Line data Source code
1 : #pragma once 2 : 3 : #include <algorithm> 4 : 5 : #include "source/common/common/assert.h" 6 : #include "source/common/common/utility.h" 7 : 8 : #include "absl/container/inlined_vector.h" 9 : #include "absl/strings/string_view.h" 10 : #include "absl/types/variant.h" 11 : 12 : namespace Envoy { 13 : 14 : /** 15 : * Convenient type for an inline vector that will be used by InlinedString. 16 : */ 17 : using InlinedStringVector = absl::InlinedVector<char, 128>; 18 : 19 : /** 20 : * Convenient type for the underlying type of InlinedString that allows a variant 21 : * between string_view and the InlinedVector. 22 : */ 23 : using VariantStringOrView = absl::variant<absl::string_view, InlinedStringVector>; 24 : 25 : // This includes the NULL (StringUtil::itoa technically only needs 21). 26 : inline constexpr size_t MaxIntegerLength{32}; 27 : 28 44827 : inline void validateCapacity(uint64_t new_capacity) { 29 : // If the resizing will cause buffer overflow due to hitting uint32_t::max, an OOM is likely 30 : // imminent. Fast-fail rather than allow a buffer overflow attack (issue #1421) 31 44827 : RELEASE_ASSERT(new_capacity <= std::numeric_limits<uint32_t>::max(), 32 44827 : "Trying to allocate overly large headers."); 33 44827 : } 34 : 35 53077 : inline absl::string_view getStrView(const VariantStringOrView& buffer) { 36 53077 : return absl::get<absl::string_view>(buffer); 37 53077 : } 38 : 39 712806 : inline InlinedStringVector& getInVec(VariantStringOrView& buffer) { 40 712806 : return absl::get<InlinedStringVector>(buffer); 41 712806 : } 42 : 43 1224338 : inline const InlinedStringVector& getInVec(const VariantStringOrView& buffer) { 44 1224338 : return absl::get<InlinedStringVector>(buffer); 45 1224338 : } 46 : 47 : /** 48 : * This is a string implementation that unified string reference and owned string. It is heavily 49 : * optimized for performance. It supports 2 different types of storage and can switch between them: 50 : * 1) A string reference. 51 : * 2) A string InlinedVector (an optimized interned string for small strings, but allows heap 52 : * allocation if needed). 53 : */ 54 : template <class Validator> class UnionStringBase { 55 : public: 56 : using Storage = VariantStringOrView; 57 : 58 : /** 59 : * Default constructor. Sets up for inline storage. 60 : */ 61 : 62 : #if defined(__GNUC__) 63 : #pragma GCC diagnostic push 64 : #pragma GCC diagnostic ignored "-Wuninitialized" 65 : #endif 66 217522 : UnionStringBase() : buffer_(InlinedStringVector()) { 67 217522 : #if defined(__GNUC__) 68 217522 : #pragma GCC diagnostic pop 69 217522 : #endif 70 217522 : ASSERT((getInVec(buffer_).capacity()) >= MaxIntegerLength); 71 217522 : ASSERT(valid()); 72 217522 : } 73 : 74 : /** 75 : * Constructor for a string reference. 76 : * @param ref_value MUST point to data that will live beyond the lifetime of any request/response 77 : * using the string (since a codec may optimize for zero copy). 78 : */ 79 20281 : explicit UnionStringBase(absl::string_view ref_value) : buffer_(ref_value) { ASSERT(valid()); } 80 : 81 157243 : UnionStringBase(UnionStringBase&& move_value) noexcept : buffer_(std::move(move_value.buffer_)) { 82 157243 : move_value.clear(); 83 : // Move constructor does not validate and relies on the source object validating its mutations. 84 157243 : } 85 395046 : ~UnionStringBase() = default; 86 : 87 : /** 88 : * Append data to an existing string. If the string is a reference string the reference data is 89 : * not copied. 90 : */ 91 44827 : void append(const char* data, uint32_t data_size) { 92 : // Make sure the requested memory allocation is below uint32_t::max 93 44827 : const uint64_t new_capacity = static_cast<uint64_t>(data_size) + size(); 94 44827 : validateCapacity(new_capacity); 95 44827 : ASSERT(valid(absl::string_view(data, data_size))); 96 : 97 44827 : switch (type()) { 98 6 : case Type::Reference: { 99 : // Rather than be too clever and optimize this uncommon case, we switch to 100 : // Inline mode and copy. 101 6 : const absl::string_view prev = getStrView(buffer_); 102 6 : buffer_ = InlinedStringVector(); 103 : // Assigning new_capacity to avoid resizing when appending the new data 104 6 : getInVec(buffer_).reserve(new_capacity); 105 6 : getInVec(buffer_).assign(prev.begin(), prev.end()); 106 6 : break; 107 0 : } 108 44821 : case Type::Inline: { 109 44821 : getInVec(buffer_).reserve(new_capacity); 110 44821 : break; 111 0 : } 112 44827 : } 113 44827 : getInVec(buffer_).insert(getInVec(buffer_).end(), data, data + data_size); 114 44827 : } 115 : 116 : /** 117 : * Transforms the inlined vector data using the given UnaryOperation (conforms 118 : * to std::transform). 119 : * @param unary_op the operations to be performed on each of the elements. 120 : */ 121 2478 : template <typename UnaryOperation> void inlineTransform(UnaryOperation&& unary_op) { 122 2478 : ASSERT(type() == Type::Inline); 123 2478 : std::transform(absl::get<InlinedStringVector>(buffer_).begin(), 124 2478 : absl::get<InlinedStringVector>(buffer_).end(), 125 2478 : absl::get<InlinedStringVector>(buffer_).begin(), unary_op); 126 2478 : } 127 : 128 : /** 129 : * Trim trailing whitespaces from the InlinedString. Only supported by the "Inline" InlinedString 130 : * representation. 131 : */ 132 2407 : void rtrim() { 133 2407 : ASSERT(type() == Type::Inline); 134 2407 : absl::string_view original = getStringView(); 135 2407 : absl::string_view rtrimmed = StringUtil::rtrim(original); 136 2407 : if (original.size() != rtrimmed.size()) { 137 3 : getInVec(buffer_).resize(rtrimmed.size()); 138 3 : } 139 2407 : } 140 : 141 : /** 142 : * Get an absl::string_view. It will NOT be NUL terminated! 143 : * 144 : * @return an absl::string_view. 145 : */ 146 499939 : absl::string_view getStringView() const { 147 499939 : if (type() == Type::Reference) { 148 27823 : return getStrView(buffer_); 149 27823 : } 150 472116 : ASSERT(type() == Type::Inline); 151 472116 : return {getInVec(buffer_).data(), getInVec(buffer_).size()}; 152 499939 : } 153 : 154 : /** 155 : * Return the string to a default state. Reference strings are not touched. Both inline/dynamic 156 : * strings are reset to zero size. 157 : */ 158 169930 : void clear() { 159 169930 : if (type() == Type::Inline) { 160 167202 : getInVec(buffer_).clear(); 161 167202 : } 162 169930 : } 163 : 164 : /** 165 : * @return whether the string is empty or not. 166 : */ 167 25316 : bool empty() const { return size() == 0; } 168 : 169 : // Looking for find? Use getStringView().find() 170 : 171 : /** 172 : * Set the value of the string by copying data into it. This overwrites any existing string. 173 : */ 174 204706 : void setCopy(const char* data, uint32_t size) { 175 204706 : if (!absl::holds_alternative<InlinedStringVector>(buffer_)) { 176 : // Switching from Type::Reference to Type::Inline 177 0 : buffer_ = InlinedStringVector(); 178 0 : } 179 : 180 204706 : getInVec(buffer_).reserve(size); 181 204706 : getInVec(buffer_).assign(data, data + size); 182 204706 : ASSERT(valid()); 183 204706 : } 184 : 185 : /** 186 : * Set the value of the string by copying data into it. This overwrites any existing string. 187 : */ 188 153678 : void setCopy(absl::string_view view) { setCopy(view.data(), view.size()); } 189 : 190 : /** 191 : * Set the value of the string to an integer. This overwrites any existing string. 192 : */ 193 917 : void setInteger(uint64_t value) { 194 : // Initialize the size to the max length, copy the actual data, and then 195 : // reduce the size (but not the capacity) as needed 196 : // Note: instead of using the inner_buffer, attempted the following: 197 : // resize buffer_ to MaxIntegerLength, apply StringUtil::itoa to the buffer_.data(), and then 198 : // resize buffer_ to int_length (the number of digits in value). 199 : // However it was slower than the following approach. 200 917 : char inner_buffer[MaxIntegerLength]; 201 917 : const uint32_t int_length = StringUtil::itoa(inner_buffer, MaxIntegerLength, value); 202 : 203 917 : if (type() == Type::Reference) { 204 : // Switching from Type::Reference to Type::Inline 205 0 : buffer_ = InlinedStringVector(); 206 0 : } 207 917 : ASSERT((getInVec(buffer_).capacity()) > MaxIntegerLength); 208 917 : getInVec(buffer_).assign(inner_buffer, inner_buffer + int_length); 209 917 : } 210 : 211 : /** 212 : * Set the value of the string to a string reference. 213 : * @param ref_value MUST point to data that will live beyond the lifetime of any request/response 214 : * using the string (since a codec may optimize for zero copy). 215 : */ 216 2689 : void setReference(absl::string_view ref_value) { 217 2689 : buffer_ = ref_value; 218 2689 : ASSERT(valid()); 219 2689 : } 220 : 221 : /** 222 : * @return whether the string is a reference or an InlinedVector. 223 : */ 224 7088 : bool isReference() const { return type() == Type::Reference; } 225 : 226 : /** 227 : * @return the size of the string, not including the null terminator. 228 : */ 229 305354 : uint32_t size() const { 230 305354 : if (type() == Type::Reference) { 231 25248 : return getStrView(buffer_).size(); 232 25248 : } 233 280106 : ASSERT(type() == Type::Inline); 234 280106 : return getInVec(buffer_).size(); 235 305354 : } 236 : 237 1603 : bool operator==(const char* rhs) const { 238 1603 : return getStringView() == absl::NullSafeStringView(rhs); 239 1603 : } 240 8692 : bool operator==(absl::string_view rhs) const { return getStringView() == rhs; } 241 24931 : bool operator!=(const char* rhs) const { 242 24931 : return getStringView() != absl::NullSafeStringView(rhs); 243 24931 : } 244 0 : bool operator!=(absl::string_view rhs) const { return getStringView() != rhs; } 245 : 246 : // Test only method that does not have validation and allows setting arbitrary values. 247 392 : void setCopyUnvalidatedForTestOnly(absl::string_view view) { 248 392 : if (!absl::holds_alternative<InlinedStringVector>(buffer_)) { 249 : // Switching from Type::Reference to Type::Inline 250 0 : buffer_ = InlinedStringVector(); 251 0 : } 252 : 253 392 : getInVec(buffer_).reserve(view.size()); 254 392 : getInVec(buffer_).assign(view.data(), view.data() + view.size()); 255 392 : } 256 : 257 : /** 258 : * @return raw Storage for cross-class move. This method is used to tranfer ownership 259 : * between UnionString with different Validator. 260 : */ 261 0 : Storage& storage() { return buffer_; } 262 : 263 : protected: 264 : enum class Type { Reference, Inline }; 265 : 266 0 : bool valid() const { return Validator()(getStringView()); } 267 : 268 0 : bool valid(absl::string_view data) const { return Validator()(data); } 269 : 270 : /** 271 : * @return the type of backing storage for the string. 272 : */ 273 1028055 : Type type() const { 274 : // buffer_.index() is correlated with the order of Reference and Inline in the 275 : // enum. 276 1028055 : ASSERT((buffer_.index() == 0) || (buffer_.index() == 1)); 277 1028055 : ASSERT((buffer_.index() == 0 && absl::holds_alternative<absl::string_view>(buffer_)) || 278 1028055 : (buffer_.index() != 0)); 279 1028055 : ASSERT((buffer_.index() == 1 && absl::holds_alternative<InlinedStringVector>(buffer_)) || 280 1028055 : (buffer_.index() != 1)); 281 1028055 : return Type(buffer_.index()); 282 1028055 : } 283 : 284 : Storage buffer_; 285 : }; 286 : 287 : class EmptyStringValidator { 288 : public: 289 0 : bool operator()(absl::string_view) { return true; } 290 : }; 291 : 292 : using UnionString = UnionStringBase<EmptyStringValidator>; 293 : 294 : } // namespace Envoy