Coverage Report

Created: 2025-08-26 06:02

/src/sentencepiece/third_party/protobuf-lite/google/protobuf/arenastring.h
Line
Count
Source (jump to first uncovered line)
1
// Protocol Buffers - Google's data interchange format
2
// Copyright 2008 Google Inc.  All rights reserved.
3
// https://developers.google.com/protocol-buffers/
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
//     * Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
//     * Redistributions in binary form must reproduce the above
12
// copyright notice, this list of conditions and the following disclaimer
13
// in the documentation and/or other materials provided with the
14
// distribution.
15
//     * Neither the name of Google Inc. nor the names of its
16
// contributors may be used to endorse or promote products derived from
17
// this software without specific prior written permission.
18
//
19
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31
#ifndef GOOGLE_PROTOBUF_ARENASTRING_H__
32
#define GOOGLE_PROTOBUF_ARENASTRING_H__
33
34
#include <string>
35
#include <type_traits>
36
#include <utility>
37
38
#include <google/protobuf/stubs/logging.h>
39
#include <google/protobuf/stubs/common.h>
40
#include <google/protobuf/arena.h>
41
#include <google/protobuf/port.h>
42
43
#include <google/protobuf/port_def.inc>
44
45
#ifdef SWIG
46
#error "You cannot SWIG proto headers"
47
#endif
48
49
50
namespace google {
51
namespace protobuf {
52
namespace internal {
53
54
// Lazy string instance to support string fields with non-empty default.
55
// These are initialized on the first call to .get().
56
class PROTOBUF_EXPORT LazyString {
57
 public:
58
  // We explicitly make LazyString an aggregate so that MSVC can do constant
59
  // initialization on it without marking it `constexpr`.
60
  // We do not want to use `constexpr` because it makes it harder to have extern
61
  // storage for it and causes library bloat.
62
  struct InitValue {
63
    const char* ptr;
64
    size_t size;
65
  };
66
  // We keep a union of the initialization value and the std::string to save on
67
  // space. We don't need the string array after Init() is done.
68
  union {
69
    mutable InitValue init_value_;
70
    alignas(std::string) mutable char string_buf_[sizeof(std::string)];
71
  };
72
  mutable std::atomic<const std::string*> inited_;
73
74
0
  // Returns the lazily created string, running Init() only while `inited_`
  // is still null.
  const std::string& get() const {
75
    // This check generates less code than a call-once invocation: a single
    // acquire-load of `inited_` followed by a null test.
76
0
    auto* res = inited_.load(std::memory_order_acquire);
77
0
    if (PROTOBUF_PREDICT_FALSE(res == nullptr)) return Init();
78
0
    return *res;
79
0
  }
80
81
 private:
82
  // Initialize the string in `string_buf_`, update `inited_` and return it.
83
  // We return it here to avoid having to read it again in the inlined code.
84
  const std::string& Init() const;
85
};
86
87
template <typename T>
88
class TaggedPtr {
89
 public:
90
  TaggedPtr() = default;
91
  // Stores `ptr` as given (no tag bit is applied). The const qualifier is cast
  // away because the value is kept in the plain `void* ptr_` member.
  explicit constexpr TaggedPtr(const std::string* ptr)
92
      : ptr_(const_cast<std::string*>(ptr)) {}
93
94
  // Stores `p` and then sets the lowest bit of the stored value as the tag.
  void SetTagged(T* p) {
95
    Set(p);
96
    // OR-ing in bit 0 marks the value as tagged; Get() masks it off again.
    ptr_ = reinterpret_cast<void*>(as_int() | 1);
97
  }
98
131
  // Overwrites the stored value with `p` exactly as given (no tag bit added).
  void Set(T* p) { ptr_ = p; }
99
0
  // Returns the stored pointer with the tag bit (bit 0) cleared.
  T* Get() const { return reinterpret_cast<T*>(as_int() & ~uintptr_t{1}); }
100
0
  // True when the tag bit (bit 0) of the stored value is set.
  bool IsTagged() const { return (as_int() & 1) != 0; }
101
102
  // Returned value is only safe to dereference if IsTagged() == false.
103
  // It is safe to compare.
104
128
  T* UnsafeGet() const { return static_cast<T*>(ptr_); }  // Raw cast; the tag bit is not masked off.
105
106
  bool IsNull() { return ptr_ == nullptr; }
107
108
 private:
109
0
  // Integer view of the stored pointer, used for the tag-bit arithmetic above.
  uintptr_t as_int() const { return reinterpret_cast<uintptr_t>(ptr_); }
110
  void* ptr_;
111
};
112
113
static_assert(std::is_trivial<TaggedPtr<std::string>>::value,
114
              "TaggedPtr must be trivial");
115
116
// This class encapsulates a pointer to a std::string with or without a donated
117
// buffer, tagged by bottom bit. It is a high-level wrapper that almost directly
118
// corresponds to the interface required by string fields in generated
119
// code. It replaces the old std::string* pointer in such cases.
120
//
121
// The object has different but similar code paths for when the default value is
122
// the empty string and when it is a non-empty string.
123
// The empty string is handled differently throughout the library and there is a
124
// single global instance of it we can share.
125
//
126
// For fields with an empty string default value, there are three distinct
127
// states:
128
//
129
// - Pointer set to 'String' tag (LSB is 0), equal to
130
//   &GetEmptyStringAlreadyInited(): field is set to its default value. Points
131
//   to a true std::string*, but we do not own that std::string* (it's a
132
//   globally shared instance).
133
//
134
// - Pointer set to 'String' tag (LSB is 0), but not equal to the global empty
135
//   string: field points to a true std::string* instance that we own. This
136
//   instance is either on the heap or on the arena (i.e. registered on
137
//   free()/destructor-call list) as appropriate.
138
//
139
// - Pointer set to 'DonatedString' tag (LSB is 1): points to a std::string
140
//   instance with a buffer on the arena (arena != NULL, always, in this case).
141
//
142
// For fields with a non-empty string default value, there are three distinct
143
// states:
144
//
145
// - Pointer set to 'String' tag (LSB is 0), equal to `nullptr`:
146
//   Field is in "default" mode and does not point to any actual instance.
147
//   Methods that might need to create an instance of the object will pass a
148
//   `const LazyString&` for it.
149
//
150
// - Pointer set to 'String' tag (LSB is 0), but not equal to `nullptr`:
151
//   field points to a true std::string* instance that we own. This instance is
152
//   either on the heap or on the arena (i.e. registered on
153
//   free()/destructor-call list) as appropriate.
154
//
155
// - Pointer set to 'DonatedString' tag (LSB is 1): points to a std::string
156
//   instance with a buffer on the arena (arena != NULL, always, in this case).
157
//
158
// Generated code and reflection code both ensure that ptr_ is never null for
159
// fields with an empty default.
160
// Because ArenaStringPtr is used in oneof unions, its constructor is a NOP and
161
// so the field is always manually initialized via method calls.
162
//
163
// Side-note: why pass information about the default on every API call? Because
164
// we don't want to hold it in a member variable, or else this would go into
165
// every proto message instance. This would be a huge waste of space, since the
166
// default instance pointer is typically a global (static class field). We want
167
// the generated code to be as efficient as possible, and if we take
168
// the default value information as a parameter that's in practice taken from a
169
// static class field, and compare ptr_ to the default value, we end up with a
170
// single "cmp %reg, GLOBAL" in the resulting machine code. (Note that this also
171
// requires the String tag to be 0 so we can avoid the mask before comparing.)
172
struct PROTOBUF_EXPORT ArenaStringPtr {
173
  ArenaStringPtr() = default;
174
  // Starts out holding `default_value` itself as the stored pointer.
  explicit constexpr ArenaStringPtr(const std::string* default_value)
175
0
      : tagged_ptr_(default_value) {}
176
177
  // Some methods below are overloaded on a `default_value` and on tags.
178
  // The tagged overloads help reduce code size in the callers in generated
179
  // code, while the `default_value` overloads are useful from reflection.
180
  // By-value empty struct arguments are elided in the ABI.
181
  struct EmptyDefault {};
182
  struct NonEmptyDefault {};
183
184
  void Set(const std::string* default_value, ConstStringParam value,
185
           ::google::protobuf::Arena* arena);
186
  void Set(const std::string* default_value, std::string&& value,
187
           ::google::protobuf::Arena* arena);
188
  void Set(EmptyDefault, ConstStringParam value, ::google::protobuf::Arena* arena);
189
  void Set(EmptyDefault, std::string&& value, ::google::protobuf::Arena* arena);
190
  void Set(NonEmptyDefault, ConstStringParam value, ::google::protobuf::Arena* arena);
191
  void Set(NonEmptyDefault, std::string&& value, ::google::protobuf::Arena* arena);
192
193
  // Basic accessors.
194
0
  const std::string& Get() const PROTOBUF_ALWAYS_INLINE {
195
    // Unconditionally mask away the tag. The masked pointer is dereferenced
    // without a null check.
196
0
    return *tagged_ptr_.Get();
197
0
  }
198
0
  const std::string* GetPointer() const PROTOBUF_ALWAYS_INLINE {
199
0
    // Unconditionally mask away the tag. Nothing is dereferenced here, so a
    // null stored pointer is returned as null.
200
0
    return tagged_ptr_.Get();
201
0
  }
202
203
  // For fields with an empty default value.
204
  std::string* Mutable(EmptyDefault, ::google::protobuf::Arena* arena);
205
  // For fields with a non-empty default value.
206
  std::string* Mutable(const LazyString& default_value, ::google::protobuf::Arena* arena);
207
208
  // Release returns a std::string* instance that is heap-allocated and is not
209
  // Own()'d by any arena. If the field is not set, this returns NULL. The
210
  // caller takes ownership. Clears this field back to NULL state. Used to
211
  // implement release_<field>() methods on generated classes.
212
  std::string* Release(const std::string* default_value,
213
                       ::google::protobuf::Arena* arena);
214
  std::string* ReleaseNonDefault(const std::string* default_value,
215
                                 ::google::protobuf::Arena* arena);
216
217
  // Takes a std::string that is heap-allocated, and takes ownership. The
218
  // std::string's destructor is registered with the arena. Used to implement
219
  // set_allocated_<field> in generated classes.
220
  void SetAllocated(const std::string* default_value, std::string* value,
221
                    ::google::protobuf::Arena* arena);
222
223
  // Swaps internal pointers. Arena-safety semantics: this is guarded by the
224
  // logic in Swap()/UnsafeArenaSwap() at the message level, so this method is
225
  // 'unsafe' if called directly.
226
  inline void Swap(ArenaStringPtr* other, const std::string* default_value,
227
                   Arena* arena) PROTOBUF_ALWAYS_INLINE;
228
229
  // Frees storage (if not on an arena).
230
  void Destroy(const std::string* default_value, ::google::protobuf::Arena* arena);
231
  void Destroy(EmptyDefault, ::google::protobuf::Arena* arena);
232
  void Destroy(NonEmptyDefault, ::google::protobuf::Arena* arena);
233
234
  // Clears content, but keeps allocated std::string, to avoid the overhead of
235
  // heap operations. After this returns, the content (as seen by the user) will
236
  // always be the empty std::string. Assumes that |default_value| is an empty
237
  // std::string.
238
  void ClearToEmpty();
239
240
  // Clears content, assuming that the current value is not the empty
241
  // string default.
242
  void ClearNonDefaultToEmpty();
243
244
  // Clears content, but keeps allocated std::string if arena != NULL, to avoid
245
  // the overhead of heap operations. After this returns, the content (as seen
246
  // by the user) will always be equal to |default_value|.
247
  void ClearToDefault(const LazyString& default_value, ::google::protobuf::Arena* arena);
248
249
  // Called from generated code / reflection runtime only. Resets value to point
250
  // to a default string pointer, with the semantics that this
251
  // ArenaStringPtr does not own the pointed-to memory. Disregards initial value
252
  // of ptr_ (so this is the *ONLY* safe method to call after construction or
253
  // when reinitializing after becoming the active field in a oneof union).
254
  inline void UnsafeSetDefault(const std::string* default_value);
255
256
  // Returns a mutable pointer, but doesn't initialize the string to the
257
  // default value.
258
  std::string* MutableNoArenaNoDefault(const std::string* default_value);
259
260
  // Get a mutable pointer with unspecified contents.
261
  // Similar to `MutableNoArenaNoDefault`, but also handles the arena case.
262
  // If the value was donated, the contents are discarded.
263
  std::string* MutableNoCopy(const std::string* default_value,
264
                             ::google::protobuf::Arena* arena);
265
266
  // Destroy the string. Assumes `arena == nullptr`.
267
  void DestroyNoArena(const std::string* default_value);
268
269
  // Internal setter used only at parse time to directly set a donated string
270
  // value.
271
0
  void UnsafeSetTaggedPointer(TaggedPtr<std::string> value) {
272
0
    // Plain overwrite: the previously stored pointer is neither inspected nor
    // freed.
    tagged_ptr_ = value;
273
0
  }
274
  // Generated code only! An optimization, in certain cases the generated
275
  // code is certain we can obtain a std::string with no default checks and
276
  // tag tests.
277
  std::string* UnsafeMutablePointer() PROTOBUF_RETURNS_NONNULL;
278
279
128
  // Returns true when the raw (unmasked) stored pointer equals |default_value|.
  inline bool IsDefault(const std::string* default_value) const {
280
    // Relies on the fact that kPtrTagString == 0, so if IsString(), ptr_ is the
281
    // actual std::string pointer (and if !IsString(), ptr_ will never be equal
282
    // to any aligned |default_value| pointer). The key is that we want to avoid
283
    // masking in the fastpath const-pointer Get() case for non-arena code.
284
128
    return tagged_ptr_.UnsafeGet() == default_value;
285
128
  }
286
287
 private:
288
  TaggedPtr<std::string> tagged_ptr_;
289
290
0
  // Unconditionally reports false; the tag bit is not consulted.
  bool IsDonatedString() const { return false; }
291
292
  // Slow paths.
293
294
  // MutableSlow requires that !IsString() || IsDefault
295
  // Variadic to support 0 args for EmptyDefault and 1 arg for LazyString.
296
  template <typename... Lazy>
297
  std::string* MutableSlow(::google::protobuf::Arena* arena, const Lazy&... lazy_default);
298
299
};
300
301
131
// Overwrites the stored pointer with `value`, untagged. The previous pointer
// is not examined or freed.
inline void ArenaStringPtr::UnsafeSetDefault(const std::string* value) {
302
131
  tagged_ptr_.Set(const_cast<std::string*>(value));
303
131
}
304
305
// Exchanges this field with `other`. When NDEBUG is not defined the string
// contents are swapped (see below); otherwise only the tagged pointers are.
inline void ArenaStringPtr::Swap(ArenaStringPtr* other,
306
                                 const std::string* default_value,
307
0
                                 Arena* arena) {
308
0
#ifndef NDEBUG
309
  // For debug builds, we swap the contents of the string, rather than the
310
  // std::string instances themselves.  This invalidates previously taken const
311
  // references that are (per our documentation) invalidated by calling Swap()
312
  // on the message.
313
  //
314
  // If both strings are the default_value, swapping is uninteresting.
315
  // Otherwise, we use ArenaStringPtr::Mutable() to access the std::string, to
316
  // ensure that we do not try to mutate default_value itself.
317
0
  if (IsDefault(default_value) && other->IsDefault(default_value)) {
318
0
    return;
319
0
  }
320
321
0
  if (default_value == nullptr) {
322
    // If we have non-empty default, then `default_value` is null and we can't
323
    // call Mutable the same way. Just do the regular swap.
324
0
    std::swap(tagged_ptr_, other->tagged_ptr_);
325
0
  } else {
326
0
    // Empty-default case: obtain a mutable string on each side, then swap
    // their contents.
    std::string* this_ptr = Mutable(EmptyDefault{}, arena);
327
0
    std::string* other_ptr = other->Mutable(EmptyDefault{}, arena);
328
329
0
    this_ptr->swap(*other_ptr);
330
0
  }
331
#else
332
  // NDEBUG builds: exchange the tagged pointers directly.
  std::swap(tagged_ptr_, other->tagged_ptr_);
333
#endif
334
0
}
335
336
0
inline void ArenaStringPtr::ClearNonDefaultToEmpty() {
337
  // Unconditionally mask away the tag, then clear the pointed-to string in
  // place; the stored pointer itself is left unchanged.
338
0
  tagged_ptr_.Get()->clear();
339
0
}
340
341
inline std::string* ArenaStringPtr::MutableNoArenaNoDefault(
342
0
    const std::string* default_value) {
343
0
  // VERY IMPORTANT for performance and code size: this will reduce to a member
344
0
  // variable load, a pointer check (against |default_value|, in practice a
345
0
  // static global) and a branch to the slowpath (which calls operator new and
346
0
  // the ctor). DO NOT add any tagged-pointer operations here.
347
0
  if (IsDefault(default_value)) {
348
0
    // Still the default: allocate a fresh, empty heap string and store it.
    std::string* new_string = new std::string();
349
0
    tagged_ptr_.Set(new_string);
350
0
    return new_string;
351
0
  } else {
352
0
    // A non-default string is already stored; hand it out as-is.
    return UnsafeMutablePointer();
353
0
  }
354
0
}
355
356
128
inline void ArenaStringPtr::DestroyNoArena(const std::string* default_value) {
357
128
  // Only a non-default pointer is deleted; |default_value| is never freed
  // here. The stored pointer is not reset afterwards.
  if (!IsDefault(default_value)) {
358
0
    delete UnsafeMutablePointer();
359
0
  }
360
128
}
361
362
0
// Returns the raw stored pointer without masking. The GOOGLE_DCHECKs assert
// that it is untagged and non-null.
inline std::string* ArenaStringPtr::UnsafeMutablePointer() {
363
0
  GOOGLE_DCHECK(!tagged_ptr_.IsTagged());
364
0
  GOOGLE_DCHECK(tagged_ptr_.UnsafeGet() != nullptr);
365
0
  return tagged_ptr_.UnsafeGet();
366
0
}
367
368
369
}  // namespace internal
370
}  // namespace protobuf
371
}  // namespace google
372
373
#include <google/protobuf/port_undef.inc>
374
375
#endif  // GOOGLE_PROTOBUF_ARENASTRING_H__