Coverage Report

Created: 2023-11-12 09:30

/proc/self/cwd/external/com_google_protobuf/src/google/protobuf/generated_message_tctable_gen.cc
Line
Count
Source (jump to first uncovered line)
1
// Protocol Buffers - Google's data interchange format
2
// Copyright 2008 Google Inc.  All rights reserved.
3
// https://developers.google.com/protocol-buffers/
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
//     * Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
//     * Redistributions in binary form must reproduce the above
12
// copyright notice, this list of conditions and the following disclaimer
13
// in the documentation and/or other materials provided with the
14
// distribution.
15
//     * Neither the name of Google Inc. nor the names of its
16
// contributors may be used to endorse or promote products derived from
17
// this software without specific prior written permission.
18
//
19
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31
#include "google/protobuf/generated_message_tctable_gen.h"
32
33
#include <algorithm>
34
#include <limits>
35
#include <string>
36
#include <utility>
37
#include <vector>
38
39
#include "google/protobuf/descriptor.h"
40
#include "google/protobuf/descriptor.pb.h"
41
#include "google/protobuf/generated_message_tctable_decl.h"
42
#include "google/protobuf/generated_message_tctable_impl.h"
43
#include "google/protobuf/wire_format.h"
44
45
// Must come last:
46
#include "google/protobuf/port_def.inc"
47
48
namespace google {
49
namespace protobuf {
50
namespace internal {
51
52
namespace {
53
54
bool GetEnumValidationRange(const EnumDescriptor* enum_type, int16_t& start,
55
0
                            uint16_t& size) {
56
0
  ABSL_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();
57
58
  // Check if the enum values are a single, contiguous range.
59
0
  std::vector<int> enum_values;
60
0
  for (int i = 0, N = static_cast<int>(enum_type->value_count()); i < N; ++i) {
61
0
    enum_values.push_back(enum_type->value(i)->number());
62
0
  }
63
0
  auto values_begin = enum_values.begin();
64
0
  auto values_end = enum_values.end();
65
0
  std::sort(values_begin, values_end);
66
0
  enum_values.erase(std::unique(values_begin, values_end), values_end);
67
68
0
  if (std::numeric_limits<int16_t>::min() <= enum_values[0] &&
69
0
      enum_values[0] <= std::numeric_limits<int16_t>::max() &&
70
0
      enum_values.size() <= std::numeric_limits<uint16_t>::max() &&
71
0
      static_cast<int>(enum_values[0] + enum_values.size() - 1) ==
72
0
          enum_values.back()) {
73
0
    start = static_cast<int16_t>(enum_values[0]);
74
0
    size = static_cast<uint16_t>(enum_values.size());
75
0
    return true;
76
0
  } else {
77
0
    return false;
78
0
  }
79
0
}
80
81
12.0M
absl::string_view ParseFunctionValue(TcParseFunction function) {
82
1.32G
#define PROTOBUF_TC_PARSE_FUNCTION_X(value) #value,
83
12.0M
  static constexpr absl::string_view functions[] = {
84
12.0M
      {}, PROTOBUF_TC_PARSE_FUNCTION_LIST};
85
12.0M
#undef PROTOBUF_TC_PARSE_FUNCTION_X
86
12.0M
  return functions[static_cast<int>(function)];
87
12.0M
};
88
89
enum class EnumRangeInfo {
90
  kNone,         // No contiguous range
91
  kContiguous,   // Has a contiguous range
92
  kContiguous0,  // Has a small contiguous range starting at 0
93
  kContiguous1,  // Has a small contiguous range starting at 1
94
};
95
96
// Returns enum validation range info, and sets `rmax_value` iff
97
// the returned range is a small range. `rmax_value` is guaranteed
98
// to remain unchanged if the enum range is not small.
99
EnumRangeInfo GetEnumRangeInfo(const FieldDescriptor* field,
100
0
                               uint8_t& rmax_value) {
101
0
  int16_t start;
102
0
  uint16_t size;
103
0
  if (!GetEnumValidationRange(field->enum_type(), start, size)) {
104
0
    return EnumRangeInfo::kNone;
105
0
  }
106
0
  int max_value = start + size - 1;
107
0
  if (max_value <= 127 && (start == 0 || start == 1)) {
108
0
    rmax_value = static_cast<uint8_t>(max_value);
109
0
    return start == 0 ? EnumRangeInfo::kContiguous0
110
0
                      : EnumRangeInfo::kContiguous1;
111
0
  }
112
0
  return EnumRangeInfo::kContiguous;
113
0
}
114
115
// options.lazy_opt might be on for fields that don't really support lazy, so we
116
// make sure we only use lazy rep for singular TYPE_MESSAGE fields.
117
// We can't trust the `lazy=true` annotation.
118
bool HasLazyRep(const FieldDescriptor* field,
119
75.7M
                const TailCallTableInfo::PerFieldOptions options) {
120
75.7M
  return field->type() == field->TYPE_MESSAGE && !field->is_repeated() &&
121
75.7M
         options.lazy_opt != 0;
122
75.7M
}
123
124
void PopulateFastFieldEntry(const TailCallTableInfo::FieldEntryInfo& entry,
125
                            const TailCallTableInfo::PerFieldOptions& options,
126
12.0M
                            TailCallTableInfo::FastFieldInfo& info) {
127
12.0M
#define PROTOBUF_PICK_FUNCTION(fn) \
128
12.0M
  (field->number() < 16 ? TcParseFunction::fn##1 : TcParseFunction::fn##2)
129
130
12.0M
#define PROTOBUF_PICK_SINGLE_FUNCTION(fn) PROTOBUF_PICK_FUNCTION(fn##S)
131
132
12.0M
#define PROTOBUF_PICK_REPEATABLE_FUNCTION(fn)           \
133
12.0M
  (field->is_repeated() ? PROTOBUF_PICK_FUNCTION(fn##R) \
134
9.93M
                        : PROTOBUF_PICK_FUNCTION(fn##S))
135
136
12.0M
#define PROTOBUF_PICK_PACKABLE_FUNCTION(fn)               \
137
12.0M
  (field->is_packed()     ? PROTOBUF_PICK_FUNCTION(fn##P) \
138
2.10M
   : field->is_repeated() ? PROTOBUF_PICK_FUNCTION(fn##R) \
139
2.10M
                          : PROTOBUF_PICK_FUNCTION(fn##S))
140
141
12.0M
#define PROTOBUF_PICK_STRING_FUNCTION(fn)                       \
142
12.0M
  (field->options().ctype() == FieldOptions::CORD               \
143
2.71M
       ? PROTOBUF_PICK_FUNCTION(fn##cS)                         \
144
2.71M
   : options.is_string_inlined ? PROTOBUF_PICK_FUNCTION(fn##iS) \
145
2.71M
                               : PROTOBUF_PICK_REPEATABLE_FUNCTION(fn))
146
147
12.0M
  const FieldDescriptor* field = entry.field;
148
12.0M
  info.aux_idx = static_cast<uint8_t>(entry.aux_idx);
149
12.0M
  if (field->type() == FieldDescriptor::TYPE_BYTES ||
150
12.0M
      field->type() == FieldDescriptor::TYPE_STRING) {
151
2.71M
    if (options.is_string_inlined) {
152
0
      ABSL_CHECK(!field->is_repeated());
153
0
      info.aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
154
0
    }
155
2.71M
  }
156
157
12.0M
  TcParseFunction picked = TcParseFunction::kNone;
158
12.0M
  switch (field->type()) {
159
1.01M
    case FieldDescriptor::TYPE_BOOL:
160
1.01M
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastV8);
161
1.01M
      break;
162
121k
    case FieldDescriptor::TYPE_INT32:
163
268k
    case FieldDescriptor::TYPE_UINT32:
164
268k
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastV32);
165
268k
      break;
166
1.89k
    case FieldDescriptor::TYPE_SINT32:
167
1.89k
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastZ32);
168
1.89k
      break;
169
168k
    case FieldDescriptor::TYPE_INT64:
170
172k
    case FieldDescriptor::TYPE_UINT64:
171
172k
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastV64);
172
172k
      break;
173
0
    case FieldDescriptor::TYPE_SINT64:
174
0
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastZ64);
175
0
      break;
176
0
    case FieldDescriptor::TYPE_FLOAT:
177
0
    case FieldDescriptor::TYPE_FIXED32:
178
0
    case FieldDescriptor::TYPE_SFIXED32:
179
0
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastF32);
180
0
      break;
181
154
    case FieldDescriptor::TYPE_DOUBLE:
182
154
    case FieldDescriptor::TYPE_FIXED64:
183
154
    case FieldDescriptor::TYPE_SFIXED64:
184
154
      picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastF64);
185
154
      break;
186
648k
    case FieldDescriptor::TYPE_ENUM:
187
648k
      if (cpp::HasPreservingUnknownEnumSemantics(field)) {
188
648k
        picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastV32);
189
648k
      } else {
190
0
        switch (GetEnumRangeInfo(field, info.aux_idx)) {
191
0
          case EnumRangeInfo::kNone:
192
0
            picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastEv);
193
0
            break;
194
0
          case EnumRangeInfo::kContiguous:
195
0
            picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastEr);
196
0
            break;
197
0
          case EnumRangeInfo::kContiguous0:
198
0
            picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastEr0);
199
0
            break;
200
0
          case EnumRangeInfo::kContiguous1:
201
0
            picked = PROTOBUF_PICK_PACKABLE_FUNCTION(kFastEr1);
202
0
            break;
203
0
        }
204
0
      }
205
648k
      break;
206
648k
    case FieldDescriptor::TYPE_BYTES:
207
135k
      picked = PROTOBUF_PICK_STRING_FUNCTION(kFastB);
208
135k
      break;
209
2.57M
    case FieldDescriptor::TYPE_STRING:
210
2.57M
      switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
211
2.57M
        case internal::cpp::Utf8CheckMode::kStrict:
212
2.57M
          picked = PROTOBUF_PICK_STRING_FUNCTION(kFastU);
213
2.57M
          break;
214
873
        case internal::cpp::Utf8CheckMode::kVerify:
215
873
          picked = PROTOBUF_PICK_STRING_FUNCTION(kFastS);
216
873
          break;
217
0
        case internal::cpp::Utf8CheckMode::kNone:
218
0
          picked = PROTOBUF_PICK_STRING_FUNCTION(kFastB);
219
0
          break;
220
2.57M
      }
221
2.57M
      break;
222
7.22M
    case FieldDescriptor::TYPE_MESSAGE:
223
7.22M
      picked =
224
7.22M
          (HasLazyRep(field, options) ? PROTOBUF_PICK_SINGLE_FUNCTION(kFastMl)
225
7.22M
           : options.use_direct_tcparser_table
226
7.22M
               ? PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastMt)
227
7.22M
               : PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastMd));
228
7.22M
      break;
229
0
    case FieldDescriptor::TYPE_GROUP:
230
0
      picked = (options.use_direct_tcparser_table
231
0
                    ? PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastGt)
232
0
                    : PROTOBUF_PICK_REPEATABLE_FUNCTION(kFastGd));
233
0
      break;
234
12.0M
  }
235
236
12.0M
  ABSL_CHECK(picked != TcParseFunction::kNone);
237
12.0M
  static constexpr absl::string_view ns = "::_pbi::TcParser::";
238
12.0M
  info.func_name = absl::StrCat(ns, ParseFunctionValue(picked));
239
240
12.0M
#undef PROTOBUF_PICK_FUNCTION
241
12.0M
#undef PROTOBUF_PICK_SINGLE_FUNCTION
242
12.0M
#undef PROTOBUF_PICK_REPEATABLE_FUNCTION
243
12.0M
#undef PROTOBUF_PICK_PACKABLE_FUNCTION
244
12.0M
#undef PROTOBUF_PICK_STRING_FUNCTION
245
12.0M
}
246
247
bool IsFieldEligibleForFastParsing(
248
    const TailCallTableInfo::FieldEntryInfo& entry,
249
37.2M
    const TailCallTableInfo::OptionProvider& option_provider) {
250
37.2M
  const auto* field = entry.field;
251
37.2M
  const auto options = option_provider.GetForField(field);
252
37.2M
  ABSL_CHECK(!field->options().weak());
253
  // Map, oneof, weak, and lazy fields are not handled on the fast path.
254
37.2M
  if (field->is_map() || field->real_containing_oneof() ||
255
37.2M
      options.is_implicitly_weak || options.should_split) {
256
7.52M
    return false;
257
7.52M
  }
258
259
29.6M
  if (HasLazyRep(field, options) && !options.uses_codegen) {
260
    // Can't use TDP on lazy fields if we can't do codegen.
261
0
    return false;
262
0
  }
263
264
29.6M
  if (HasLazyRep(field, options) && options.lazy_opt == field_layout::kTvLazy) {
265
    // We only support eagerly verified lazy fields in the fast path.
266
0
    return false;
267
0
  }
268
269
  // We will check for a valid auxiliary index range later. However, we might
270
  // want to change the value we check for inlined string fields.
271
29.6M
  int aux_idx = entry.aux_idx;
272
273
29.6M
  switch (field->type()) {
274
      // Some bytes fields can be handled on fast path.
275
4.07M
    case FieldDescriptor::TYPE_STRING:
276
4.29M
    case FieldDescriptor::TYPE_BYTES:
277
4.29M
      if (field->options().ctype() == FieldOptions::STRING) {
278
        // strings are fine...
279
4.29M
      } else if (field->options().ctype() == FieldOptions::CORD) {
280
        // Cords are worth putting into the fast table, if they're not repeated
281
0
        if (field->is_repeated()) return false;
282
0
      } else {
283
0
        return false;
284
0
      }
285
4.29M
      if (options.is_string_inlined) {
286
0
        ABSL_CHECK(!field->is_repeated());
287
        // For inlined strings, the donation state index is stored in the
288
        // `aux_idx` field of the fast parsing info. We need to check the range
289
        // of that value instead of the auxiliary index.
290
0
        aux_idx = entry.inlined_string_idx;
291
0
      }
292
4.29M
      break;
293
294
25.3M
    default:
295
25.3M
      break;
296
29.6M
  }
297
298
29.6M
  if (cpp::HasHasbit(field)) {
299
    // The tailcall parser can only update the first 32 hasbits. Fields with
300
    // has-bits beyond the first 32 are handled by mini parsing/fallback.
301
14.6M
    ABSL_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString();
302
14.6M
    if (entry.hasbit_idx >= 32) return false;
303
14.6M
  }
304
305
  // If the field needs auxiliary data, then the aux index is needed. This
306
  // must fit in a uint8_t.
307
29.6M
  if (aux_idx > std::numeric_limits<uint8_t>::max()) {
308
0
    return false;
309
0
  }
310
311
  // The largest tag that can be read by the tailcall parser is two bytes
312
  // when varint-coded. This allows 14 bits for the numeric tag value:
313
  //   byte 0   byte 1
314
  //   1nnnnttt 0nnnnnnn
315
  //    ^^^^^^^  ^^^^^^^
316
29.6M
  if (field->number() >= 1 << 11) return false;
317
318
29.6M
  return true;
319
29.6M
}
320
321
795k
absl::optional<uint32_t> GetEndGroupTag(const Descriptor* descriptor) {
322
795k
  auto* parent = descriptor->containing_type();
323
795k
  if (parent == nullptr) return absl::nullopt;
324
1.14M
  for (int i = 0; i < parent->field_count(); ++i) {
325
1.06M
    auto* field = parent->field(i);
326
1.06M
    if (field->type() == field->TYPE_GROUP &&
327
1.06M
        field->message_type() == descriptor) {
328
0
      return WireFormatLite::MakeTag(field->number(),
329
0
                                     WireFormatLite::WIRETYPE_END_GROUP);
330
0
    }
331
1.06M
  }
332
76.4k
  return absl::nullopt;
333
76.4k
}
334
335
29.6M
uint32_t RecodeTagForFastParsing(uint32_t tag) {
336
29.6M
  ABSL_DCHECK_LE(tag, 0x3FFF);
337
  // Construct the varint-coded tag. If it is more than 7 bits, we need to
338
  // shift the high bits and add a continue bit.
339
29.6M
  if (uint32_t hibits = tag & 0xFFFFFF80) {
340
    // hi = tag & ~0x7F
341
    // lo = tag & 0x7F
342
    // This shifts hi to the left by 1 to the next byte and sets the
343
    // continuation bit.
344
11.7M
    tag = tag + hibits + 128;
345
11.7M
  }
346
29.6M
  return tag;
347
29.6M
}
348
349
std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
350
    absl::optional<uint32_t> end_group_tag,
351
    const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
352
    int table_size_log2,
353
3.16M
    const TailCallTableInfo::OptionProvider& option_provider) {
354
3.16M
  std::vector<TailCallTableInfo::FastFieldInfo> result(1 << table_size_log2);
355
3.16M
  const uint32_t idx_mask = static_cast<uint32_t>(result.size() - 1);
356
29.6M
  const auto tag_to_idx = [&](uint32_t tag) {
357
    // The field index is determined by the low bits of the field number, where
358
    // the table size determines the width of the mask. The largest table
359
    // supported is 32 entries. The parse loop uses these bits directly, so that
360
    // the dispatch does not require arithmetic:
361
    //        byte 0   byte 1
362
    //   tag: 1nnnnttt 0nnnnnnn
363
    //        ^^^^^
364
    //         idx (table_size_log2=5)
365
    // This means that any field number that does not fit in the lower 4 bits
366
    // will always have the top bit of its table index asserted.
367
29.6M
    return (tag >> 3) & idx_mask;
368
29.6M
  };
369
370
3.16M
  if (end_group_tag.has_value() && (*end_group_tag >> 14) == 0) {
371
    // Fits in 1 or 2 varint bytes.
372
0
    const uint32_t tag = RecodeTagForFastParsing(*end_group_tag);
373
0
    const uint32_t fast_idx = tag_to_idx(tag);
374
375
0
    TailCallTableInfo::FastFieldInfo& info = result[fast_idx];
376
0
    info.func_name = "::_pbi::TcParser::FastEndG";
377
0
    info.func_name.append(*end_group_tag < 128 ? "1" : "2");
378
0
    info.coded_tag = tag;
379
0
    info.nonfield_info = *end_group_tag;
380
0
  }
381
382
37.2M
  for (const auto& entry : field_entries) {
383
37.2M
    if (!IsFieldEligibleForFastParsing(entry, option_provider)) {
384
7.52M
      continue;
385
7.52M
    }
386
387
29.6M
    const auto* field = entry.field;
388
29.6M
    const auto options = option_provider.GetForField(field);
389
29.6M
    const uint32_t tag = RecodeTagForFastParsing(WireFormat::MakeTag(field));
390
29.6M
    const uint32_t fast_idx = tag_to_idx(tag);
391
392
29.6M
    TailCallTableInfo::FastFieldInfo& info = result[fast_idx];
393
29.6M
    if (!info.func_name.empty()) {
394
      // This field entry is already filled.
395
17.6M
      continue;
396
17.6M
    }
397
398
    // Fill in this field's entry:
399
24.0M
    ABSL_CHECK(info.func_name.empty()) << info.func_name;
400
12.0M
    PopulateFastFieldEntry(entry, options, info);
401
12.0M
    info.field = field;
402
12.0M
    info.coded_tag = tag;
403
    // If this field does not have presence, then it can set an out-of-bounds
404
    // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32).
405
12.0M
    info.hasbit_idx = cpp::HasHasbit(field) ? entry.hasbit_idx : 63;
406
12.0M
  }
407
3.16M
  return result;
408
3.16M
}
409
410
// We only need field names for reporting UTF-8 parsing errors, so we only
411
// emit them for string fields with Utf8 transform specified.
412
7.09M
bool NeedsFieldNameForTable(const FieldDescriptor* field, bool is_lite) {
413
7.09M
  if (cpp::GetUtf8CheckMode(field, is_lite) == cpp::Utf8CheckMode::kNone)
414
0
    return false;
415
7.09M
  return field->type() == FieldDescriptor::TYPE_STRING ||
416
7.09M
         (field->is_map() && (field->message_type()->map_key()->type() ==
417
116k
                                  FieldDescriptor::TYPE_STRING ||
418
116k
                              field->message_type()->map_value()->type() ==
419
70
                                  FieldDescriptor::TYPE_STRING));
420
7.09M
}
421
422
absl::string_view FieldNameForTable(
423
    const TailCallTableInfo::FieldEntryInfo& entry,
424
7.09M
    const TailCallTableInfo::OptionProvider& option_provider) {
425
7.09M
  if (NeedsFieldNameForTable(
426
7.09M
          entry.field, option_provider.GetForField(entry.field).is_lite)) {
427
1.22M
    return entry.field->name();
428
1.22M
  }
429
5.86M
  return "";
430
7.09M
}
431
432
std::vector<uint8_t> GenerateFieldNames(
433
    const Descriptor* descriptor,
434
    const std::vector<TailCallTableInfo::FieldEntryInfo>& entries,
435
795k
    const TailCallTableInfo::OptionProvider& option_provider) {
436
795k
  static constexpr int kMaxNameLength = 255;
437
795k
  std::vector<uint8_t> out;
438
439
795k
  std::vector<absl::string_view> names;
440
795k
  bool found_needed_name = false;
441
7.09M
  for (const auto& entry : entries) {
442
7.09M
    names.push_back(FieldNameForTable(entry, option_provider));
443
7.09M
    if (!names.back().empty()) found_needed_name = true;
444
7.09M
  }
445
446
  // No names needed. Omit the whole table.
447
795k
  if (!found_needed_name) {
448
270k
    return out;
449
270k
  }
450
451
  // First, we output the size of each string, as an unsigned byte. The first
452
  // string is the message name.
453
525k
  int count = 1;
454
525k
  out.push_back(std::min(static_cast<int>(descriptor->full_name().size()),
455
525k
                         kMaxNameLength));
456
5.88M
  for (auto field_name : names) {
457
5.88M
    out.push_back(field_name.size());
458
5.88M
    ++count;
459
5.88M
  }
460
2.38M
  while (count & 7) {  // align to an 8-byte boundary
461
1.86M
    out.push_back(0);
462
1.86M
    ++count;
463
1.86M
  }
464
  // The message name is stored at the beginning of the string
465
525k
  std::string message_name = descriptor->full_name();
466
525k
  if (message_name.size() > kMaxNameLength) {
467
0
    static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
468
0
    message_name = absl::StrCat(
469
0
        message_name.substr(0, kNameHalfLength), "...",
470
0
        message_name.substr(message_name.size() - kNameHalfLength));
471
0
  }
472
525k
  out.insert(out.end(), message_name.begin(), message_name.end());
473
  // Then we output the actual field names
474
5.88M
  for (auto field_name : names) {
475
5.88M
    out.insert(out.end(), field_name.begin(), field_name.end());
476
5.88M
  }
477
478
525k
  return out;
479
795k
}
480
481
TailCallTableInfo::NumToEntryTable MakeNumToEntryTable(
482
795k
    const std::vector<const FieldDescriptor*>& field_descriptors) {
483
795k
  TailCallTableInfo::NumToEntryTable num_to_entry_table;
484
795k
  num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);
485
486
  // skip_entry_block is the current block of SkipEntries that we're
487
  // appending to.  cur_block_first_fnum is the number of the first
488
  // field represented by the block.
489
795k
  uint16_t field_entry_index = 0;
490
795k
  uint16_t N = field_descriptors.size();
491
  // First, handle field numbers 1-32, which affect only the initial
492
  // skipmap32 and don't generate additional skip-entry blocks.
493
6.95M
  for (; field_entry_index != N; ++field_entry_index) {
494
6.22M
    auto* field_descriptor = field_descriptors[field_entry_index];
495
6.22M
    if (field_descriptor->number() > 32) break;
496
6.16M
    auto skipmap32_index = field_descriptor->number() - 1;
497
6.16M
    num_to_entry_table.skipmap32 -= 1 << skipmap32_index;
498
6.16M
  }
499
  // If all the field numbers were less than or equal to 32, we will have
500
  // no further entries to process, and we are already done.
501
795k
  if (field_entry_index == N) return num_to_entry_table;
502
503
59.8k
  TailCallTableInfo::SkipEntryBlock* block = nullptr;
504
59.8k
  bool start_new_block = true;
505
  // To determine sparseness, track the field number corresponding to
506
  // the start of the most recent skip entry.
507
59.8k
  uint32_t last_skip_entry_start = 0;
508
992k
  for (; field_entry_index != N; ++field_entry_index) {
509
932k
    auto* field_descriptor = field_descriptors[field_entry_index];
510
932k
    uint32_t fnum = static_cast<uint32_t>(field_descriptor->number());
511
932k
    ABSL_CHECK_GT(fnum, last_skip_entry_start);
512
932k
    if (start_new_block == false) {
513
      // If the next field number is within 15 of the last_skip_entry_start, we
514
      // continue writing just to that entry.  If it's between 16 and 31 more,
515
      // then we just extend the current block by one. If it's more than 31
516
      // more, we have to add empty skip entries in order to continue using the
517
      // existing block.  Obviously it's just 32 more, it doesn't make sense to
518
      // start a whole new block, since new blocks mean having to write out
519
      // their starting field number, which is 32 bits, as well as the size of
520
      // the additional block, which is 16... while an empty SkipEntry16 only
521
      // costs 32 bits.  So if it was 48 more, it's a slight space win; we save
522
      // 16 bits, but probably at the cost of slower run time.  We're choosing
523
      // 96 for now.
524
872k
      if (fnum - last_skip_entry_start > 96) start_new_block = true;
525
872k
    }
526
932k
    if (start_new_block) {
527
59.8k
      num_to_entry_table.blocks.push_back({fnum});
528
59.8k
      block = &num_to_entry_table.blocks.back();
529
59.8k
      start_new_block = false;
530
59.8k
    }
531
532
932k
    auto skip_entry_num = (fnum - block->first_fnum) / 16;
533
932k
    auto skip_entry_index = (fnum - block->first_fnum) % 16;
534
1.02M
    while (skip_entry_num >= block->entries.size())
535
91.0k
      block->entries.push_back({0xFFFF, field_entry_index});
536
932k
    block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index);
537
538
932k
    last_skip_entry_start = fnum - skip_entry_index;
539
932k
  }
540
59.8k
  return num_to_entry_table;
541
795k
}
542
543
uint16_t MakeTypeCardForField(
544
    const FieldDescriptor* field,
545
7.09M
    const TailCallTableInfo::PerFieldOptions& options) {
546
7.09M
  uint16_t type_card;
547
7.09M
  namespace fl = internal::field_layout;
548
7.09M
  if (internal::cpp::HasHasbit(field)) {
549
2.62M
    type_card = fl::kFcOptional;
550
4.47M
  } else if (field->is_repeated()) {
551
1.24M
    type_card = fl::kFcRepeated;
552
3.23M
  } else if (field->real_containing_oneof()) {
553
1.43M
    type_card = fl::kFcOneof;
554
1.79M
  } else {
555
1.79M
    type_card = fl::kFcSingular;
556
1.79M
  }
557
558
  // The rest of the type uses convenience aliases:
559
7.09M
  switch (field->type()) {
560
7.34k
    case FieldDescriptor::TYPE_DOUBLE:
561
7.34k
      type_card |= field->is_repeated() && field->is_packed()
562
7.34k
                       ? fl::kPackedDouble
563
7.34k
                       : fl::kDouble;
564
7.34k
      break;
565
0
    case FieldDescriptor::TYPE_FLOAT:
566
0
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedFloat
567
0
                                                              : fl::kFloat;
568
0
      break;
569
0
    case FieldDescriptor::TYPE_FIXED32:
570
0
      type_card |= field->is_repeated() && field->is_packed()
571
0
                       ? fl::kPackedFixed32
572
0
                       : fl::kFixed32;
573
0
      break;
574
0
    case FieldDescriptor::TYPE_SFIXED32:
575
0
      type_card |= field->is_repeated() && field->is_packed()
576
0
                       ? fl::kPackedSFixed32
577
0
                       : fl::kSFixed32;
578
0
      break;
579
0
    case FieldDescriptor::TYPE_FIXED64:
580
0
      type_card |= field->is_repeated() && field->is_packed()
581
0
                       ? fl::kPackedFixed64
582
0
                       : fl::kFixed64;
583
0
      break;
584
0
    case FieldDescriptor::TYPE_SFIXED64:
585
0
      type_card |= field->is_repeated() && field->is_packed()
586
0
                       ? fl::kPackedSFixed64
587
0
                       : fl::kSFixed64;
588
0
      break;
589
725k
    case FieldDescriptor::TYPE_BOOL:
590
725k
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedBool
591
725k
                                                              : fl::kBool;
592
725k
      break;
593
268k
    case FieldDescriptor::TYPE_ENUM:
594
268k
      if (internal::cpp::HasPreservingUnknownEnumSemantics(field)) {
595
        // No validation is required.
596
268k
        type_card |= field->is_repeated() && field->is_packed()
597
268k
                         ? fl::kPackedOpenEnum
598
268k
                         : fl::kOpenEnum;
599
268k
      } else {
600
0
        int16_t start;
601
0
        uint16_t size;
602
0
        if (GetEnumValidationRange(field->enum_type(), start, size)) {
603
          // Validation is done by range check (start/length in FieldAux).
604
0
          type_card |= field->is_repeated() && field->is_packed()
605
0
                           ? fl::kPackedEnumRange
606
0
                           : fl::kEnumRange;
607
0
        } else {
608
          // Validation uses the generated _IsValid function.
609
0
          type_card |= field->is_repeated() && field->is_packed()
610
0
                           ? fl::kPackedEnum
611
0
                           : fl::kEnum;
612
0
        }
613
0
      }
614
268k
      break;
615
84.8k
    case FieldDescriptor::TYPE_UINT32:
616
84.8k
      type_card |= field->is_repeated() && field->is_packed()
617
84.8k
                       ? fl::kPackedUInt32
618
84.8k
                       : fl::kUInt32;
619
84.8k
      break;
620
630
    case FieldDescriptor::TYPE_SINT32:
621
630
      type_card |= field->is_repeated() && field->is_packed()
622
630
                       ? fl::kPackedSInt32
623
630
                       : fl::kSInt32;
624
630
      break;
625
59.2k
    case FieldDescriptor::TYPE_INT32:
626
59.2k
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedInt32
627
59.2k
                                                              : fl::kInt32;
628
59.2k
      break;
629
1.72k
    case FieldDescriptor::TYPE_UINT64:
630
1.72k
      type_card |= field->is_repeated() && field->is_packed()
631
1.72k
                       ? fl::kPackedUInt64
632
1.72k
                       : fl::kUInt64;
633
1.72k
      break;
634
0
    case FieldDescriptor::TYPE_SINT64:
635
0
      type_card |= field->is_repeated() && field->is_packed()
636
0
                       ? fl::kPackedSInt64
637
0
                       : fl::kSInt64;
638
0
      break;
639
56.0k
    case FieldDescriptor::TYPE_INT64:
640
56.0k
      type_card |= field->is_repeated() && field->is_packed() ? fl::kPackedInt64
641
56.0k
                                                              : fl::kInt64;
642
56.0k
      break;
643
644
91.7k
    case FieldDescriptor::TYPE_BYTES:
645
91.7k
      type_card |= fl::kBytes;
646
91.7k
      break;
647
1.11M
    case FieldDescriptor::TYPE_STRING: {
648
1.11M
      switch (internal::cpp::GetUtf8CheckMode(field, options.is_lite)) {
649
1.10M
        case internal::cpp::Utf8CheckMode::kStrict:
650
1.10M
          type_card |= fl::kUtf8String;
651
1.10M
          break;
652
582
        case internal::cpp::Utf8CheckMode::kVerify:
653
582
          type_card |= fl::kRawString;
654
582
          break;
655
0
        case internal::cpp::Utf8CheckMode::kNone:
656
0
          type_card |= fl::kBytes;
657
0
          break;
658
1.11M
      }
659
1.11M
      break;
660
1.11M
    }
661
662
1.11M
    case FieldDescriptor::TYPE_GROUP:
663
0
      type_card |= 0 | fl::kMessage | fl::kRepGroup;
664
0
      if (options.is_implicitly_weak) {
665
0
        type_card |= fl::kTvWeakPtr;
666
0
      } else if (options.use_direct_tcparser_table) {
667
0
        type_card |= fl::kTvTable;
668
0
      } else {
669
0
        type_card |= fl::kTvDefault;
670
0
      }
671
0
      break;
672
4.68M
    case FieldDescriptor::TYPE_MESSAGE:
673
4.68M
      if (field->is_map()) {
674
116k
        type_card |= fl::kMap;
675
4.57M
      } else {
676
4.57M
        type_card |= fl::kMessage;
677
4.57M
        if (HasLazyRep(field, options)) {
678
0
          ABSL_CHECK(options.lazy_opt == field_layout::kTvEager ||
679
0
                     options.lazy_opt == field_layout::kTvLazy);
680
0
          type_card |= +fl::kRepLazy | options.lazy_opt;
681
4.57M
        } else {
682
4.57M
          if (options.is_implicitly_weak) {
683
0
            type_card |= fl::kTvWeakPtr;
684
4.57M
          } else if (options.use_direct_tcparser_table) {
685
0
            type_card |= fl::kTvTable;
686
4.57M
          } else {
687
4.57M
            type_card |= fl::kTvDefault;
688
4.57M
          }
689
4.57M
        }
690
4.57M
      }
691
4.68M
      break;
692
7.09M
  }
693
694
  // Fill in extra information about string and bytes field representations.
695
7.09M
  if (field->type() == FieldDescriptor::TYPE_BYTES ||
696
7.09M
      field->type() == FieldDescriptor::TYPE_STRING) {
697
1.20M
    if (field->is_repeated()) {
698
233k
      type_card |= fl::kRepSString;
699
967k
    } else {
700
967k
      type_card |= fl::kRepAString;
701
967k
    }
702
1.20M
  }
703
704
7.09M
  if (options.should_split) {
705
0
    type_card |= fl::kSplitTrue;
706
0
  }
707
708
7.09M
  return type_card;
709
7.09M
}
710
711
}  // namespace
712
713
TailCallTableInfo::TailCallTableInfo(
714
    const Descriptor* descriptor,
715
    const std::vector<const FieldDescriptor*>& ordered_fields,
716
    const OptionProvider& option_provider,
717
    const std::vector<int>& has_bit_indices,
718
795k
    const std::vector<int>& inlined_string_indices) {
719
  // If this message has any inlined string fields, store the donation state
720
  // offset in the first auxiliary entry, which is kInlinedStringAuxIdx.
721
795k
  if (!inlined_string_indices.empty()) {
722
771k
    aux_entries.resize(kInlinedStringAuxIdx + 1);  // Allocate our slot
723
771k
    aux_entries[kInlinedStringAuxIdx] = {kInlinedStringDonatedOffset};
724
771k
  }
725
726
  // If this message is split, store the split pointer offset in the second
727
  // and third auxiliary entries, which are kSplitOffsetAuxIdx and
728
  // kSplitSizeAuxIdx.
729
7.09M
  for (auto* field : ordered_fields) {
730
7.09M
    if (option_provider.GetForField(field).should_split) {
731
0
      static_assert(kSplitOffsetAuxIdx + 1 == kSplitSizeAuxIdx, "");
732
0
      aux_entries.resize(kSplitSizeAuxIdx + 1);  // Allocate our 2 slots
733
0
      aux_entries[kSplitOffsetAuxIdx] = {kSplitOffset};
734
0
      aux_entries[kSplitSizeAuxIdx] = {kSplitSizeof};
735
0
      break;
736
0
    }
737
7.09M
  }
738
739
  // Fill in mini table entries.
740
7.09M
  for (const FieldDescriptor* field : ordered_fields) {
741
7.09M
    auto options = option_provider.GetForField(field);
742
7.09M
    field_entries.push_back(
743
7.09M
        {field, internal::cpp ::HasHasbit(field)
744
7.09M
                    ? has_bit_indices[static_cast<size_t>(field->index())]
745
7.09M
                    : -1});
746
7.09M
    auto& entry = field_entries.back();
747
7.09M
    entry.type_card = MakeTypeCardForField(field, options);
748
749
7.09M
    if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
750
7.09M
        field->type() == FieldDescriptor::TYPE_GROUP) {
751
      // Message-typed fields have a FieldAux with the default instance pointer.
752
4.68M
      if (field->is_map()) {
753
116k
        field_entries.back().aux_idx = aux_entries.size();
754
116k
        aux_entries.push_back({kMapAuxInfo, {field}});
755
116k
        if (options.uses_codegen) {
756
          // If we don't use codegen we can't add these.
757
0
          auto* map_value = field->message_type()->map_value();
758
0
          if (auto* sub = map_value->message_type()) {
759
0
            aux_entries.push_back({kCreateInArena});
760
0
            aux_entries.back().desc = sub;
761
0
          } else if (map_value->type() == FieldDescriptor::TYPE_ENUM &&
762
0
                     !cpp::HasPreservingUnknownEnumSemantics(map_value)) {
763
0
            aux_entries.push_back({kEnumValidator, {map_value}});
764
0
          }
765
0
        }
766
4.57M
      } else if (HasLazyRep(field, options)) {
767
0
        if (options.uses_codegen) {
768
0
          field_entries.back().aux_idx = aux_entries.size();
769
0
          aux_entries.push_back({kSubMessage, {field}});
770
0
          if (options.lazy_opt == field_layout::kTvEager) {
771
0
            aux_entries.push_back({kMessageVerifyFunc, {field}});
772
0
          } else {
773
0
            aux_entries.push_back({kNothing});
774
0
          }
775
0
        } else {
776
0
          field_entries.back().aux_idx =
777
0
              TcParseTableBase::FieldEntry::kNoAuxIdx;
778
0
        }
779
4.57M
      } else {
780
4.57M
        field_entries.back().aux_idx = aux_entries.size();
781
4.57M
        aux_entries.push_back({options.is_implicitly_weak ? kSubMessageWeak
782
4.57M
                               : options.use_direct_tcparser_table
783
4.57M
                                   ? kSubTable
784
4.57M
                                   : kSubMessage,
785
4.57M
                               {field}});
786
4.57M
      }
787
4.68M
    } else if (field->type() == FieldDescriptor::TYPE_ENUM &&
788
2.40M
               !cpp::HasPreservingUnknownEnumSemantics(field)) {
789
      // Enum fields which preserve unknown values (proto3 behavior) are
790
      // effectively int32 fields with respect to parsing -- i.e., the value
791
      // does not need to be validated at parse time.
792
      //
793
      // Enum fields which do not preserve unknown values (proto2 behavior) use
794
      // a FieldAux to store validation information. If the enum values are
795
      // sequential (and within a range we can represent), then the FieldAux
796
      // entry represents the range using the minimum value (which must fit in
797
      // an int16_t) and count (a uint16_t). Otherwise, the entry holds a
798
      // pointer to the generated Name_IsValid function.
799
800
0
      entry.aux_idx = aux_entries.size();
801
0
      aux_entries.push_back({});
802
0
      auto& aux_entry = aux_entries.back();
803
804
0
      if (GetEnumValidationRange(field->enum_type(), aux_entry.enum_range.start,
805
0
                                 aux_entry.enum_range.size)) {
806
0
        aux_entry.type = kEnumRange;
807
0
      } else {
808
0
        aux_entry.type = kEnumValidator;
809
0
        aux_entry.field = field;
810
0
      }
811
812
2.40M
    } else if ((field->type() == FieldDescriptor::TYPE_STRING ||
813
2.40M
                field->type() == FieldDescriptor::TYPE_BYTES) &&
814
2.40M
               options.is_string_inlined) {
815
0
      ABSL_CHECK(!field->is_repeated());
816
      // Inlined strings have an extra marker to represent their donation state.
817
0
      int idx = inlined_string_indices[static_cast<size_t>(field->index())];
818
      // For mini parsing, the donation state index is stored as an `offset`
819
      // auxiliary entry.
820
0
      entry.aux_idx = aux_entries.size();
821
0
      aux_entries.push_back({kNumericOffset});
822
0
      aux_entries.back().offset = idx;
823
      // For fast table parsing, the donation state index is stored instead of
824
      // the aux_idx (this will limit the range to 8 bits).
825
0
      entry.inlined_string_idx = idx;
826
0
    }
827
7.09M
  }
828
829
795k
  table_size_log2 = 0;  // fallback value
830
795k
  int num_fast_fields = -1;
831
795k
  auto end_group_tag = GetEndGroupTag(descriptor);
832
3.16M
  for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
833
3.16M
    size_t try_size = 1 << try_size_log2;
834
3.16M
    auto split_fields = SplitFastFieldsForSize(end_group_tag, field_entries,
835
3.16M
                                               try_size_log2, option_provider);
836
3.16M
    ABSL_CHECK_EQ(split_fields.size(), try_size);
837
3.16M
    int try_num_fast_fields = 0;
838
17.6M
    for (const auto& info : split_fields) {
839
17.6M
      if (info.field != nullptr) ++try_num_fast_fields;
840
17.6M
    }
841
    // Use this size if (and only if) it covers more fields.
842
3.16M
    if (try_num_fast_fields > num_fast_fields) {
843
2.38M
      fast_path_fields = std::move(split_fields);
844
2.38M
      table_size_log2 = try_size_log2;
845
2.38M
      num_fast_fields = try_num_fast_fields;
846
2.38M
    }
847
    // The largest table we allow has the same number of entries as the
848
    // message has fields, rounded up to the next power of 2 (e.g., a message
849
    // with 5 fields can have a fast table of size 8). A larger table *might*
850
    // cover more fields in certain cases, but a larger table in that case
851
    // would have mostly empty entries; so, we cap the size to avoid
852
    // pathologically sparse tables.
853
3.16M
    if (end_group_tag.has_value()) {
854
      // If this message uses group encoding, the tables are sometimes very
855
      // sparse because the fields in the group avoid using the same field
856
      // numbering as the parent message (even though currently, the proto
857
      // compiler allows the overlap, and there is no possible conflict.)
858
      // As such, this test produces a false negative as far as whether the
859
      // large table will be worth it.  So we disable the test in this case.
860
3.16M
    } else {
861
3.16M
      if (try_size > ordered_fields.size()) {
862
736k
        break;
863
736k
      }
864
3.16M
    }
865
3.16M
  }
866
867
795k
  num_to_entry_table = MakeNumToEntryTable(ordered_fields);
868
795k
  ABSL_CHECK_EQ(field_entries.size(), ordered_fields.size());
869
795k
  field_name_data =
870
795k
      GenerateFieldNames(descriptor, field_entries, option_provider);
871
795k
}
872
873
}  // namespace internal
874
}  // namespace protobuf
875
}  // namespace google
876
877
#include "google/protobuf/port_undef.inc"