1
#include "source/common/formatter/substitution_formatter.h"
2

            
3
namespace Envoy {
4
namespace Formatter {
5

            
6
291007
const re2::RE2& commandWithArgsRegex() {
  // Regex that validates the syntax of a single formatter command and extracts its parts.
  // A formatter command is written as:
  //
  //    %COMMAND(SUBCOMMAND):LENGTH%
  //
  // The '%' signs at the beginning and at the end are what the parser uses to locate the next
  // command. The pattern is anchored with "^", so it only matches a command that starts at the
  // very beginning of the text it is applied to.
  //
  // Pattern breakdown (capturing groups are numbered in the order in which they open):
  //
  //   ^%                     Opening '%'.
  //   ((?:[A-Z]|[0-9]|_)+)   Group 1 - COMMAND. Mandatory; one or more of "A-Z", "0-9" or "_".
  //                          The inner non-capturing group only lists the allowed characters.
  //   (?:\((.*?)\))?         Optional "(SUBCOMMAND)". Group 2 holds the text between the
  //                          parentheses, without "(" and ")". The text is matched lazily.
  //   (?::([0-9]+))?         Optional ":LENGTH". Group 3 holds just the digits, without ":".
  //   %                      Closing '%'.
  //
  // Whether SUBCOMMAND and LENGTH are actually acceptable depends on the COMMAND:
  // - for some commands SUBCOMMAND is not allowed (for example %PROTOCOL%),
  // - for some commands SUBCOMMAND is required (for example %REQ(:AUTHORITY)%; just %REQ% will
  //   cause an error),
  // - for some commands SUBCOMMAND is optional (for example %START_TIME% and
  //   %START_TIME(%f.%1f.%2f.%3f)% are both correct),
  // - some commands allow LENGTH to be specified, some do not.
  // The regex itself treats both parts as optional; it only checks the syntax and extracts the
  // values of COMMAND, SUBCOMMAND and LENGTH.
  CONSTRUCT_ON_FIRST_USE(re2::RE2,
                         R"EOF(^%((?:[A-Z]|[0-9]|_)+)(?:\((.*?)\))?(?::([0-9]+))?%)EOF");
}
56

            
57
// Helper class to write value to output buffer in JSON style.
58
// NOTE: This helper class has duplicated logic with the Json::BufferStreamer class but
59
// provides lower level of APIs to operate on the output buffer (like control the
60
// delimiters). This is designed for special scenario of substitution formatter and
61
// is not intended to be used by other parts of the code.
62
class JsonStringSerializer {
63
public:
64
  using OutputBufferType = Json::StringOutput;
65
155
  explicit JsonStringSerializer(std::string& output_buffer) : output_buffer_(output_buffer) {}
66

            
67
  // Methods that be used to add JSON delimiter to output buffer.
68
168
  void addMapBeginDelimiter() { output_buffer_.add(Json::Constants::MapBegin); }
69
168
  void addMapEndDelimiter() { output_buffer_.add(Json::Constants::MapEnd); }
70
9
  void addArrayBeginDelimiter() { output_buffer_.add(Json::Constants::ArrayBegin); }
71
9
  void addArrayEndDelimiter() { output_buffer_.add(Json::Constants::ArrayEnd); }
72
860
  void addElementsDelimiter() { output_buffer_.add(Json::Constants::Comma); }
73
1008
  void addKeyValueDelimiter() { output_buffer_.add(Json::Constants::Colon); }
74

            
75
  // Methods that be used to add JSON key or value to output buffer.
76
1131
  void addString(absl::string_view value) { addSanitized(R"(")", value, R"(")"); }
77
  /**
78
   * Serializes a number.
79
   */
80
3
  void addNumber(double d) {
81
3
    if (std::isnan(d)) {
82
      output_buffer_.add(Json::Constants::Null);
83
3
    } else {
84
3
      Buffer::Util::serializeDouble(d, output_buffer_);
85
3
    }
86
3
  }
87
  /**
88
   * Serializes a integer number.
89
   * NOTE: All numbers in JSON is float. When loading output of this serializer, the parser's
90
   * implementation decides if the full precision of big integer could be preserved or not.
91
   * See discussion here https://stackoverflow.com/questions/13502398/json-integers-limit-on-size
92
   * and spec https://www.rfc-editor.org/rfc/rfc7159#section-6 for more details.
93
   */
94
  void addNumber(uint64_t i) { output_buffer_.add(absl::StrCat(i)); }
95
  void addNumber(int64_t i) { output_buffer_.add(absl::StrCat(i)); }
96
2
  void addBool(bool b) { output_buffer_.add(b ? Json::Constants::True : Json::Constants::False); }
97
  void addNull() { output_buffer_.add(Json::Constants::Null); }
98

            
99
  // Low-level methods that be used to provide a low-level control to buffer.
100
1131
  void addSanitized(absl::string_view prefix, absl::string_view value, absl::string_view suffix) {
101
1131
    output_buffer_.add(prefix, Json::sanitize(sanitize_buffer_, value), suffix);
102
1131
  }
103
  void addRawString(absl::string_view value) { output_buffer_.add(value); }
104

            
105
protected:
106
  std::string sanitize_buffer_;
107
  OutputBufferType output_buffer_;
108
};
109

            
110
// Helper class to parse the Json format configuration. The class will be used to parse
111
// the JSON format configuration and convert it to a list of raw JSON pieces and
112
// substitution format template strings. See comments below for more details.
113
class JsonFormatBuilder {
114
public:
115
  struct FormatElement {
116
    // Pre-sanitized JSON piece or a format template string that contains
117
    // substitution commands.
118
    std::string value_;
119
    // Whether the value is a template string.
120
    // If true, the value is a format template string that contains substitution commands.
121
    // If false, the value is a pre-sanitized JSON piece.
122
    bool is_template_;
123
  };
124
  using FormatElements = std::vector<FormatElement>;
125

            
126
  /**
127
   * Constructor of JsonFormatBuilder.
128
   */
129
155
  JsonFormatBuilder() = default;
130

            
131
  /**
132
   * Convert a proto struct format configuration to an array of raw JSON pieces and
133
   * substitution format template strings.
134
   *
135
   * The keys, raw values, delimiters will be serialized as JSON string pieces (raw
136
   * JSON strings) directly when loading the configuration.
137
   * The substitution format template strings will be kept as template string pieces and
138
   * will be parsed to formatter providers by the JsonFormatter.
139
   *
140
   * NOTE: This class is used to parse the configuration of the proto struct format
141
   * and should only be used in the context of parsing the configuration.
142
   *
143
   * For example given the following proto struct format configuration:
144
   *
145
   *   json_format:
146
   *     name: "value"
147
   *     template: "%START_TIME%"
148
   *     number: 2
149
   *     bool: true
150
   *     list:
151
   *       - "list_raw_value"
152
   *       - false
153
   *       - "%EMIT_TIME%"
154
   *     nested:
155
   *       nested_name: "nested_value"
156
   *
157
   * It will be parsed to the following pieces:
158
   *
159
   *   - '{"name":"value","template":'                                      # Raw JSON piece.
160
   *   - '%START_TIME%'                                                     # Format template piece.
161
   *   - ',"number":2,"bool":true,"list":["list_raw_value",false,'          # Raw JSON piece.
162
   *   - '%EMIT_TIME%'                                                      # Format template piece.
163
   *   - '],"nested":{"nested_name":"nested_value"}}'                       # Raw JSON piece.
164
   *
165
   * Finally, join the raw JSON pieces and output of substitution formatters in order
166
   * to construct the final JSON output.
167
   *
168
   * @param struct_format the proto struct format configuration.
169
   */
170
  FormatElements fromStruct(const Protobuf::Struct& struct_format);
171

            
172
private:
173
  using ProtoDict = Protobuf::Map<std::string, Protobuf::Value>;
174
  using ProtoList = Protobuf::RepeatedPtrField<Protobuf::Value>;
175

            
176
  void formatValueToFormatElements(const ProtoDict& dict_value);
177
  void formatValueToFormatElements(const Protobuf::Value& value);
178
  void formatValueToFormatElements(const ProtoList& list_value);
179

            
180
  std::string buffer_;                       // JSON writer buffer.
181
  JsonStringSerializer serializer_{buffer_}; // JSON serializer.
182
  FormatElements elements_;                  // Parsed elements.
183
};
184

            
185
JsonFormatBuilder::FormatElements
JsonFormatBuilder::fromStruct(const Protobuf::Struct& struct_format) {
  // Start from an empty element list.
  elements_.clear();

  // This call iterates through the map tree and serializes the keys/values as JSON into
  // buffer_. Whenever a string value that contains a substitution command is found, the JSON
  // piece accumulated so far and the substitution command are pushed into elements_. The
  // iteration then continues until the whole tree is traversed.
  formatValueToFormatElements(struct_format.fields());

  // Whatever was serialized after the last template string is the final raw JSON piece.
  elements_.push_back(FormatElement{std::move(buffer_), false});
  // buffer_ has been moved from; bring it back to a well defined empty state.
  buffer_.clear();

  // elements_ is a member, so the explicit move is needed to avoid copying it on return.
  return std::move(elements_);
}
199

            
200
1036
// Serializes a single proto value. Scalars are written to the current raw JSON piece, structs
// and lists are handled recursively by the matching overloads, and strings that contain '%'
// are emitted as separate template elements.
void JsonFormatBuilder::formatValueToFormatElements(const Protobuf::Value& value) {
  switch (value.kind_case()) {
  case Protobuf::Value::KIND_NOT_SET:
  case Protobuf::Value::kNullValue:
    serializer_.addNull();
    break;
  case Protobuf::Value::kNumberValue:
    serializer_.addNumber(value.number_value());
    break;
  case Protobuf::Value::kStringValue: {
    const absl::string_view string_format = value.string_value();
    if (absl::StrContains(string_format, '%')) {
      // The string contains a formatter. Close the raw JSON piece built so far and push it
      // into the output list first...
      elements_.push_back(FormatElement{std::move(buffer_), false});
      buffer_.clear();

      // ...then push the unmodified string as a template element.
      elements_.push_back(FormatElement{std::string(string_format), true});
    } else {
      // Plain string without any formatter: serialize it as a regular JSON string.
      serializer_.addString(string_format);
    }
    break;
  }
  case Protobuf::Value::kBoolValue:
    serializer_.addBool(value.bool_value());
    break;
  case Protobuf::Value::kStructValue:
    formatValueToFormatElements(value.struct_value().fields());
    break;
  case Protobuf::Value::kListValue:
    formatValueToFormatElements(value.list_value().values());
    break;
  }
}
238

            
239
9
void JsonFormatBuilder::formatValueToFormatElements(const ProtoList& list_value) {
240
9
  serializer_.addArrayBeginDelimiter(); // Delimiter to start list.
241
37
  for (int i = 0; i < list_value.size(); ++i) {
242
28
    if (i > 0) {
243
19
      serializer_.addElementsDelimiter(); // Delimiter to separate list elements.
244
19
    }
245
28
    formatValueToFormatElements(list_value[i]);
246
28
  }
247
9
  serializer_.addArrayEndDelimiter(); // Delimiter to end list.
248
9
}
249

            
250
168
void JsonFormatBuilder::formatValueToFormatElements(const ProtoDict& dict_value) {
251
168
  std::vector<std::pair<absl::string_view, ProtoDict::const_iterator>> sorted_fields;
252
168
  sorted_fields.reserve(dict_value.size());
253

            
254
1176
  for (auto it = dict_value.begin(); it != dict_value.end(); ++it) {
255
1008
    sorted_fields.push_back({it->first, it});
256
1008
  }
257

            
258
  // Sort the keys to make the output deterministic.
259
168
  std::sort(sorted_fields.begin(), sorted_fields.end(),
260
3792
            [](const auto& a, const auto& b) { return a.first < b.first; });
261

            
262
168
  serializer_.addMapBeginDelimiter(); // Delimiter to start map.
263
1176
  for (size_t i = 0; i < sorted_fields.size(); ++i) {
264
1008
    if (i > 0) {
265
841
      serializer_.addElementsDelimiter(); // Delimiter to separate map elements.
266
841
    }
267
    // Add the key.
268
1008
    serializer_.addString(sorted_fields[i].first);
269
1008
    serializer_.addKeyValueDelimiter(); // Delimiter to separate key and value.
270
1008
    formatValueToFormatElements(sorted_fields[i].second->second);
271
1008
  }
272
168
  serializer_.addMapEndDelimiter(); // Delimiter to end map.
273
168
}
274

            
275
// Parses a format string into an ordered list of formatter providers.
//
// Literal text becomes PlainStringFormatter entries ("%%" is an escape for a literal '%').
// Every %COMMAND(SUBCOMMAND):LENGTH% is handed to the command parsers, first the ones in
// command_parsers and then the built-in ones; the first parser that returns a provider wins.
//
// @param format the format string to parse.
// @param command_parsers additional command parsers, consulted before the built-in parsers.
// @return the providers in the order they appear in format, or InvalidArgument if a command is
//         syntactically invalid or no parser recognizes it.
absl::StatusOr<std::vector<FormatterProviderPtr>>
SubstitutionFormatParser::parse(absl::string_view format,
                                const std::vector<CommandParserPtr>& command_parsers) {
  std::string current_token;
  current_token.reserve(32);
  std::vector<FormatterProviderPtr> formatters;

  for (size_t pos = 0; pos < format.size();) {
    // Plain character: accumulate it into the pending literal.
    if (format[pos] != '%') {
      current_token.push_back(format[pos]);
      ++pos;
      continue;
    }

    // escape '%%'
    if (pos + 1 < format.size() && format[pos + 1] == '%') {
      current_token.push_back('%');
      pos += 2;
      continue;
    }

    // A command starts here. Flush the literal collected so far so that the order of the
    // providers matches the order in the format string.
    if (!current_token.empty()) {
      formatters.push_back(std::make_unique<PlainStringFormatter>(current_token));
      current_token.clear();
    }

    absl::string_view sub_format = format.substr(pos);
    const size_t sub_format_size = sub_format.size();

    absl::string_view command, command_arg;
    absl::optional<size_t> max_len;

    // On success Consume() advances sub_format past the matched command.
    if (!re2::RE2::Consume(&sub_format, commandWithArgsRegex(), &command, &command_arg, &max_len)) {
      return absl::InvalidArgumentError(fmt::format(
          "Incorrect configuration: {}. Couldn't find valid command at position {}", format, pos));
    }

    // Offers the command to each parser in turn and keeps the first provider that is returned.
    const auto try_parsers = [&](const auto& parsers) {
      for (const auto& parser : parsers) {
        auto formatter = parser->parse(command, command_arg, max_len);
        if (formatter) {
          formatters.push_back(std::move(formatter));
          return true;
        }
      }
      return false;
    };

    // First try the command parsers provided by the user. This allows the user to override
    // built-in command parsers. Only if none of them matches, try the built-in ones.
    if (!try_parsers(command_parsers) &&
        !try_parsers(BuiltInCommandParserFactoryHelper::commandParsers())) {
      return absl::InvalidArgumentError(
          fmt::format("Not supported field in StreamInfo: {}", command));
    }

    // Skip exactly the characters that Consume() matched.
    pos += (sub_format_size - sub_format.size());
  }

  if (!current_token.empty() || format.empty()) {
    // Create a PlainStringFormatter with the final string literal. If the format string
    // was empty, this creates a PlainStringFormatter with an empty string.
    formatters.push_back(std::make_unique<PlainStringFormatter>(current_token));
  }

  return formatters;
}
355

            
356
// Factory for FormatterImpl. Returns the new formatter, or the error status recorded during
// construction.
absl::StatusOr<std::unique_ptr<FormatterImpl>>
FormatterImpl::create(absl::string_view format, bool omit_empty_values,
                      const CommandParsers& command_parsers) {
  // The status is passed to the constructor by reference and inspected once construction is
  // done.
  absl::Status creation_status = absl::OkStatus();
  std::unique_ptr<FormatterImpl> formatter{
      new FormatterImpl(creation_status, format, omit_empty_values, command_parsers)};
  RETURN_IF_NOT_OK_REF(creation_status);
  return formatter;
}
365

            
366
std::string FormatterImpl::format(const Context& context,
367
82961
                                  const StreamInfo::StreamInfo& stream_info) const {
368
82961
  std::string log_line;
369
82961
  log_line.reserve(256);
370

            
371
2261409
  for (const auto& provider : providers_) {
372
2261409
    const absl::optional<std::string> bit = provider->format(context, stream_info);
373
    // Add the formatted value if there is one. Otherwise add a default value
374
    // of "-" if omit_empty_values_ is not set.
375
2261409
    if (bit.has_value()) {
376
1996906
      log_line += bit.value();
377
1996908
    } else if (!omit_empty_values_) {
378
262782
      log_line += DefaultUnspecifiedValueStringView;
379
262782
    }
380
2261409
  }
381

            
382
82961
  return log_line;
383
82961
}
384

            
385
// Appends one JSON string to log_line: an opening quote, the output of every formatter in
// order, and a closing quote. `sanitize` is the scratch buffer handed to Json::sanitize.
void stringValueToLogLine(const JsonFormatterImpl::Formatters& formatters, const Context& context,
                          const StreamInfo::StreamInfo& info, std::string& log_line,
                          std::string& sanitize, bool omit_empty_values) {
  log_line.push_back('"'); // Opening quote of the JSON string.

  for (const JsonFormatterImpl::Formatter& formatter : formatters) {
    const absl::optional<std::string> piece = formatter->format(context, info);
    if (piece.has_value()) {
      // Sanitize the value before appending it. No quotes are added per value, because the
      // quoting is done once for the whole string by this function.
      log_line.append(Json::sanitize(sanitize, piece.value()));
    } else {
      // No value: append the placeholder for it. This needn't be sanitized.
      log_line.append(omit_empty_values ? EMPTY_STRING : DefaultUnspecifiedValueStringView);
    }
  }

  log_line.push_back('"'); // Closing quote of the JSON string.
}
402

            
403
// Splits the struct configuration into raw JSON pieces and template strings, and stores them in
// parsed_elements_: raw pieces as they are, templates parsed into formatter providers.
JsonFormatterImpl::JsonFormatterImpl(const Protobuf::Struct& struct_format, bool omit_empty_values,
                                     const CommandParsers& commands)
    : omit_empty_values_(omit_empty_values) {
  JsonFormatBuilder::FormatElements elements = JsonFormatBuilder().fromStruct(struct_format);

  for (JsonFormatBuilder::FormatElement& element : elements) {
    if (!element.is_template_) {
      // Raw JSON piece: take over the string as it is.
      parsed_elements_.emplace_back(std::move(element.value_));
      continue;
    }

    // Template string: parse it into the list of formatter providers.
    parsed_elements_.emplace_back(
        THROW_OR_RETURN_VALUE(SubstitutionFormatParser::parse(element.value_, commands),
                              std::vector<FormatterProviderPtr>));
  }
}
416

            
417
std::string JsonFormatterImpl::format(const Context& context,
418
129
                                      const StreamInfo::StreamInfo& info) const {
419
129
  std::string log_line;
420
129
  log_line.reserve(2048);
421
129
  std::string sanitize; // Helper to serialize the value to log line.
422

            
423
1803
  for (const ParsedFormatElement& element : parsed_elements_) {
424
    // 1. Handle the raw string element.
425
1803
    if (absl::holds_alternative<std::string>(element)) {
426
      // The raw string element will be added to the buffer directly.
427
      // It is sanitized when loading the configuration.
428
966
      log_line.append(absl::get<std::string>(element));
429
966
      continue;
430
966
    }
431

            
432
837
    ASSERT(absl::holds_alternative<Formatters>(element));
433
837
    const Formatters& formatters = absl::get<Formatters>(element);
434
837
    ASSERT(!formatters.empty());
435

            
436
837
    if (formatters.size() != 1) {
437
      // 2. Handle the formatter element with multiple or zero providers.
438
9
      stringValueToLogLine(formatters, context, info, log_line, sanitize, omit_empty_values_);
439
831
    } else {
440
      // 3. Handle the formatter element with a single provider and value
441
      //    type needs to be kept.
442
828
      const auto value = formatters[0]->formatValue(context, info);
443
828
      Json::Utility::appendValueToString(value, log_line);
444
828
    }
445
837
  }
446

            
447
129
  log_line.push_back('\n');
448
129
  return log_line;
449
129
}
450

            
451
} // namespace Formatter
452
} // namespace Envoy