1
#include "source/extensions/tracers/zipkin/span_context_extractor.h"
2

            
3
#include <charconv>
4

            
5
#include "source/common/common/assert.h"
6
#include "source/common/common/utility.h"
7
#include "source/extensions/tracers/zipkin/span_context.h"
8
#include "source/extensions/tracers/zipkin/zipkin_core_constants.h"
9

            
10
namespace Envoy {
11
namespace Extensions {
12
namespace Tracers {
13
namespace Zipkin {
14
namespace {
15
// Longest B3 single-format value accepted by the parser below, i.e. the fully populated layout
// traceid128-spanid-1-parentid.
constexpr int FormatMaxLength = 32    // 128-bit trace id
                                + 1   // delimiter
                                + 16  // span id
                                + 3   // delimiter, sampling flag, delimiter
                                + 16; // parent span id
16

            
17
25
// Returns true when `c` is one of the sampling-state characters accepted in a B3 header:
// '1', '0' or 'd'. Every other character, including upper-case 'D', is rejected.
bool validSamplingFlags(char c) { return c == '1' || c == '0' || c == 'd'; }
23

            
24
10
// Translates a B3 sampling-state character into a sampling decision.
// Returns false for '0', true for the other accepted characters ('1' and 'd'), and
// absl::nullopt when `c` is not an accepted sampling-state character.
absl::optional<bool> getSamplingFlags(char c) {
  if (!validSamplingFlags(c)) {
    return absl::nullopt;
  }
  return c != '0';
}
31

            
32
// Helper function to parse hex string_view to uint64_t using std::from_chars
33
6
bool parseHexStringView(absl::string_view hex_str, uint64_t& result) {
34
6
  const char* begin = hex_str.data();
35
6
  const char* end = begin + hex_str.size();
36
6
  auto [ptr, ec] = std::from_chars(begin, end, result, 16);
37
6
  return ec == std::errc{} && ptr == end;
38
6
}
39

            
40
} // namespace
41

            
42
// Binds the extractor to the trace context it will read propagation headers from.
// `w3c_fallback_enabled` controls whether the W3C trace context is consulted when no B3
// headers are found.
SpanContextExtractor::SpanContextExtractor(Tracing::TraceContext& trace_context,
                                           bool w3c_fallback_enabled)
    : trace_context_(trace_context), w3c_fallback_enabled_(w3c_fallback_enabled) {}

SpanContextExtractor::~SpanContextExtractor() = default;
47

            
48
62
// Determines the sampling decision carried by the incoming trace context.
//
// Sources are consulted in this order, and the first one that is present decides the result:
//   1. The B3 single-format header (ZipkinCoreConstants B3). The sampling character is read
//      from a position implied by the header length; an unsupported length or an unrecognized
//      character yields absl::nullopt.
//   2. The individual ZipkinCoreConstants X_B3_SAMPLED header: true when its value equals
//      SAMPLED or "true", false otherwise.
//   3. The W3C trace context, only when the W3C fallback is enabled and a span context can be
//      extracted from it.
// Returns absl::nullopt when none of the sources provides a decision.
absl::optional<bool> SpanContextExtractor::extractSampled() {
  // Try B3 single format first.
  const auto b3_header_entry = ZipkinCoreConstants::get().B3.get(trace_context_);
  if (b3_header_entry.has_value()) {
    // This is an implicitly untrusted header, so only the first value is used.
    const absl::string_view b3 = b3_header_entry.value();
    // Index of the sampling character; it stays 0 for the flag-only form.
    int sampled_pos = 0;
    switch (b3.length()) {
    case 1: // sampling flag only
      break;
    case 35: // 16 + 1 + 16 + 2
    case 52: // 16 + 1 + 16 + 2 + 1 + 16
      sampled_pos = 34;
      break;
    case 51: // 32 + 1 + 16 + 2
    case 68: // 32 + 1 + 16 + 2 + 1 + 16
      sampled_pos = 50;
      break;
    default:
      // No other length has a sampling character at a known position.
      return absl::nullopt;
    }
    return getSamplingFlags(b3[sampled_pos]);
  }

  // Try individual B3 sampled header.
  const auto x_b3_sampled_entry = ZipkinCoreConstants::get().X_B3_SAMPLED.get(trace_context_);
  if (x_b3_sampled_entry.has_value()) {
    // Checking if sampled flag has been specified. Also checking for 'true' value, as some old
    // zipkin tracers may still use that value, although should be 0 or 1.
    // This is an implicitly untrusted header, so only the first value is used.
    const absl::string_view xb3_sampled = x_b3_sampled_entry.value();
    return xb3_sampled == SAMPLED || xb3_sampled == "true";
  }

  // Try W3C Trace Context format as fallback only if enabled.
  if (w3c_fallback_enabled_) {
    Extensions::Tracers::OpenTelemetry::SpanContextExtractor w3c_extractor(
        const_cast<Tracing::TraceContext&>(trace_context_));
    if (w3c_extractor.propagationHeaderPresent()) {
      auto w3c_span_context = w3c_extractor.extractSpanContext();
      // A W3C header that fails to parse is ignored rather than reported.
      if (w3c_span_context.ok()) {
        return w3c_span_context.value().sampled();
      }
    }
  }

  return absl::nullopt;
}
103

            
104
108
std::pair<SpanContext, bool> SpanContextExtractor::extractSpanContext(bool is_sampled) {
105
  // Try B3 single format first.
106
108
  if (ZipkinCoreConstants::get().B3.get(trace_context_).has_value()) {
107
39
    return extractSpanContextFromB3SingleFormat(is_sampled);
108
39
  }
109

            
110
  // Try individual B3 headers.
111
69
  auto b3_trace_id_entry = ZipkinCoreConstants::get().X_B3_TRACE_ID.get(trace_context_);
112
69
  auto b3_span_id_entry = ZipkinCoreConstants::get().X_B3_SPAN_ID.get(trace_context_);
113
69
  if (b3_span_id_entry.has_value() && b3_trace_id_entry.has_value()) {
114
12
    uint64_t trace_id(0);
115
12
    uint64_t trace_id_high(0);
116
12
    uint64_t span_id(0);
117
12
    uint64_t parent_id(0);
118

            
119
    // Extract trace id - which can either be 128 or 64 bit. For 128 bit,
120
    // it needs to be divided into two 64 bit numbers (high and low).
121
    // This is an implicitly untrusted header, so only the first value is used.
122
12
    const std::string tid(b3_trace_id_entry.value());
123
12
    if (b3_trace_id_entry.value().size() == 32) {
124
3
      const std::string high_tid = tid.substr(0, 16);
125
3
      const std::string low_tid = tid.substr(16, 16);
126
3
      if (!StringUtil::atoull(high_tid.c_str(), trace_id_high, 16) ||
127
3
          !StringUtil::atoull(low_tid.c_str(), trace_id, 16)) {
128
1
        throw ExtractorException(
129
1
            fmt::format("Invalid traceid_high {} or tracid {}", high_tid.c_str(), low_tid.c_str()));
130
1
      }
131
9
    } else if (!StringUtil::atoull(tid.c_str(), trace_id, 16)) {
132
1
      throw ExtractorException(absl::StrCat("Invalid trace_id ", tid.c_str()));
133
1
    }
134

            
135
    // This is an implicitly untrusted header, so only the first value is used.
136
10
    const std::string spid(b3_span_id_entry.value());
137
10
    if (!StringUtil::atoull(spid.c_str(), span_id, 16)) {
138
1
      throw ExtractorException(absl::StrCat("Invalid span id ", spid.c_str()));
139
1
    }
140

            
141
9
    auto b3_parent_id_entry = ZipkinCoreConstants::get().X_B3_PARENT_SPAN_ID.get(trace_context_);
142
9
    if (b3_parent_id_entry.has_value() && !b3_parent_id_entry.value().empty()) {
143
      // This is an implicitly untrusted header, so only the first value is used.
144
4
      const std::string pspid(b3_parent_id_entry.value());
145
4
      if (!StringUtil::atoull(pspid.c_str(), parent_id, 16)) {
146
1
        throw ExtractorException(absl::StrCat("Invalid parent span id ", pspid.c_str()));
147
1
      }
148
4
    }
149

            
150
8
    return {SpanContext(trace_id_high, trace_id, span_id, parent_id, is_sampled), true};
151
9
  }
152

            
153
  // Try W3C Trace Context format as fallback only if enabled.
154
57
  if (w3c_fallback_enabled_) {
155
24
    Extensions::Tracers::OpenTelemetry::SpanContextExtractor w3c_extractor(
156
24
        const_cast<Tracing::TraceContext&>(trace_context_));
157
24
    if (w3c_extractor.propagationHeaderPresent()) {
158
24
      auto w3c_span_context = w3c_extractor.extractSpanContext();
159
24
      if (w3c_span_context.ok()) {
160
2
        return convertW3CToZipkin(w3c_span_context.value(), is_sampled);
161
2
      }
162
24
    }
163
24
  }
164

            
165
55
  return {SpanContext(), false};
166
57
}
167

            
168
std::pair<SpanContext, bool>
169
39
SpanContextExtractor::extractSpanContextFromB3SingleFormat(bool is_sampled) {
170
39
  auto b3_head_entry = ZipkinCoreConstants::get().B3.get(trace_context_);
171
39
  ASSERT(b3_head_entry.has_value());
172
  // This is an implicitly untrusted header, so only the first value is used.
173
39
  const std::string b3(b3_head_entry.value());
174
39
  if (!b3.length()) {
175
1
    throw ExtractorException("Invalid input: empty");
176
1
  }
177

            
178
38
  if (b3.length() == 1) { // possibly sampling flags
179
4
    if (validSamplingFlags(b3[0])) {
180
3
      return {SpanContext(), false};
181
3
    }
182
1
    throw ExtractorException(fmt::format("Invalid input: invalid sampling flag {}", b3[0]));
183
4
  }
184

            
185
34
  if (b3.length() < 16 + 1 + 16 /* traceid64-spanid */) {
186
8
    throw ExtractorException("Invalid input: truncated");
187
26
  } else if (b3.length() > FormatMaxLength) {
188
2
    throw ExtractorException("Invalid input: too long");
189
2
  }
190

            
191
24
  uint64_t trace_id(0);
192
24
  uint64_t trace_id_high(0);
193
24
  uint64_t span_id(0);
194
24
  uint64_t parent_id(0);
195

            
196
24
  uint64_t pos = 0;
197

            
198
24
  const std::string trace_id_str = b3.substr(pos, 16);
199
24
  if (b3[pos + 32] == '-') {
200
6
    if (!StringUtil::atoull(trace_id_str.c_str(), trace_id_high, 16)) {
201
1
      throw ExtractorException(
202
1
          fmt::format("Invalid input: invalid trace id high {}", trace_id_str.c_str()));
203
1
    }
204
5
    pos += 16;
205
5
    const std::string trace_id_low_str = b3.substr(pos, 16);
206
5
    if (!StringUtil::atoull(trace_id_low_str.c_str(), trace_id, 16)) {
207
1
      throw ExtractorException(
208
1
          fmt::format("Invalid input: invalid trace id {}", trace_id_low_str.c_str()));
209
1
    }
210
18
  } else {
211
18
    if (!StringUtil::atoull(trace_id_str.c_str(), trace_id, 16)) {
212
2
      throw ExtractorException(
213
2
          fmt::format("Invalid input: invalid trace id {}", trace_id_str.c_str()));
214
2
    }
215
18
  }
216

            
217
20
  pos += 16; // traceId ended
218
20
  if (!(b3[pos++] == '-')) {
219
1
    throw ExtractorException("Invalid input: not exists span id");
220
1
  }
221

            
222
19
  const std::string span_id_str = b3.substr(pos, 16);
223
19
  if (!StringUtil::atoull(span_id_str.c_str(), span_id, 16)) {
224
1
    throw ExtractorException(fmt::format("Invalid input: invalid span id {}", span_id_str.c_str()));
225
1
  }
226
18
  pos += 16; // spanId ended
227

            
228
18
  if (b3.length() > pos) {
229
    // If we are at this point, we have more than just traceId-spanId.
230
    // If the sampling field is present, we'll have a delimiter 2 characters from now. Ex "-1"
231
    // If it is absent, but a parent ID is (which is strange), we'll have at least 17 characters.
232
    // Therefore, if we have less than two characters, the input is truncated.
233
16
    if (b3.length() == (pos + 1)) {
234
2
      throw ExtractorException("Invalid input: truncated");
235
2
    }
236

            
237
14
    if (!(b3[pos++] == '-')) {
238
1
      throw ExtractorException("Invalid input: not exists sampling field");
239
1
    }
240

            
241
    // If our position is at the end of the string, or another delimiter is one character past our
242
    // position, try to read sampled status.
243
13
    if (b3.length() == pos + 1 || ((b3.length() >= pos + 2) && (b3[pos + 1] == '-'))) {
244
11
      if (!validSamplingFlags(b3[pos])) {
245
1
        throw ExtractorException(fmt::format("Invalid input: invalid sampling flag {}", b3[pos]));
246
1
      }
247
10
      pos++; // consume the sampled status
248
10
    } else {
249
2
      throw ExtractorException("Invalid input: truncated");
250
2
    }
251

            
252
10
    if (b3.length() > pos) {
253
      // If we are at this point, we should have a parent ID, encoded as "-[0-9a-f]{16}".
254
6
      if (b3.length() != pos + 17) {
255
2
        throw ExtractorException("Invalid input: truncated");
256
2
      }
257

            
258
4
      ASSERT(b3[pos] == '-');
259
4
      pos++;
260

            
261
4
      const std::string parent_id_str = b3.substr(pos, b3.length() - pos);
262
4
      if (!StringUtil::atoull(parent_id_str.c_str(), parent_id, 16)) {
263
1
        throw ExtractorException(
264
1
            fmt::format("Invalid input: invalid parent id {}", parent_id_str.c_str()));
265
1
      }
266
4
    }
267
10
  }
268

            
269
9
  return {SpanContext(trace_id_high, trace_id, span_id, parent_id, is_sampled), true};
270
18
}
271

            
272
std::pair<SpanContext, bool> SpanContextExtractor::convertW3CToZipkin(
273
2
    const Extensions::Tracers::OpenTelemetry::SpanContext& w3c_context, bool fallback_sampled) {
274
  // Convert W3C 128-bit trace ID (32 hex chars) to Zipkin format.
275
2
  const absl::string_view trace_id_str = w3c_context.traceId();
276

            
277
2
  if (trace_id_str.length() != 32) {
278
    throw ExtractorException(fmt::format("Invalid W3C trace ID length: {}", trace_id_str.length()));
279
  }
280

            
281
  // Split 128-bit trace ID into high and low 64-bit parts for Zipkin.
282
2
  const absl::string_view trace_id_high_str = absl::string_view(trace_id_str).substr(0, 16);
283
2
  const absl::string_view trace_id_low_str = absl::string_view(trace_id_str).substr(16, 16);
284

            
285
2
  uint64_t trace_id_high(0);
286
2
  uint64_t trace_id(0);
287
2
  if (!parseHexStringView(trace_id_high_str, trace_id_high) ||
288
2
      !parseHexStringView(trace_id_low_str, trace_id)) {
289
    throw ExtractorException(fmt::format("Invalid W3C trace ID: {}", trace_id_str));
290
  }
291

            
292
  // Convert W3C span ID (16 hex chars) to Zipkin span ID.
293
2
  const absl::string_view span_id_str = w3c_context.spanId();
294
2
  if (span_id_str.length() != 16) {
295
    throw ExtractorException(fmt::format("Invalid W3C span ID length: {}", span_id_str.length()));
296
  }
297

            
298
2
  uint64_t span_id(0);
299
2
  if (!parseHexStringView(span_id_str, span_id)) {
300
    throw ExtractorException(fmt::format("Invalid W3C span ID: {}", span_id_str));
301
  }
302

            
303
  // W3C doesn't have a direct parent span concept like B3
304
  // The W3C span-id becomes our span-id, and we don't set a parent.
305
2
  uint64_t parent_id(0);
306

            
307
  // Use W3C sampling decision, or fallback if not specified.
308
2
  bool sampled = w3c_context.sampled() || fallback_sampled;
309

            
310
2
  return {SpanContext(trace_id_high, trace_id, span_id, parent_id, sampled), true};
311
2
}
312

            
313
} // namespace Zipkin
314
} // namespace Tracers
315
} // namespace Extensions
316
} // namespace Envoy