1
#include "source/common/http/sse/sse_parser.h"
2

            
3
#include <algorithm>
4
#include <cstdint>
5

            
6
#include "absl/strings/ascii.h"
7
#include "absl/strings/string_view.h"
8

            
9
namespace Envoy {
10
namespace Http {
11
namespace Sse {
12

            
13
231
// Parses the text of one SSE event (a sequence of field lines) into a ParsedEvent.
//
// Each line is split with findLineEnd() (treating the input as complete) and then into a
// field name / value pair with parseFieldLine(). Recognized fields:
//   - "data":  values are concatenated, separated by a single '\n'.
//   - "id":    last value wins; a value containing U+0000 NULL is ignored.
//   - "event": last value wins.
//   - "retry": must consist solely of ASCII digits and fit in a uint64_t, otherwise ignored.
// Lines with any other field name (including the empty name returned for comments and blank
// lines) are skipped.
//
// @param event the text of a single event.
// @return the fields that were present and valid; fields never seen are left unset.
SseParser::ParsedEvent SseParser::parseEvent(absl::string_view event) {
  // TODO(optimization): Consider merging findEventEnd and parseEvent into a single-pass
  // algorithm to avoid traversing the buffer twice.
  ParsedEvent parsed_event;
  absl::string_view remaining = event;

  while (!remaining.empty()) {
    // end_stream=true: the event text is complete, so the last line never needs to wait for
    // more bytes and findLineEnd() cannot return npos here.
    auto [line_end, next_line] = findLineEnd(remaining, true);
    absl::string_view line = remaining.substr(0, line_end);
    remaining = remaining.substr(next_line);

    auto [field_name, field_value] = parseFieldLine(line);
    if (field_name == "data") {
      if (!parsed_event.data.has_value()) {
        // Optimization: Reserve memory to avoid allocations during append.
        // The total data cannot be larger than the input event string.
        parsed_event.data = std::string();
        parsed_event.data->reserve(event.size());
      } else {
        // Per SSE spec, multiple data fields are concatenated with newlines.
        parsed_event.data->append("\n");
      }
      parsed_event.data->append(field_value.data(), field_value.size());
    } else if (field_name == "id") {
      // Per SSE spec, if the field value contains U+0000 NULL, the field is ignored.
      // Otherwise, set the last event ID to the field value. If multiple id fields exist,
      // the last one wins.
      if (field_value.find('\0') == absl::string_view::npos) {
        parsed_event.id = std::string(field_value);
      }
    } else if (field_name == "event") {
      // Per SSE spec, the event field sets the event type. If multiple event fields exist,
      // the last one wins.
      parsed_event.event_type = std::string(field_value);
    } else if (field_name == "retry") {
      // Per SSE spec, the retry field must consist of only ASCII digits.
      // If it contains any other character, the field is ignored.
      if (!field_value.empty()) {
        uint64_t value = 0;
        bool valid = true;
        for (const char c : field_value) {
          if (!absl::ascii_isdigit(c)) {
            valid = false;
            break;
          }
          const uint64_t digit = static_cast<uint64_t>(c - '0');
          // Reject values that do not fit in 64 bits. The bound must be checked *before*
          // multiplying: comparing the wrapped result against the previous value misses
          // overflows, because `value * 10 + digit` can wrap around and still end up larger
          // than `value` (e.g. "30000000000000000000").
          if (value > (UINT64_MAX - digit) / 10) {
            valid = false;
            break;
          }
          value = value * 10 + digit;
        }
        if (valid) {
          parsed_event.retry = value;
        }
      }
    }
  }

  return parsed_event;
}
74

            
75
516
// Scans `buffer` for the first blank line, which terminates an SSE event.
//
// On success the three returned values are, in order: the offset at which the event content
// starts (0, or 3 when a UTF-8 BOM was skipped), the offset of the terminating blank line, and
// the offset just past that blank line's terminator. When no complete event is available all
// three values are absl::string_view::npos.
//
// @param buffer bytes to scan.
// @param end_stream forwarded to findLineEnd(); when true a trailing unterminated line or a
//        trailing lone '\r' is treated as a complete line.
SseParser::FindEventEndResult SseParser::findEventEnd(absl::string_view buffer, bool end_stream) {
  const size_t not_found = absl::string_view::npos;

  // Per SSE spec: Strip UTF-8 BOM (0xEF 0xBB 0xBF) if present at stream start.
  // NOTE(review): this function only sees `buffer`, so the check runs at the front of every
  // buffer it is given -- confirm callers only pass data where that is appropriate.
  const bool starts_with_bom = buffer.size() >= 3 && static_cast<uint8_t>(buffer[0]) == 0xEF &&
                               static_cast<uint8_t>(buffer[1]) == 0xBB &&
                               static_cast<uint8_t>(buffer[2]) == 0xBF;

  // Event content starts after the BOM, and so does the scan.
  const size_t event_start = starts_with_bom ? 3 : 0;
  size_t offset = event_start;
  absl::string_view unread = buffer.substr(offset);

  while (!unread.empty()) {
    const auto [line_length, advance] = findLineEnd(unread, end_stream);

    if (line_length == not_found) {
      // The current line is not complete yet; more data is required.
      return {not_found, not_found, not_found};
    }

    if (line_length == 0) {
      // Found blank line so this is the end of event
      return {event_start, offset, offset + advance};
    }

    // Non-blank line: step over it, including its terminator.
    offset += advance;
    unread.remove_prefix(advance);
  }

  // Per SSE spec: Once the end of the file is reached, any pending data must be discarded.
  // (i.e., incomplete events without a closing blank line are dropped)
  return {not_found, not_found, not_found};
}
108

            
109
267
// Splits a single SSE line (without its line terminator) into {field name, field value}.
//
//   - Empty lines and comment lines (first character ':') yield {"", ""}.
//   - A line without ':' yields {line, ""}: the whole line is the name, the value is empty.
//   - Otherwise the name is the text before the first ':' and the value is the text after it,
//     with at most one leading space removed.
//
// The returned views either reference `line` or an empty string literal; nothing is copied.
std::pair<absl::string_view, absl::string_view> SseParser::parseFieldLine(absl::string_view line) {
  // Per SSE spec, lines starting with ':' are comments and should be ignored.
  // Blank lines carry no field either.
  if (line.empty() || line.front() == ':') {
    return {"", ""};
  }

  const size_t separator = line.find(':');
  if (separator == absl::string_view::npos) {
    return {line, ""};
  }

  absl::string_view value = line.substr(separator + 1);
  // Per SSE spec, remove leading space from value if present.
  if (!value.empty() && value.front() == ' ') {
    value.remove_prefix(1);
  }

  return {line.substr(0, separator), value};
}
134

            
135
992
// Locates the end of the first line in `str`, accepting LF, CR and CRLF as terminators.
//
// @param str text to scan.
// @param end_stream true when no further bytes will follow `str`.
// @return {length of the line, offset of the next line}. Both are absl::string_view::npos when
//         the line cannot be delimited yet: no terminator was found, or `str` ends in a lone
//         '\r' that might be the first half of a CRLF, and `end_stream` is false.
std::pair<size_t, size_t> SseParser::findLineEnd(absl::string_view str, bool end_stream) {
  using LineBounds = std::pair<size_t, size_t>;
  const LineBounds need_more_data{absl::string_view::npos, absl::string_view::npos};

  const size_t terminator = str.find_first_of("\r\n");

  // No terminator at all: at end of stream the rest of the input is the line, otherwise wait.
  if (terminator == absl::string_view::npos) {
    return end_stream ? LineBounds{str.size(), str.size()} : need_more_data;
  }

  // Plain LF.
  if (str[terminator] == '\n') {
    return {terminator, terminator + 1};
  }

  // From here on the terminator is a CR.
  const bool cr_is_last_byte = terminator + 1 >= str.size();
  if (cr_is_last_byte) {
    // Split CRLF edge case: if '\r' is at the end and more data may come, wait to see whether
    // the next byte is '\n'. At end of stream the lone CR terminates the line.
    return end_stream ? LineBounds{terminator, terminator + 1} : need_more_data;
  }

  // CRLF consumes two bytes, a lone CR consumes one.
  const size_t terminator_length = (str[terminator + 1] == '\n') ? 2 : 1;
  return {terminator, terminator + terminator_length};
}
166

            
167
} // namespace Sse
168
} // namespace Http
169
} // namespace Envoy