1
#pragma once
2

            
3
#include <array>
4
#include <cstddef>
5
#include <cstdint>
6
#include <string>
7
#include <vector>
8

            
9
#include "absl/status/statusor.h"
10
#include "absl/strings/string_view.h"
11
#include "absl/types/optional.h"
12
#include "absl/types/variant.h"
13

            
14
namespace Envoy {
15
namespace Extensions {
16
namespace Common {
17
namespace Aws {
18
namespace Eventstream {
19

            
// AWS Eventstream protocol constants.
// Reference: https://smithy.io/2.0/aws/amazon-eventstream.html

            
// Message framing sizes, in bytes.

// Prelude layout: total_length(4) + headers_length(4) + prelude_crc(4).
constexpr uint32_t PRELUDE_SIZE = 12;
// Trailer layout: message_crc(4).
constexpr uint32_t TRAILER_SIZE = 4;
// A message with no headers and no payload is just a prelude plus a trailer: 16 bytes.
constexpr uint32_t MIN_MESSAGE_SIZE = PRELUDE_SIZE + TRAILER_SIZE;
// Largest payload section accepted: 24 MiB (24 * 1024 * 1024 bytes).
constexpr uint32_t MAX_PAYLOAD_SIZE = 24 * 1024 * 1024;
// Largest headers section accepted: 128 KiB (128 * 1024 bytes).
constexpr uint32_t MAX_HEADERS_SIZE = 128 * 1024;
// Upper bound on the total_length wire field to prevent unbounded buffering. Even with a valid
// prelude CRC (which is not a MAC), an attacker could craft a message with an absurd total_length.
// The sum is far below the uint32_t range, so it cannot wrap.
constexpr uint32_t MAX_TOTAL_LENGTH =
    PRELUDE_SIZE + MAX_HEADERS_SIZE + MAX_PAYLOAD_SIZE + TRAILER_SIZE;
// Maximum header string length: 2^15 - 1.
constexpr uint16_t MAX_HEADER_STRING_LENGTH = 32767;

            
// Byte offsets of the three prelude fields, measured from the start of the message.
constexpr size_t TOTAL_LENGTH_OFFSET = 0;   // total_length
constexpr size_t HEADERS_LENGTH_OFFSET = 4; // headers_length
constexpr size_t PRELUDE_CRC_OFFSET = 8;    // prelude_crc

            
// Sizes, in bytes, of the fixed-width fields that frame each header entry.
constexpr size_t NAME_LENGTH_SIZE = 1;   // header name length
constexpr size_t TYPE_SIZE = 1;          // header value type
constexpr size_t STRING_LENGTH_SIZE = 2; // length prefix of a string/bytearray value

            
// Sizes, in bytes, of the fixed-width header value types.
constexpr size_t BYTE_VALUE_SIZE = 1;
constexpr size_t SHORT_VALUE_SIZE = 2;
constexpr size_t INT32_VALUE_SIZE = 4;
constexpr size_t INT64_VALUE_SIZE = 8;
constexpr size_t UUID_VALUE_SIZE = 16;

            
/**
 * Header value types as defined by the AWS eventstream specification.
 * The enumerator values are fixed explicitly and the underlying type is a single byte.
 */
enum class HeaderValueType : uint8_t {
  BoolTrue = 0,
  BoolFalse = 1,
  Byte = 2,
  Short = 3,
  Int32 = 4,
  Int64 = 5,
  ByteArray = 6,
  String = 7,
  Timestamp = 8,
  Uuid = 9,
};

            
68
/**
69
 * Represents a parsed header value.
70
 * Uses absl::variant to hold the appropriate type based on HeaderValueType.
71
 */
72
struct HeaderValue {
73
  HeaderValueType type;
74
  absl::variant<bool,                   // BoolTrue, BoolFalse
75
                int8_t,                 // Byte
76
                int16_t,                // Short
77
                int32_t,                // Int32
78
                int64_t,                // Int64, Timestamp
79
                std::string,            // ByteArray, String
80
                std::array<uint8_t, 16> // Uuid
81
                >
82
      value;
83
};
84

            
85
/**
86
 * Represents a single header (name-value pair).
87
 */
88
struct Header {
89
  std::string name;
90
  HeaderValue value;
91
};
92

            
93
/**
94
 * Represents a fully parsed eventstream message.
95
 */
96
struct ParsedMessage {
97
  std::vector<Header> headers;
98
  std::string payload_bytes; // Arbitrary bytes; not necessarily UTF-8.
99
};
100

            
101
/**
102
 * Result of attempting to parse a message from a buffer.
103
 * Single-pass design: returns either a parsed message, indication of incomplete data, or error.
104
 */
105
struct ParseResult {
106
  // The parsed message, if a complete message was found.
107
  // nullopt if the buffer doesn't contain a complete message yet.
108
  absl::optional<ParsedMessage> message;
109

            
110
  // Number of bytes consumed from the buffer.
111
  // 0 if incomplete (need more data).
112
  // > 0 if a message was parsed (remove this many bytes from buffer).
113
  size_t bytes_consumed;
114
};
115

            
116
/**
117
 * Parser for AWS Eventstream binary protocol.
118
 * Implements the specification: https://smithy.io/2.0/aws/amazon-eventstream.html
119
 *
120
 * Example usage:
121
 *   absl::string_view remaining = buffer;
122
 *
123
 *   while (true) {
124
 *     auto result = EventstreamParser::parseMessage(remaining);
125
 *     if (!result.ok()) {
126
 *       // Handle error (corrupt data)
127
 *       break;
128
 *     }
129
 *     if (!result->message.has_value()) {
130
 *       // Incomplete - wait for more data
131
 *       break;
132
 *     }
133
 *     // Process result->message->headers and result->message->payload_bytes
134
 *     remaining.remove_prefix(result->bytes_consumed);
135
 *   }
136
 */
137
class EventstreamParser {
138
public:
139
  /**
140
   * Attempts to parse an eventstream message from the buffer.
141
   * Single-pass design: checks for completeness and parses in one call.
142
   * Validates both prelude CRC and message CRC.
143
   *
144
   * @param buffer contiguous bytes containing incoming data (may be incomplete).
145
   *               Callers should ensure the buffer is linearized before passing.
146
   * @return ParseResult with message if complete, nullopt if incomplete, or error status.
147
   */
148
  static absl::StatusOr<ParseResult> parseMessage(absl::string_view buffer);
149

            
150
private:
151
  /**
152
   * Parses the headers section of an eventstream message.
153
   *
154
   * @param headers_bytes the headers section bytes.
155
   * @return vector of Header on success, or error status on failure.
156
   */
157
  static absl::StatusOr<std::vector<Header>> parseHeaders(absl::string_view headers_bytes);
158

            
159
  /**
160
   * Computes CRC32 checksum using the same algorithm as AWS eventstream.
161
   * Uses zlib's crc32() function.
162
   *
163
   * @param data the data to compute checksum for.
164
   * @param initial_crc the initial CRC value (0 for first computation).
165
   * @return the computed CRC32 value.
166
   */
167
  static uint32_t computeCrc32(absl::string_view data, uint32_t initial_crc = 0);
168
};
169

            
170
} // namespace Eventstream
171
} // namespace Aws
172
} // namespace Common
173
} // namespace Extensions
174
} // namespace Envoy