1
#pragma once
2

            
3
#include <chrono>
4
#include <memory>
5

            
6
#include "envoy/api/io_error.h"
7
#include "envoy/api/os_sys_calls_common.h"
8
#include "envoy/buffer/buffer.h"
9
#include "envoy/common/platform.h"
10
#include "envoy/common/pure.h"
11
#include "envoy/event/file_event.h"
12
#include "envoy/network/address.h"
13

            
14
#include "source/common/buffer/buffer_impl.h"
15

            
16
#include "absl/container/fixed_array.h"
17
#include "absl/types/optional.h"
18

            
19
namespace Envoy {
20
// Forward declarations of the buffer types referenced by the I/O handle interface below.
namespace Buffer {
struct RawSlice;
class Instance;
} // namespace Buffer
24

            
25
// Forward declaration of the dispatcher type referenced by the I/O handle interface below.
namespace Event {
class Dispatcher;
} // namespace Event
28

            
29
using RawSliceArrays = absl::FixedArray<absl::FixedArray<Buffer::RawSlice>>;
30

            
31
namespace Network {
32

            
33
/**
 * Fixed-size storage for Windows connection redirect records together with the number of bytes
 * of the buffer that are in use.
 */
struct Win32RedirectRecords {
  // The size of the buffer is selected based on:
  // https://docs.microsoft.com/en-us/windows-hardware/drivers/network/sio-query-wfp-connection-redirect-records
  uint8_t buf_[2048];
  // Number of bytes of buf_ that are in use. Defaults to 0 so that a default-initialized record
  // never reports an indeterminate size.
  unsigned long buf_size_{0};
};
39

            
40
/**
41
 * IoHandle: an abstract interface for all I/O operations
42
 */
43
class IoHandle {
44
public:
45
204952
  virtual ~IoHandle() = default;
46

            
47
  /**
48
   * NOTE: Must NOT be used for new use cases!
49
   *
50
   * This is most probably not the function you are looking for. IoHandle has wrappers for most of
51
   * the POSIX socket api functions so there should be no need to interact with the internal fd by
52
   * means of syscalls. Moreover, depending on the IoHandle implementation, the fd might not be an
53
   * underlying OS file descriptor. If any api function is missing, a wrapper for it should be added
54
   * to the IoHandle interface.
55
   *
56
   * Return data associated with IoHandle. It is not necessarily a file descriptor.
57
   */
58
  virtual os_fd_t fdDoNotUse() const PURE;
59

            
60
  /**
61
   * Clean up IoHandle resources
62
   */
63
  virtual Api::IoCallUint64Result close() PURE;
64

            
65
  /**
66
   * Return true if close() hasn't been called.
67
   */
68
  virtual bool isOpen() const PURE;
69

            
70
  /**
71
   * Return true if the socket has had connect() successfully called on it.
72
   * Use isOpen() to check if the socket is still connected or not.
73
   */
74
  virtual bool wasConnected() const PURE;
75

            
76
  /**
77
   * Read data into given slices.
78
   * @param max_length supplies the maximum length to read.
79
   * @param slices points to the output location.
80
   * @param num_slice indicates the number of slices |slices| contains.
81
   * @return a Api::IoCallUint64Result with err_ = an Api::IoError instance or
82
   * err_ = nullptr and rc_ = the bytes read for success.
83
   */
84
  virtual Api::IoCallUint64Result readv(uint64_t max_length, Buffer::RawSlice* slices,
85
                                        uint64_t num_slice) PURE;
86

            
87
  /**
88
   * Read from a io handle directly into buffer.
89
   * @param buffer supplies the buffer to read into.
90
   * @param max_length supplies the maximum length to read. A value of absl::nullopt means to read
91
   *   as much data as possible, within the constraints of available buffer size.
92
   * @return a IoCallUint64Result with err_ = nullptr and rc_ = the number of bytes
93
   * read if successful, or err_ = some IoError for failure. If call failed, rc_ shouldn't be used.
94
   */
95
  virtual Api::IoCallUint64Result read(Buffer::Instance& buffer,
96
                                       absl::optional<uint64_t> max_length) PURE;
97

            
98
  /**
99
   * Write the data in slices out.
100
   * @param slices points to the location of data to be written.
101
   * @param num_slice indicates number of slices |slices| contains.
102
   * @return a Api::IoCallUint64Result with err_ = an Api::IoError instance or
103
   * err_ = nullptr and rc_ = the bytes written for success.
104
   */
105
  virtual Api::IoCallUint64Result writev(const Buffer::RawSlice* slices, uint64_t num_slice) PURE;
106

            
107
  /**
108
   * Write the contents of the buffer out to a file descriptor. Bytes that were successfully written
109
   * are drained from the buffer.
110
   * @param buffer supplies the buffer to write from.
111
   * @return a IoCallUint64Result with err_ = nullptr and rc_ = if successful, the number of bytes
112
   * written and drained from the buffer, or err_ = some IoError for failure. If call failed, rc_
113
   * shouldn't be used.
114
   */
115
  virtual Api::IoCallUint64Result write(Buffer::Instance& buffer) PURE;
116

            
117
  /**
118
   * Send a message to the address.
119
   * @param slices points to the location of data to be sent.
120
   * @param num_slice indicates number of slices |slices| contains.
121
   * @param flags flags to pass to the underlying sendmsg function (see man 2 sendmsg).
122
   * @param self_ip is the source address whose port should be ignored. Nullptr
123
   * if caller wants kernel to select source address.
124
   * @param peer_address is the destination address.
125
   * @return a Api::IoCallUint64Result with err_ = an Api::IoError instance or
126
   * err_ = nullptr and rc_ = the bytes written for success.
127
   */
128
  virtual Api::IoCallUint64Result sendmsg(const Buffer::RawSlice* slices, uint64_t num_slice,
129
                                          int flags, const Address::Ip* self_ip,
130
                                          const Address::Instance& peer_address) PURE;
131

            
132
  struct RecvMsgPerPacketInfo {
133
    // The destination address from transport header.
134
    Address::InstanceConstSharedPtr local_address_;
135
    // The source address from transport header.
136
    Address::InstanceConstSharedPtr peer_address_;
137
    // The payload length of this packet.
138
    unsigned int msg_len_{0};
139
    // The gso_size, if specified in the transport header
140
    unsigned int gso_size_{0};
141
    // If true indicates a successful syscall, but the packet was dropped due to truncation. We do
142
    // not support receiving truncated packets.
143
    bool truncated_and_dropped_{false};
144
    // The contents of the TOS byte in the IP header.
145
    uint8_t tos_{0};
146
    // UDP control message specified by save_cmsg_config in QUIC config.
147
    Buffer::OwnedImpl saved_cmsg_;
148
  };
149

            
150
  /**
151
   * The output parameter type for recvmsg and recvmmsg.
152
   */
153
  struct RecvMsgOutput {
154
    /*
155
     * @param num_packets_per_call is the max number of packets allowed per
156
     * recvmmsg call. For recvmsg call, any value larger than 0 is allowed, but
157
     * only one packet will be returned.
158
     * @param dropped_packets points to a variable to store how many packets are
159
     * dropped so far. If nullptr, recvmsg() won't try to get this information
160
     * from transport header.
161
     */
162
    RecvMsgOutput(size_t num_packets_per_call, uint32_t* dropped_packets)
163
561320
        : dropped_packets_(dropped_packets), msg_(num_packets_per_call) {}
164

            
165
    // If not nullptr, its value is the total number of packets dropped. recvmsg() will update it
166
    // when more packets are dropped.
167
    uint32_t* dropped_packets_;
168

            
169
    // Packet headers for each received packet. It's populated according to packet receive order.
170
    // Only the first entry is used to return per packet information by recvmsg.
171
    absl::FixedArray<RecvMsgPerPacketInfo> msg_;
172
  };
173

            
174
  // Struct representation of QuicProtocolOptions::SaveCmsgConfig config proto.
175
  struct UdpSaveCmsgConfig {
176
    absl::optional<uint32_t> level;
177
    absl::optional<uint32_t> type;
178
    uint32_t expected_size = 0;
179

            
180
1376282
    bool hasConfig() const { return (level.has_value() && type.has_value()); }
181
  };
182

            
183
  /**
184
   * Receive a message into given slices, output overflow, source/destination
185
   * addresses via passed-in parameters upon success.
186
   * @param slices points to the location of receiving buffer.
187
   * @param num_slice indicates number of slices |slices| contains.
188
   * @param self_port the port this handle is assigned to. This is used to populate
189
   * local_address because local port can't be retrieved from control message.
190
   * @param save_cmsg_config config that determines whether cmsg is saved to output.
191
   * @param output modified upon each call to return fields requested in it.
192
   * @return a Api::IoCallUint64Result with err_ = an Api::IoError instance or
193
   * err_ = nullptr and rc_ = the bytes received for success.
194
   */
195
  virtual Api::IoCallUint64Result recvmsg(Buffer::RawSlice* slices, const uint64_t num_slice,
196
                                          uint32_t self_port,
197
                                          const UdpSaveCmsgConfig& save_cmsg_config,
198
                                          RecvMsgOutput& output) PURE;
199

            
200
  /**
201
   * If the platform supports, receive multiple messages into given slices, output overflow,
202
   * source/destination addresses per message via passed-in parameters upon success.
203
   * @param slices are the receive buffers for the messages. Each message
204
   * received are stored in an individual entry of |slices|.
205
   * @param self_port is the same as the one in recvmsg().
206
   * @param save_cmsg_config config that determines whether cmsg is saved to output.
207
   * @param output is modified upon each call and each message received.
208
   */
209
  virtual Api::IoCallUint64Result recvmmsg(RawSliceArrays& slices, uint32_t self_port,
210
                                           const UdpSaveCmsgConfig& save_cmsg_config,
211
                                           RecvMsgOutput& output) PURE;
212

            
213
  /**
214
   * Read data into given buffer for connected handles
215
   * @param buffer buffer to read the data into
216
   * @param length buffer length
217
   * @param flags flags to pass to the underlying recv function (see man 2 recv)
218
   */
219
  virtual Api::IoCallUint64Result recv(void* buffer, size_t length, int flags) PURE;
220

            
221
  /**
222
   * return true if the platform supports recvmmsg() and sendmmsg().
223
   */
224
  virtual bool supportsMmsg() const PURE;
225

            
226
  /**
227
   * return true if the platform supports udp_gro
228
   */
229
  virtual bool supportsUdpGro() const PURE;
230

            
231
  /**
232
   * Bind to address. The handle should have been created with a call to socket()
233
   * @param address address to bind to.
234
   * @return a Api::SysCallIntResult with rc_ = 0 for success and rc_ = -1 for failure. If the call
235
   *   is successful, errno_ shouldn't be used.
236
   */
237
  virtual Api::SysCallIntResult bind(Address::InstanceConstSharedPtr address) PURE;
238

            
239
  /**
240
   * Listen on bound handle.
241
   * @param backlog maximum number of pending connections for listener
242
   * @return a Api::SysCallIntResult with rc_ = 0 for success and rc_ = -1 for failure. If the call
243
   *   is successful, errno_ shouldn't be used.
244
   */
245
  virtual Api::SysCallIntResult listen(int backlog) PURE;
246

            
247
  /**
248
   * Accept on listening handle
249
   * @param addr remote address to be returned
250
   * @param addrlen remote address length
251
   * @return accepted IoHandlePtr
252
   */
253
  virtual std::unique_ptr<IoHandle> accept(struct sockaddr* addr, socklen_t* addrlen) PURE;
254

            
255
  /**
256
   * Connect to address. The handle should have been created with a call to socket()
257
   * on this object.
258
   * @param address remote address to connect to.
259
   * @return a Api::SysCallIntResult with rc_ = 0 for success and rc_ = -1 for failure. If the call
260
   *   is successful, errno_ shouldn't be used.
261
   */
262
  virtual Api::SysCallIntResult connect(Address::InstanceConstSharedPtr address) PURE;
263

            
264
  /**
265
   * Set option (see man 2 setsockopt)
266
   */
267
  virtual Api::SysCallIntResult setOption(int level, int optname, const void* optval,
268
                                          socklen_t optlen) PURE;
269

            
270
  /**
271
   * Get option (see man 2 getsockopt)
272
   */
273
  virtual Api::SysCallIntResult getOption(int level, int optname, void* optval,
274
                                          socklen_t* optlen) PURE;
275

            
276
  /**
277
   * @see MSDN WSAIoctl. Controls the mode of a socket.
278
   */
279
  virtual Api::SysCallIntResult ioctl(unsigned long control_code, void* in_buffer,
280
                                      unsigned long in_buffer_len, void* out_buffer,
281
                                      unsigned long out_buffer_len,
282
                                      unsigned long* bytes_returned) PURE;
283
  /**
284
   * Toggle blocking behavior
285
   * @param blocking flag to set/unset blocking state
286
   * @return a Api::SysCallIntResult with rc_ = 0 for success and rc_ = -1 for failure. If the call
287
   * is successful, errno_ shouldn't be used.
288
   */
289
  virtual Api::SysCallIntResult setBlocking(bool blocking) PURE;
290

            
291
  /**
292
   * @return the domain used by underlying socket (see man 2 socket)
293
   */
294
  virtual absl::optional<int> domain() PURE;
295

            
296
  /**
297
   * Get local address (ip:port pair)
298
   * @return local address as @ref Address::InstanceConstSharedPtr or error status.
299
   */
300
  virtual absl::StatusOr<Address::InstanceConstSharedPtr> localAddress() PURE;
301

            
302
  /**
303
   * Get peer's address (ip:port pair)
304
   * @return peer's address as @ref Address::InstanceConstSharedPtr or error status.
305
   */
306
  virtual absl::StatusOr<Address::InstanceConstSharedPtr> peerAddress() PURE;
307

            
308
  /**
309
   * Duplicates the handle. This is intended to be used only on listener sockets. (see man dup)
310
   * @return a pointer to the new handle.
311
   */
312
  virtual std::unique_ptr<IoHandle> duplicate() PURE;
313

            
314
  /**
315
   * Initializes the internal file event that will signal when the io handle is readable, writable
316
   * or closed. Each handle is allowed to have only a single file event. The internal file event is
317
   * managed by the handle and it is turned on and off when the socket would block. Calls to this
318
   * function must be paired with calls to reset the file event or close the socket.
319
   * @param dispatcher dispatcher to be used to allocate the file event.
320
   * @param cb supplies the callback to fire when the handle is ready.
321
   * @param trigger specifies whether to edge or level trigger.
322
   * @param events supplies a logical OR of @ref Event::FileReadyType events that the file event
323
   *               should initially listen on.
324
   */
325
  virtual void initializeFileEvent(Event::Dispatcher& dispatcher, Event::FileReadyCb cb,
326
                                   Event::FileTriggerType trigger, uint32_t events) PURE;
327

            
328
  /**
329
   * Activates file events for the current underlying fd.
330
   * @param events events that will be activated.
331
   */
332
  virtual void activateFileEvents(uint32_t events) PURE;
333

            
334
  /**
335
   * Enables file events for the current underlying fd.
336
   * @param events events that will be enabled.
337
   */
338
  virtual void enableFileEvents(uint32_t events) PURE;
339

            
340
  /**
341
   * Resets the file event.
342
   */
343
  virtual void resetFileEvents() PURE;
344

            
345
  /**
346
   * Shut down part of a full-duplex connection (see man 2 shutdown)
347
   */
348
  virtual Api::SysCallIntResult shutdown(int how) PURE;
349

            
350
  /**
351
   *  @return absl::optional<std::chrono::milliseconds> An optional of the most recent round-trip
352
   *  time of the connection. If the platform does not support this, then an empty optional is
353
   *  returned.
354
   */
355
  virtual absl::optional<std::chrono::milliseconds> lastRoundTripTime() PURE;
356

            
357
  /**
358
   * @return the current congestion window in bytes, or unset if not available or not
359
   * congestion-controlled.
360
   * @note some congestion controller's cwnd is measured in number of packets, in that case the
361
   * return value is cwnd(in packets) times the connection's MSS.
362
   */
363
  virtual absl::optional<uint64_t> congestionWindowInBytes() const PURE;
364

            
365
  /**
366
   * @return the interface name for the socket, if the OS supports it. Otherwise, absl::nullopt.
367
   */
368
  virtual absl::optional<std::string> interfaceName() PURE;
369
};
370

            
371
using IoHandlePtr = std::unique_ptr<IoHandle>;
372

            
373
} // namespace Network
374
} // namespace Envoy