1
#pragma once
2

            
3
#include <cstdint>
4
#include <list>
5
#include <string>
6

            
7
#include "envoy/common/platform.h"
8
#include "envoy/config/core/v3/address.pb.h"
9
#include "envoy/network/connection.h"
10
#include "envoy/network/listener.h"
11

            
12
#include "source/common/api/os_sys_calls_impl.h"
13
#include "source/common/common/cleanup.h"
14
#include "source/common/common/statusor.h"
15

            
16
#if defined(__linux__)
17
#include "source/common/api/os_sys_calls_impl_linux.h"
18
#endif
19

            
20
#include "absl/strings/string_view.h"
21

            
22
namespace Envoy {
23
namespace Network {
24

            
25
/**
26
 * A callback interface used by readFromSocket() to pass packets read from
27
 * socket.
28
 */
29
class UdpPacketProcessor {
30
public:
31
5754
  virtual ~UdpPacketProcessor() = default;
32

            
33
  /**
34
   * Consume the packet read out of the socket with the information from UDP
35
   * header.
36
   * @param local_address is the destination address in the UDP header.
37
   * @param peer_address is the source address in the UDP header.
38
   * @param buffer contains the packet read.
39
   * @param receive_time is the time when the packet is read.
40
   */
41
  virtual void processPacket(Address::InstanceConstSharedPtr local_address,
42
                             Address::InstanceConstSharedPtr peer_address,
43
                             Buffer::InstancePtr buffer, MonotonicTime receive_time, uint8_t tos,
44
                             Buffer::OwnedImpl saved_cmsg) PURE;
45

            
46
  /**
47
   * Called whenever datagrams are dropped due to overflow or truncation.
48
   * @param dropped supplies the number of dropped datagrams.
49
   */
50
  virtual void onDatagramsDropped(uint32_t dropped) PURE;
51

            
52
  /**
53
   * The expected max size of the datagram to be read. If it's smaller than
54
   * the size of datagrams received, they will be dropped.
55
   */
56
  virtual uint64_t maxDatagramSize() const PURE;
57

            
58
  /**
59
   * An estimated number of packets to read in each read event.
60
   */
61
  virtual size_t numPacketsExpectedPerEventLoop() const PURE;
62

            
63
  virtual const IoHandle::UdpSaveCmsgConfig& saveCmsgConfig() const PURE;
64
};
65

            
66
// Default value for the maximum UDP datagram size.
static constexpr uint64_t DEFAULT_UDP_MAX_DATAGRAM_SIZE = 1500;
// Number of datagrams per receive call. NOTE(review): the consumer is not visible in this header.
static constexpr uint64_t NUM_DATAGRAMS_PER_RECEIVE = 16;
// Upper bound on the packets read by Utility::readPacketsFromSocket() in one call.
static constexpr uint64_t MAX_NUM_PACKETS_PER_EVENT_LOOP = 6000;
69

            
70
/**
71
 * Wrapper which resolves UDP socket proto config with defaults.
72
 */
73
struct ResolvedUdpSocketConfig {
74
  ResolvedUdpSocketConfig(const envoy::config::core::v3::UdpSocketConfig& config,
75
                          bool prefer_gro_default);
76

            
77
  uint64_t max_rx_datagram_size_;
78
  bool prefer_gro_;
79
};
80

            
81
// The different options for receiving UDP packet(s) from system calls.
enum class UdpRecvMsgMethod {
  // The `recvmsg` system call.
  RecvMsg,
  // The `recvmsg` system call using GRO (generic receive offload). This is the preferred method,
  // if the platform supports it.
  RecvMsgWithGro,
  // The `recvmmsg` system call.
  RecvMmsg,
};
91

            
92
/**
93
 * Common network utility routines.
94
 */
95
class Utility {
96
public:
97
  static constexpr absl::string_view TCP_SCHEME{"tcp://"};
98
  static constexpr absl::string_view UDP_SCHEME{"udp://"};
99
  static constexpr absl::string_view UNIX_SCHEME{"unix://"};
100

            
101
  /**
102
   * Make a URL from a datagram Address::Instance; will be udp:// prefix for
103
   * an IP address, and unix:// prefix otherwise. Giving a tcp address to this
104
   * function will result in incorrect behavior (addresses don't know if they
105
   * are datagram or stream).
106
   * @param addr supplies the address to convert to string.
107
   * @return The appropriate url string compatible with resolveUrl.
108
   */
109
  static std::string urlFromDatagramAddress(const Address::Instance& addr);
110

            
111
  /**
112
   * Resolve a URL.
113
   * @param url supplies the url to resolve.
114
   * @return Address::InstanceConstSharedPtr the resolved address or an error status
115
   */
116
  static absl::StatusOr<Address::InstanceConstSharedPtr> resolveUrl(const std::string& url);
117

            
118
  /**
119
   * Determine the socket type for a URL.
120
   *
121
   * @param url supplies the url to resolve.
122
   * @return StatusOr<Socket::Type> of the socket type, or an error status if url is invalid.
123
   */
124
  static StatusOr<Socket::Type> socketTypeFromUrl(const std::string& url);
125

            
126
  /**
127
   * Match a URL to the TCP scheme
128
   * @param url supplies the URL to match.
129
   * @return bool true if the URL matches the TCP scheme, false otherwise.
130
   */
131
  static bool urlIsTcpScheme(absl::string_view url);
132

            
133
  /**
134
   * Match a URL to the UDP scheme
135
   * @param url supplies the URL to match.
136
   * @return bool true if the URL matches the UDP scheme, false otherwise.
137
   */
138
  static bool urlIsUdpScheme(absl::string_view url);
139

            
140
  /**
141
   * Match a URL to the Unix scheme
142
   * @param url supplies the Unix to match.
143
   * @return bool true if the URL matches the Unix scheme, false otherwise.
144
   */
145
  static bool urlIsUnixScheme(absl::string_view url);
146

            
147
  /**
148
   * Parse an internet host address (IPv4 or IPv6) and create an Instance from it. The address must
149
   * not include a port number.
150
   * @param ip_address string to be parsed as an internet address.
151
   * @param port optional port to include in Instance created from ip_address, 0 by default.
152
   * @param v6only disable IPv4-IPv6 mapping for IPv6 addresses?
153
   * @param network_namespace network namespace containing the address.
154
   * @return pointer to the Instance, or nullptr if unable to parse the address.
155
   */
156
  static Address::InstanceConstSharedPtr
157
  parseInternetAddressNoThrow(const std::string& ip_address, uint16_t port = 0, bool v6only = true,
158
                              absl::optional<std::string> network_namespace = absl::nullopt);
159

            
160
  /**
161
   * Parse an internet host address (IPv4 or IPv6) AND port, and create an Instance from it. Throws
162
   * EnvoyException if unable to parse the address. This is needed when a shared pointer is needed
163
   * but only a raw instance is available.
164
   * @param Address::Ip& to be copied to the new instance.
165
   * @return pointer to the Instance.
166
   */
167
  static Address::InstanceConstSharedPtr copyInternetAddressAndPort(const Address::Ip& ip);
168

            
169
  /**
170
   * Create a new Instance from an internet host address (IPv4 or IPv6) and port.
171
   * @param ip_addr string to be parsed as an internet address and port. Examples:
172
   *        - "1.2.3.4:80"
173
   *        - "[1234:5678::9]:443"
174
   * @param v6only disable IPv4-IPv6 mapping for IPv6 addresses?
175
   * @param network_namespace network namespace containing the address.
176
   * @return pointer to the Instance, or a nullptr in case of a malformed IP address.
177
   */
178
  static Address::InstanceConstSharedPtr
179
  parseInternetAddressAndPortNoThrow(const std::string& ip_address, bool v6only = true,
180
                                     absl::optional<std::string> network_namespace = absl::nullopt);
181

            
182
  /**
183
   * Get the local address of the first interface address that is of type
184
   * version and is not a loopback address. If no matches are found, return the
185
   * loopback address of type version.
186
   * @param the local address IP version.
187
   * @return the local IP address of the server
188
   */
189
  static Address::InstanceConstSharedPtr getLocalAddress(const Address::IpVersion version);
190

            
191
  /**
192
   * Determine whether this is a local connection.
193
   * @return bool the address is a local connection.
194
   */
195
  static bool isSameIpOrLoopback(const ConnectionInfoProvider& socket);
196

            
197
  /**
198
   * Determine whether this is an internal (RFC1918) address.
199
   * @return bool the address is an RFC1918 address.
200
   */
201
  static bool isInternalAddress(const Address::Instance& address);
202

            
203
  /**
204
   * Check if address is loopback address.
205
   * @param address IP address to check.
206
   * @return true if so, otherwise false
207
   */
208
  static bool isLoopbackAddress(const Address::Instance& address);
209

            
210
  /**
211
   * @return Address::InstanceConstSharedPtr an address that represents the canonical IPv4 loopback
212
   *         address (i.e. "127.0.0.1"). Note that the range "127.0.0.0/8" is all defined as the
213
   *         loopback range, but the address typically used (e.g. in tests) is "127.0.0.1".
214
   */
215
  static Address::InstanceConstSharedPtr getCanonicalIpv4LoopbackAddress();
216

            
217
  /**
218
   * @return Address::InstanceConstSharedPtr an address that represents the IPv6 loopback address
219
   *         (i.e. "::1").
220
   */
221
  static Address::InstanceConstSharedPtr getIpv6LoopbackAddress();
222

            
223
  /**
224
   * @return Address::InstanceConstSharedPtr an address that represents the IPv4 wildcard address
225
   *         (i.e. "0.0.0.0"). Used during binding to indicate that incoming connections to any
226
   *         local IPv4 address are to be accepted.
227
   */
228
  static Address::InstanceConstSharedPtr getIpv4AnyAddress();
229

            
230
  /**
231
   * @return Address::InstanceConstSharedPtr an address that represents the IPv6 wildcard address
232
   *         (i.e. "::"). Used during binding to indicate that incoming connections to any local
233
   *         IPv6 address are to be accepted.
234
   */
235
  static Address::InstanceConstSharedPtr getIpv6AnyAddress();
236

            
237
  /**
238
   * @return the IPv4 CIDR catch-all address (0.0.0.0/0).
239
   */
240
  static const std::string& getIpv4CidrCatchAllAddress();
241

            
242
  /**
243
   * @return the IPv6 CIDR catch-all address (::/0).
244
   */
245
  static const std::string& getIpv6CidrCatchAllAddress();
246

            
247
  /**
248
   * @param address IP address instance.
249
   * @param port to update.
250
   * @return Address::InstanceConstSharedPtr a new address instance with updated port.
251
   */
252
  static Address::InstanceConstSharedPtr getAddressWithPort(const Address::Instance& address,
253
                                                            uint32_t port);
254

            
255
  /**
256
   * Retrieve the original destination address from an accepted socket.
257
   * The address (IP and port) may be not local and the port may differ from
258
   * the listener port if the packets were redirected using iptables
259
   * @param sock is accepted socket
260
   * @return the original destination or nullptr if not available.
261
   */
262
  static Address::InstanceConstSharedPtr getOriginalDst(Socket& sock);
263

            
264
  /**
265
   * Converts IPv6 absl::uint128 in network byte order to host byte order.
266
   * @param address supplies the IPv6 address in network byte order.
267
   * @return the absl::uint128 IPv6 address in host byte order.
268
   */
269
  static absl::uint128 Ip6ntohl(const absl::uint128& address);
270

            
271
  /**
272
   * Converts IPv6 absl::uint128 in host byte order to network byte order.
273
   * @param address supplies the IPv6 address in host byte order.
274
   * @return the absl::uint128 IPv6 address in network byte order.
275
   */
276
  static absl::uint128 Ip6htonl(const absl::uint128& address);
277

            
278
  /**
279
   * @param proto_address supplies the proto address to convert
280
   * @return the InstanceConstSharedPtr for the address, or null if proto_address is invalid.
281
   */
282
  static Address::InstanceConstSharedPtr
283
  protobufAddressToAddressNoThrow(const envoy::config::core::v3::Address& proto_address);
284

            
285
  /**
286
   * Copies the address instance into the protobuf representation of an address.
287
   * @param address is the address to be copied into the protobuf representation of this address.
288
   * @param proto_address is the protobuf address to which the address instance is copied into.
289
   */
290
  static void addressToProtobufAddress(const Address::Instance& address,
291
                                       envoy::config::core::v3::Address& proto_address);
292

            
293
  /**
294
   * Returns socket type corresponding to SocketAddress.protocol value of the
295
   * given address, or SocketType::Stream if the address is a pipe address.
296
   * @param proto_address the address protobuf
297
   * @return socket type
298
   */
299
  static Socket::Type
300
  protobufAddressSocketType(const envoy::config::core::v3::Address& proto_address);
301

            
302
  /**
303
   * Send a packet via given UDP socket with specific source address.
304
   * @param handle is the UDP socket used to send.
305
   * @param slices points to the buffers containing the packet.
306
   * @param num_slices is the number of buffers.
307
   * @param local_ip is the source address to be used to send.
308
   * @param peer_address is the destination address to send to.
309
   */
310
  static Api::IoCallUint64Result writeToSocket(IoHandle& handle, Buffer::RawSlice* slices,
311
                                               uint64_t num_slices, const Address::Ip* local_ip,
312
                                               const Address::Instance& peer_address);
313
  static Api::IoCallUint64Result writeToSocket(IoHandle& handle, const Buffer::Instance& buffer,
314
                                               const Address::Ip* local_ip,
315
                                               const Address::Instance& peer_address);
316

            
317
  /**
318
   * Read a packet from a given UDP socket and pass the packet to given UdpPacketProcessor.
319
   * @param handle is the UDP socket to read from.
320
   * @param local_address is the socket's local address used to populate port.
321
   * @param udp_packet_processor is the callback to receive the packet.
322
   * @param receive_time is the timestamp passed to udp_packet_processor for the
323
   * receive time of the packet.
324
   * @param recv_msg_method the type of system call and socket options combination to use when
325
   * receiving packets from the kernel.
326
   * @param packets_dropped is the output parameter for number of packets dropped in kernel. If the
327
   * caller is not interested in it, nullptr can be passed in.
328
   * @param num_packets_read is the output parameter for the number of packets passed to the
329
   * udp_packet_processor in this call. If the caller is not interested in it, nullptr can be passed
330
   * in.
331
   */
332
  static Api::IoCallUint64Result
333
  readFromSocket(IoHandle& handle, const Address::Instance& local_address,
334
                 UdpPacketProcessor& udp_packet_processor, TimeSource& time_source,
335
                 UdpRecvMsgMethod recv_msg_method, uint32_t* packets_dropped,
336
                 uint32_t* num_packets_read);
337

            
338
  /**
339
   * Read some packets from a given UDP socket and pass the packet to a given
340
   * UdpPacketProcessor. Read no more than MAX_NUM_PACKETS_PER_EVENT_LOOP packets.
341
   * @param handle is the UDP socket to read from.
342
   * @param local_address is the socket's local address used to populate port.
343
   * @param udp_packet_processor is the callback to receive the packets.
344
   * @param time_source is the time source used to generate the time stamp of the received packets.
345
   * @param allow_gro whether to use GRO, iff the platform supports it. This function will check
346
   * the IoHandle to ensure the platform supports GRO before using it.
347
   * @param allow_mmsg whether to use recvmmsg, iff the platform supports it. This function will
348
   * check the IoHandle to ensure the platform supports recvmmsg before using it. If `allow_gro` is
349
   * true and the platform supports GRO, then it will take precedence over using recvmmsg.
350
   * @param packets_dropped is the output parameter for number of packets dropped in kernel.
351
   * Return the io error encountered or nullptr if no io error but read stopped
352
   * because of MAX_NUM_PACKETS_PER_EVENT_LOOP.
353
   *
354
   * TODO(mattklein123): Allow the number of packets read to be limited for fairness. Currently
355
   *                     this function will always return an error, even if EAGAIN. In the future
356
   *                     we can return no error if we limited the number of packets read and have
357
   *                     to fake another read event.
358
   * TODO(mattklein123): Can we potentially share this with the TCP stack somehow? Similar code
359
   *                     exists there.
360
   */
361
  static Api::IoErrorPtr readPacketsFromSocket(IoHandle& handle,
362
                                               const Address::Instance& local_address,
363
                                               UdpPacketProcessor& udp_packet_processor,
364
                                               TimeSource& time_source, bool allow_gro,
365
                                               bool allow_mmsg, uint32_t& packets_dropped);
366

            
367
#if defined(__linux__)
368
  /**
369
   * Changes the calling thread's network namespace to the one referenced by the file at `netns`,
370
   * calls the function `f`, and returns its result after switching back to the original network
371
   * namespace.
372
   *
373
   * @param f the function to execute in the specified network namespace.
374
   * @param netns filepath referencing the network namespace in which `f` is executed.
375
   * @return the result of 'f' wrapped in absl::StatusOr to any indicate syscall failures.
376
   */
377
  template <typename Func>
378
  static auto execInNetworkNamespace(Func&& f, const char* netns)
379
3
      -> absl::StatusOr<typename std::invoke_result_t<Func>> {
380
3
    Api::OsSysCalls& posix = Api::OsSysCallsSingleton().get();
381

            
382
    // Open the original netns fd, so that we can return to it.
383
3
    constexpr auto curr_netns_file = "/proc/self/ns/net";
384
3
    auto og_netns_fd_result = posix.open(curr_netns_file, O_RDONLY);
385
3
    int og_netns_fd = og_netns_fd_result.return_value_;
386
3
    if (og_netns_fd_result.errno_ != 0) {
387
1
      return absl::InternalError(fmt::format("failed to open netns file {}: {}", curr_netns_file,
388
1
                                             errorDetails(og_netns_fd_result.errno_)));
389
1
    }
390
2
    Cleanup cleanup_og_fd([&og_netns_fd, &posix]() { posix.close(og_netns_fd); });
391

            
392
    // Open the fd for the network namespace we want the socket in.
393
2
    auto netns_fd_result = posix.open(netns, O_RDONLY);
394
2
    const int netns_fd = netns_fd_result.return_value_;
395
2
    if (netns_fd <= 0) {
396
      return absl::InternalError(fmt::format("failed to open netns file {}: {}", netns,
397
                                             errorDetails(netns_fd_result.errno_)));
398
    }
399
2
    Cleanup cleanup_netns_fd([&posix, &netns_fd]() { posix.close(netns_fd); });
400

            
401
    // Change the network namespace of this thread.
402
2
    auto setns_result = Api::LinuxOsSysCallsSingleton().get().setns(netns_fd, CLONE_NEWNET);
403
2
    if (setns_result.return_value_ != 0) {
404
      return absl::InternalError(fmt::format("failed to set netns to {} (fd={}): {}", netns,
405
                                             netns_fd, errorDetails(errno)));
406
    }
407

            
408
    // Calling function from the specified network namespace.
409
2
    auto result = std::forward<Func>(f)();
410

            
411
    // Restore the original network namespace before returning the function result.
412
2
    setns_result = Api::LinuxOsSysCallsSingleton().get().setns(og_netns_fd, CLONE_NEWNET);
413

            
414
    // If we cannot jump back into the original network namespace, this is an unrecoverable error.
415
    // It would leave the current thread in another network namespace indefinitely, so we cannot
416
    // continue running in that state.
417
2
    RELEASE_ASSERT(
418
2
        setns_result.return_value_ == 0,
419
2
        fmt::format("failed to restore original netns (fd={}): {}", netns_fd, errorDetails(errno)));
420

            
421
2
    return result;
422
2
  }
423
#endif
424

            
425
private:
426
  /**
427
   * Takes a number and flips the order in byte chunks. The last byte of the input will be the
428
   * first byte in the output. The second to last byte will be the second to first byte in the
429
   * output. Etc..
430
   * @param input supplies the input to have the bytes flipped.
431
   * @return the absl::uint128 of the input having the bytes flipped.
432
   */
433
  static absl::uint128 flipOrder(const absl::uint128& input);
434
};
435

            
436
/**
437
 * Log formatter for an address.
438
 */
439
struct AddressStrFormatter {
440
133
  void operator()(std::string* out, const Network::Address::InstanceConstSharedPtr& instance) {
441
133
    out->append(instance->asString());
442
133
  }
443
};
444

            
445
} // namespace Network
446
} // namespace Envoy