1
#pragma once
2

            
3
#include <fcntl.h>
4
#include <sys/stat.h>
5
#include <sys/un.h>
6
#include <unistd.h>
7

            
8
#include <array>
9
#include <atomic>
10
#include <cstdint>
11
#include <string>
12

            
13
#include "envoy/common/platform.h"
14
#include "envoy/server/hot_restart.h"
15
#include "envoy/server/options.h"
16
#include "envoy/stats/scope.h"
17

            
18
#include "source/common/common/assert.h"
19
#include "source/server/hot_restart.pb.h"
20

            
21
namespace Envoy {
22
namespace Server {
23

            
24
/**
 * One endpoint of the ad hoc hot restart RPC protocol described below. Holds the domain socket
 * descriptor used for the exchange together with the receive-side state needed to reassemble a
 * HotRestartMessage that arrives split across several datagrams.
 *
 * NOTE(review): this class declares a destructor and stores a raw descriptor, yet copying is not
 * disabled here. Confirm against the .cc whether the destructor releases domain_socket_, and if
 * so whether the copy operations should be deleted.
 */
class RpcStream : public Logger::Loggable<Logger::Id::main> {
public:
  // Selects whether receiveHotRestartMessage() waits for a complete message (Yes) or gives up
  // once no more data is available (No).
  enum class Blocking { Yes, No };

  /**
   * @param base_id stored as base_id_; see the comment on that member for its meaning.
   */
  explicit RpcStream(uint64_t base_id) : base_id_(base_id) {}
  ~RpcStream();

  // Domain socket address and bind helpers. Their definitions are not part of this header; see
  // the implementation file for the exact naming scheme derived from id / role / socket_path.
  void initDomainSocketAddress(sockaddr_un* address);
  sockaddr_un createDomainSocketAddress(uint64_t id, const std::string& role,
                                        const std::string& socket_path, mode_t socket_mode);
  void bindDomainSocket(uint64_t id, const std::string& role, const std::string& socket_path,
                        mode_t socket_mode);

  // Protocol description:
  //
  // In each direction between parent<-->child, a series of pairs of:
  //   A uint64 'length' (bytes in network order),
  //   followed by 'length' bytes of a serialized HotRestartMessage.
  // Each new message must start in a new sendmsg datagram, i.e. 'length' must always start at
  // byte 0. Each sendmsg datagram can be up to 4096 bytes (including 'length' if present). When
  // the serialized protobuf is longer than 4096-8 bytes, and so cannot fit in just one datagram,
  // it is delivered by a series of datagrams. In each of these continuation datagrams, the
  // protobuf data starts at byte 0.
  //
  // There is no mechanism to explicitly pair responses to requests. However, the child initiates
  // all exchanges, and blocks until a reply is received, so there is implicit pairing.
  bool sendHotRestartMessage(sockaddr_un& address, const envoy::HotRestartMessage& proto,
                             bool allow_failure = false);

  // Receive data, possibly enough to build one of our protocol messages.
  // If block is Blocking::Yes, blocks until a full protocol message is available.
  // If block is Blocking::No, returns nullptr if we run out of data to receive before a full
  // protocol message is available. In either case, this RpcStream may end up buffering some data
  // for the next protocol message, even if the function returns a protobuf.
  std::unique_ptr<envoy::HotRestartMessage> receiveHotRestartMessage(Blocking block);
  bool replyIsExpectedType(const envoy::HotRestartMessage* proto,
                           envoy::HotRestartMessage::Reply::ReplyCase oneof_type) const;

  // Descriptor of the domain socket used by this stream. Defaults to -1.
  // NOTE(review): -1 presumably means "no socket open yet" - confirm against the .cc.
  int domain_socket_{-1};

private:
  void getPassedFdIfPresent(envoy::HotRestartMessage* out, msghdr* message);
  std::unique_ptr<envoy::HotRestartMessage> parseProtoAndResetState();
  void initRecvBufIfNewMessage();

  // An int in [0, MaxConcurrentProcesses). As hot restarts happen, each next process gets the
  // next of 0,1,2,0,1,...
  // The name of this stream's domain socket contains its base_id_ value, and so we can use
  // this value to determine which domain socket name to treat as our parent, and which to treat
  // as our child. (E.g. if we are 2, 1 is parent and 0 is child).
  const uint64_t base_id_;

  // State for the receiving half of the protocol.
  //
  // When filled, the size in bytes that the in-flight HotRestartMessage should be.
  // When empty, we're ready to start receiving a new message (starting with a uint64 'length').
  absl::optional<uint64_t> expected_proto_length_;

  // How much of the current in-flight message (including both the uint64 'length', plus the proto
  // itself) we have received. Once this equals expected_proto_length_ + sizeof(uint64_t), we're
  // ready to parse the HotRestartMessage. Should be set to 0 in between messages, to indicate
  // readiness for a new message.
  uint64_t cur_msg_recvd_bytes_{};

  // The first 8 bytes will always be the raw net-order bytes of the current value of
  // expected_proto_length_. The protobuf partial data starts at byte 8.
  // Should be resized to 0 in between messages, to indicate readiness for a new message.
  std::vector<uint8_t> recv_buf_;
};
88

            
89
/**
90
 * Logic shared by the implementations of both sides of the child<-->parent hot restart protocol:
91
 * domain socket communication, and our ad hoc RPC protocol.
92
 */
93
class HotRestartingBase : public Logger::Loggable<Logger::Id::main> {
94
protected:
95
  HotRestartingBase(uint64_t base_id)
96
173
      : main_rpc_stream_(base_id), udp_forwarding_rpc_stream_(base_id) {}
97

            
98
  // Returns a Gauge that tracks hot-restart generation, where every successive
99
  // child increments this number.
100
  static Stats::Gauge& hotRestartGeneration(Stats::Scope& scope);
101

            
102
  // A stream over a unix socket between the parent and child instances, used
103
  // for the child instance to request socket information and control draining
104
  // and shutdown of the parent.
105
  RpcStream main_rpc_stream_;
106

            
107
  // A separate channel is used for udp forwarding because udp forwarding can
108
  // begin while communication on the main channel is still occurring. The hot
109
  // restarter is single-threaded, so we don't have to worry about packets coming
110
  // in a jumbled order, but there are two instances of the hot restarter, the
111
  // parent and the child; it is possible for the child to send a udp packet
112
  // while the parent is sending a request on the main channel, for which it will
113
  // expect to receive a response (and not an unrelated udp packet). Therefore, a
114
  // separate channel is used to deliver udp packets, ensuring no interference
115
  // between the two data sources.
116
  RpcStream udp_forwarding_rpc_stream_;
117
};
118

            
119
} // namespace Server
120
} // namespace Envoy