1
#include "conntrack.h"
2

            
3
#include <netinet/in.h>
4

            
5
#include <cerrno> // IWYU pragma: keep
6
#include <cstdint>
7
#include <cstring>
8
#include <memory>
9
#include <string>
10
#include <utility>
11

            
12
#include "envoy/common/platform.h"
13
#include "envoy/network/address.h"
14

            
15
#include "source/common/common/lock_guard.h"
16
#include "source/common/common/logger.h"
17
#include "source/common/common/utility.h"
18

            
19
#include "absl/container/flat_hash_map.h"
20
#include "absl/container/flat_hash_set.h"
21
#include "absl/numeric/int128.h"
22
#include "cilium/bpf.h"
23
#include "linux/bpf.h"
24
#include "linux/type_mapper.h"
25

            
26
namespace Envoy {
27
namespace Cilium {
28

            
29
// Local copies of the fixed-width type names and tuple flags used by the
// Cilium conntrack structures below. They are duplicated here by hand and
// therefore have to be kept in sync with the Cilium source code; ideally they
// would live in a separate include file shared with Cilium instead.

// 8-bit
using __u8 = uint8_t;

// 16-bit; the "__be" variant holds a big-endian value, so mind the byte order!
using __u16 = uint16_t;
using __be16 = uint16_t;

// 32-bit; the "__be" variant holds a big-endian value, so mind the byte order!
using __u32 = uint32_t;
using __be32 = uint32_t;

// 64-bit
using __u64 = uint64_t;

// Values stored in the 'flags' member of the conntrack tuple keys.
#define TUPLE_F_OUT 0
#define TUPLE_F_IN 1
42

            
43
PACKED_STRUCT(struct IPv6CtTuple {
44
  __be32 saddr[4];
45
  __be32 daddr[4];
46
  __be16 dport;
47
  __be16 sport;
48
  __u8 nexthdr;
49
  __u8 flags;
50
});
51

            
52
PACKED_STRUCT(struct IPv4CtTuple {
53
  __be32 saddr;
54
  __be32 daddr;
55
  __be16 dport;
56
  __be16 sport;
57
  __u8 nexthdr;
58
  __u8 flags;
59
});
60

            
61
struct CtEntry {
62
  __u64 rx_packets;
63
  __u64 rx_bytes;
64
  __u64 tx_packets;
65
  __u64 tx_bytes;
66
  __u32 lifetime;
67
  __u16 rx_closing : 1, tx_closing : 1, nat46 : 1, lb_loopback : 1, seen_non_syn : 1, reserve : 11;
68
  __u16 rev_nat_index;
69
  __u16 slave;
70

            
71
  /* *x_flags_seen represents the OR of all TCP flags seen for the
72
   * transmit/receive direction of this entry. */
73
  __u8 tx_flags_seen;
74
  __u8 rx_flags_seen;
75

            
76
  __u32 src_sec_id; /* Used from userspace proxies, do not change offset! */
77

            
78
  /* last_*x_report is a timestamp of the last time a monitor
79
   * notification was sent for the transmit/receive direction. */
80
  __u32 last_tx_report;
81
  __u32 last_rx_report;
82
};
83

            
84
// Hands the Bpf base the parameters of an IPv4 conntrack map: a hash map whose
// key size is that of IPv4CtTuple and whose value size is that of CtEntry.
CtMap::CtMap4::CtMap4() : Bpf(BPF_MAP_TYPE_HASH, sizeof(IPv4CtTuple), sizeof(CtEntry)) {}
86

            
87
// Hands the Bpf base the parameters of an IPv6 conntrack map: a hash map whose
// key size is that of IPv6CtTuple and whose value size is that of CtEntry.
CtMap::CtMap6::CtMap6() : Bpf(BPF_MAP_TYPE_HASH, sizeof(IPv6CtTuple), sizeof(CtEntry)) {}
89

            
90
// Opens the pair of IPv4 conntrack maps named after 'map_name' from the Cilium
// specific locations below 'bpf_root'. 'ok_' ends up true only if the TCP map
// could be opened; a failure to open the non-TCP ("any") map is tolerated.
CtMap::CtMaps4::CtMaps4(const std::string& bpf_root, const std::string& map_name) : ok_(false) {
  const std::string tcp_path(bpf_root + "/tc/globals/cilium_ct4_" + map_name);
  if (ctmap4_tcp_.open(tcp_path)) {
    // The non-TCP map is optional: log if it can not be opened, but do not fail.
    const std::string any_path(bpf_root + "/tc/globals/cilium_ct_any4_" + map_name);
    if (!ctmap4_any_.open(any_path)) {
      ENVOY_LOG(info, "cilium.bpf_metadata: Cannot open IPv4 conntrack map at {}", any_path);
    }
    ok_ = true;
  } else {
    // Without the TCP map the object is unusable, 'ok_' stays false.
    ENVOY_LOG(warn, "cilium.bpf_metadata: Cannot open IPv4 conntrack map at {}", tcp_path);
  }
}
106

            
107
// Opens the pair of IPv6 conntrack maps named after 'map_name' from the Cilium
// specific locations below 'bpf_root'. 'ok_' ends up true only if the TCP map
// could be opened; a failure to open the non-TCP ("any") map is tolerated.
CtMap::CtMaps6::CtMaps6(const std::string& bpf_root, const std::string& map_name) : ok_(false) {
  const std::string tcp_path(bpf_root + "/tc/globals/cilium_ct6_" + map_name);
  if (ctmap6_tcp_.open(tcp_path)) {
    // The non-TCP map is optional: log if it can not be opened, but do not fail.
    const std::string any_path(bpf_root + "/tc/globals/cilium_ct_any6_" + map_name);
    if (!ctmap6_any_.open(any_path)) {
      ENVOY_LOG(info, "cilium.bpf_metadata: Cannot open IPv6 conntrack map at {}", any_path);
    }
    ok_ = true;
  } else {
    // Without the TCP map the object is unusable, 'ok_' stays false.
    ENVOY_LOG(warn, "cilium.bpf_metadata: Cannot open IPv6 conntrack map at {}", tcp_path);
  }
}
123

            
124
// Must hold mutex!
125
absl::flat_hash_map<const std::string, std::unique_ptr<CtMap::CtMaps4>>::iterator
126
CtMap::openMap4(const std::string& map_name) {
127
  auto pair = ct_maps4_.emplace(std::make_pair(map_name, nullptr));
128
  // construct the maps only if the entry was inserted
129
  if (pair.second) {
130
    auto maps = new CtMaps4(bpf_root_, map_name);
131
    if (!maps->ok_) {
132
      // Map open failed, delete and return nullptr
133
      delete maps;
134
      ct_maps4_.erase(pair.first);
135
      return ct_maps4_.end();
136
    }
137
    pair.first->second.reset(maps);
138
  }
139
  ENVOY_LOG(debug, "cilium.bpf_metadata: Opened IPv4 conntrack map {}", map_name);
140
  return pair.first;
141
}
142

            
143
// Must hold mutex!
144
absl::flat_hash_map<const std::string, std::unique_ptr<CtMap::CtMaps6>>::iterator
145
CtMap::openMap6(const std::string& map_name) {
146
  auto pair = ct_maps6_.emplace(std::make_pair(map_name, nullptr));
147
  // construct the maps only if the entry was inserted
148
  if (pair.second) {
149
    auto maps = new CtMaps6(bpf_root_, map_name);
150
    if (!maps->ok_) {
151
      // Map open failed, delete and return nullptr
152
      delete maps;
153
      ct_maps6_.erase(pair.first);
154
      return ct_maps6_.end();
155
    }
156
    pair.first->second.reset(maps);
157
  }
158
  ENVOY_LOG(debug, "cilium.bpf_metadata: Opened IPv6 conntrack map {}", map_name);
159
  return pair.first;
160
}
161

            
162
// Drops the IPv4 and IPv6 conntrack maps of every name in 'to_be_closed' from
// 'ct_maps4_' and 'ct_maps6_'. Names without an open map are ignored.
// Takes 'maps_mutex_' for the duration of the call.
void CtMap::closeMaps(const absl::flat_hash_set<std::string>& to_be_closed) {
  Thread::LockGuard guard(maps_mutex_);

  for (const auto& map_name : to_be_closed) {
    // Erasing by key reports how many entries were removed, log only real closes.
    if (ct_maps4_.erase(map_name) > 0) {
      ENVOY_LOG(debug, "cilium.bpf_metadata: Closed IPv4 conntrack map {}", map_name);
    }
    if (ct_maps6_.erase(map_name) > 0) {
      ENVOY_LOG(debug, "cilium.bpf_metadata: Closed IPv6 conntrack map {}", map_name);
    }
  }
}
178

            
179
// Remembers 'bpf_root' and tries to open the "global" conntrack maps up front.
// The IPv6 maps are only attempted when the IPv4 maps could not be opened, and
// a debug message is logged only when both attempts fail.
CtMap::CtMap(const std::string& bpf_root) : bpf_root_(bpf_root) {
  if (openMap4("global") != ct_maps4_.end()) {
    return; // IPv4 maps are open, nothing more is done here
  }
  if (openMap6("global") != ct_maps6_.end()) {
    return; // IPv6 maps are open
  }
  ENVOY_LOG(debug, "cilium.bpf_metadata: conntrack map global open failed: ({})",
            Envoy::errorDetails(errno));
}
185

            
186
// map_name is "global" for the global maps, or endpoint ID for local maps
187
uint32_t CtMap::lookupSrcIdentity(const std::string& map_name, const Network::Address::Ip* sip,
188
                                  const Network::Address::Ip* dip, bool ingress) {
189
  ENVOY_LOG(debug, "cilium.bpf_metadata: Using conntrack map {}", map_name);
190

            
191
  struct IPv4CtTuple key4 {};
192
  struct IPv6CtTuple key6 {};
193
  struct CtEntry value {};
194

            
195
  if (sip->version() == Network::Address::IpVersion::v4 &&
196
      dip->version() == Network::Address::IpVersion::v4) {
197
    key4.daddr = dip->ipv4()->address();
198
    key4.saddr = sip->ipv4()->address();
199
    key4.sport = htons(sip->port());
200
    key4.dport = htons(dip->port());
201
    key4.nexthdr = 6;                                // TCP only for now
202
    key4.flags = ingress ? TUPLE_F_IN : TUPLE_F_OUT; // also reversed
203

            
204
    ENVOY_LOG(trace,
205
              "cilium.bpf_metadata: Looking up key: {:x}, {:x}, {:x}, {:x}, "
206
              "{:x}, {:x}",
207
              ntohl(key4.daddr), ntohl(key4.saddr), ntohs(key4.dport), ntohs(key4.sport),
208
              key4.nexthdr, key4.flags);
209
  } else if (sip->version() == Network::Address::IpVersion::v6 &&
210
             dip->version() == Network::Address::IpVersion::v6) {
211
    absl::uint128 daddr = dip->ipv6()->address();
212
    absl::uint128 saddr = sip->ipv6()->address();
213
    memcpy(&key6.daddr, &daddr, 16); // NOLINT(safe-memcpy)
214
    memcpy(&key6.saddr, &saddr, 16); // NOLINT(safe-memcpy)
215
    key6.sport = htons(sip->port());
216
    key6.dport = htons(dip->port());
217
    key6.nexthdr = 6; // TCP only for now
218
    key6.flags = ingress ? TUPLE_F_IN : TUPLE_F_OUT;
219
  } else {
220
    ENVOY_LOG(info, "cilium.bpf_metadata: Address type mismatch: Source: {}, Dest: {}",
221
              sip->addressAsString(), dip->addressAsString());
222
    return 0;
223
  }
224

            
225
  if (dip->version() == Network::Address::IpVersion::v4) {
226
    // Lock for the duration of the map lookup and conntrack lookup
227
    Thread::LockGuard guard(maps_mutex_);
228
    auto it = ct_maps4_.find(map_name);
229
    if (it == ct_maps4_.end()) {
230
      it = openMap4(map_name);
231
    }
232
    if (it == ct_maps4_.end()) {
233
      ENVOY_LOG(error, "cilium.bpf_metadata: No IPv4 conntrack map {}", map_name);
234
      return 0;
235
    }
236
    auto ct = it->second.get();
237
    if (!ct->ctmap4_tcp_.lookup(&key4, &value)) {
238
      ct_maps4_.erase(it); // flush the map to force reload after each failure.
239
      ENVOY_LOG(debug, "cilium.bpf_metadata: IPv4 conntrack map {} lookup failed: {}", map_name,
240
                Envoy::errorDetails(errno));
241
      return 0;
242
    }
243
  } else {
244
    // Lock for the duration of the map lookup and conntrack lookup
245
    Thread::LockGuard guard(maps_mutex_);
246
    auto it = ct_maps6_.find(map_name);
247
    if (it == ct_maps6_.end()) {
248
      it = openMap6(map_name);
249
    }
250
    if (it == ct_maps6_.end()) {
251
      ENVOY_LOG(error, "cilium.bpf_metadata: No IPv6 conntrack map {}", map_name);
252
      return 0;
253
    }
254
    auto ct = it->second.get();
255
    if (!ct->ctmap6_tcp_.lookup(&key6, &value)) {
256
      ct_maps6_.erase(it); // flush the map to force reload after each failure.
257
      ENVOY_LOG(debug, "cilium.bpf_metadata: IPv6 conntrack map {} lookup failed: {}", map_name,
258
                Envoy::errorDetails(errno));
259
      return 0;
260
    }
261
  }
262
  return value.src_sec_id;
263
}
264

            
265
} // namespace Cilium
266
} // namespace Envoy