/src/openvswitch/lib/dp-packet-gso.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2023 Red Hat, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include <stdlib.h> |
19 | | #include <string.h> |
20 | | |
21 | | #include "dp-packet.h" |
22 | | #include "dp-packet-gso.h" |
23 | | #include "netdev-provider.h" |
24 | | #include "openvswitch/vlog.h" |
25 | | |
26 | | VLOG_DEFINE_THIS_MODULE(dp_packet_gso); |
27 | | |
28 | | /* Retuns a new packet that is a segment of packet 'p'. |
29 | | * |
30 | | * The new packet is initialized with 'hdr_len' bytes from the |
31 | | * start of packet 'p' and then appended with 'data_len' bytes |
32 | | * from the packet 'p' at offset 'data_off'. |
33 | | * |
34 | | * Note: The packet headers are not updated. */ |
35 | | static struct dp_packet * |
36 | | dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len, |
37 | | size_t data_off, size_t data_len) |
38 | 0 | { |
39 | 0 | struct dp_packet *seg = dp_packet_new_with_headroom(hdr_len + data_len, |
40 | 0 | dp_packet_headroom(p)); |
41 | | |
42 | | /* Append the original packet headers and then the payload. */ |
43 | 0 | dp_packet_put(seg, dp_packet_data(p), hdr_len); |
44 | 0 | dp_packet_put(seg, (char *) dp_packet_data(p) + data_off, data_len); |
45 | | |
46 | | /* The new segment should have the same offsets. */ |
47 | 0 | seg->l2_5_ofs = p->l2_5_ofs; |
48 | 0 | seg->l3_ofs = p->l3_ofs; |
49 | 0 | seg->l4_ofs = p->l4_ofs; |
50 | 0 | seg->inner_l3_ofs = p->inner_l3_ofs; |
51 | 0 | seg->inner_l4_ofs = p->inner_l4_ofs; |
52 | | |
53 | | /* The protocol headers remain the same, so preserve hash and mark. */ |
54 | 0 | seg->has_hash = p->has_hash; |
55 | 0 | *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p); |
56 | 0 | seg->has_mark = p->has_mark; |
57 | 0 | *dp_packet_flow_mark_ptr(seg) = *dp_packet_flow_mark_ptr(p); |
58 | |
|
59 | 0 | seg->offloads = p->offloads; |
60 | |
|
61 | 0 | return seg; |
62 | 0 | } |
63 | | |
64 | | /* Returns the calculated number of TCP segments in packet 'p'. */ |
65 | | unsigned int |
66 | | dp_packet_gso_nr_segs(struct dp_packet *p) |
67 | 0 | { |
68 | 0 | uint16_t segsz = dp_packet_get_tso_segsz(p); |
69 | 0 | uint32_t data_length; |
70 | |
|
71 | 0 | if (dp_packet_tunnel(p)) { |
72 | 0 | data_length = dp_packet_get_inner_tcp_payload_length(p); |
73 | 0 | } else { |
74 | 0 | data_length = dp_packet_get_tcp_payload_length(p); |
75 | 0 | } |
76 | |
|
77 | 0 | return DIV_ROUND_UP(data_length, segsz); |
78 | 0 | } |
79 | | |
80 | | /* For partial segmentation, we try to pack as much data as we can in a first |
81 | | * packet (up to the final number of segments on the wire). |
82 | | * If there is still some data left, we need an extra "little" packet |
83 | | * (shorter than tso_segsz). */ |
84 | | unsigned int |
85 | | dp_packet_gso_partial_nr_segs(struct dp_packet *p) |
86 | 0 | { |
87 | 0 | if ((dp_packet_tunnel_geneve(p) || dp_packet_tunnel_vxlan(p)) |
88 | 0 | && dp_packet_l4_checksum_partial(p) |
89 | 0 | && dp_packet_get_inner_tcp_payload_length(p) |
90 | 0 | != dp_packet_gso_nr_segs(p) * dp_packet_get_tso_segsz(p)) { |
91 | 0 | return 2; |
92 | 0 | } |
93 | | |
94 | 0 | return 1; |
95 | 0 | } |
96 | | |
/* Rewrites the protocol headers of 'seg', the 'seg_no'th of 'n_segs'
 * segments produced by splitting a TSO packet into chunks of 'tso_segsz'
 * payload bytes.
 *
 * Fixes up L3 total/payload lengths, offsets the IPv4 id by the segment
 * index, advances the TCP sequence number, clears PSH/FIN on all but the
 * last segment, and marks IP/L4 checksums as partial (to be finished
 * later).  For tunneled packets ('udp_tnl' for VXLAN/Geneve, 'gre_tnl'
 * for GRE) the outer tunnel header and the inner L3/L4 headers are
 * updated as well. */
static void
dp_packet_gso_update_segment(struct dp_packet *seg, unsigned int seg_no,
                             unsigned int n_segs, uint16_t tso_segsz,
                             bool udp_tnl, bool gre_tnl)
{
    struct tcp_header *tcp_hdr;
    struct ip_header *ip_hdr;
    uint32_t tcp_seq;

    if (udp_tnl) {
        /* Update tunnel UDP header length. */
        struct udp_header *tnl_hdr;

        tnl_hdr = dp_packet_l4(seg);
        tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
    }

    if (udp_tnl || gre_tnl) {
        /* Update tunnel inner L3 header. */
        ip_hdr = dp_packet_inner_l3(seg);
        if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
            ip_hdr->ip_tot_len = htons(dp_packet_inner_l3_size(seg));
            /* Each segment gets a distinct IPv4 id, offset by its index. */
            ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
            ip_hdr->ip_csum = 0;
            dp_packet_inner_ip_checksum_set_partial(seg);
        } else {
            struct ovs_16aligned_ip6_hdr *ip6_hdr;

            /* IPv6 carries a payload length instead of a total length. */
            ip6_hdr = dp_packet_inner_l3(seg);
            ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
                = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
        }
    }

    /* Update L3 header. */
    ip_hdr = dp_packet_l3(seg);
    if (IP_VER(ip_hdr->ip_ihl_ver) == 4) {
        ip_hdr->ip_tot_len = htons(dp_packet_l3_size(seg));
        ip_hdr->ip_id = htons(ntohs(ip_hdr->ip_id) + seg_no);
        ip_hdr->ip_csum = 0;
        dp_packet_ip_checksum_set_partial(seg);
    } else {
        struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(seg);

        ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
            = htons(dp_packet_l3_size(seg) - sizeof *ip6_hdr);
    }

    /* Update L4 header. */
    if (udp_tnl || gre_tnl) {
        tcp_hdr = dp_packet_inner_l4(seg);
        dp_packet_inner_l4_checksum_set_partial(seg);
    } else {
        tcp_hdr = dp_packet_l4(seg);
        dp_packet_l4_checksum_set_partial(seg);
    }
    /* Advance the TCP sequence number by the payload carried in the
     * preceding segments. */
    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
    tcp_seq += seg_no * tso_segsz;
    put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));

    /* NOTE(review): the flag fixup is skipped when 'seg' still carries a
     * TSO segment size — presumably because such a packet will be split
     * further (see dp_packet_gso_partial) — confirm against callers. */
    if (seg_no < (n_segs - 1) && !dp_packet_get_tso_segsz(seg)) {
        uint16_t tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
        /* Reset flags PUSH and FIN unless it is the last segment. */
        uint16_t tcp_flags = TCP_FLAGS(tcp_hdr->tcp_ctl)
                             & ~(TCP_PSH | TCP_FIN);
        tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
    }

    if (gre_tnl) {
        struct gre_base_hdr *ghdr;

        ghdr = dp_packet_l4(seg);

        /* Recompute the optional GRE checksum: it covers the GRE header
         * and payload, which changed with the segment length. */
        if (ghdr->flags & htons(GRE_CSUM)) {
            ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
            *csum_opt = 0;
            *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
        }
    }
}
177 | | |
/* Segments packet 'p' into TSO-sized chunks, appending the resulting
 * packets to the preallocated batch array '*batches' and advancing
 * '*batches' to the last batch used.
 *
 * 'p' itself is reused as the first segment: it is trimmed only after
 * all other segments have been copied out of it.
 *
 * With 'partial_seg' false, 'p' is fully segmented in software.  With
 * 'partial_seg' true, only the trailing unaligned chunk (if any) is
 * split off and the bulk of the data is left in 'p' with its TSO size
 * intact (unless nothing remains to segment in hardware). */
static void
dp_packet_gso__(struct dp_packet *p, struct dp_packet_batch **batches,
                bool partial_seg)
{
    struct dp_packet_batch *curr_batch = *batches;
    struct dp_packet *seg;
    unsigned int n_segs;
    uint16_t tso_segsz;
    size_t data_len;
    size_t hdr_len;
    bool udp_tnl;
    bool gre_tnl;

    tso_segsz = dp_packet_get_tso_segsz(p);
    ovs_assert(tso_segsz);
    n_segs = dp_packet_gso_nr_segs(p);
    udp_tnl = dp_packet_tunnel_vxlan(p) || dp_packet_tunnel_geneve(p);
    gre_tnl = dp_packet_tunnel_gre(p);

    /* Put back the first segment in the batch, it will be trimmed after
     * all segments have been copied. */
    if (dp_packet_batch_is_full(curr_batch)) {
        curr_batch++;
    }
    dp_packet_batch_add(curr_batch, p);

    if (n_segs <= 1) {
        goto out;
    }

    /* 'hdr_len' spans everything from the Ethernet header up to the
     * (inner) TCP payload; it is replicated in front of every segment. */
    if (dp_packet_tunnel(p)) {
        hdr_len = (char *) dp_packet_get_inner_tcp_payload(p)
                  - (char *) dp_packet_eth(p);
        data_len = dp_packet_get_inner_tcp_payload_length(p);
    } else {
        hdr_len = (char *) dp_packet_get_tcp_payload(p)
                  - (char *) dp_packet_eth(p);
        data_len = dp_packet_get_tcp_payload_length(p);
    }

    /* Partial segmentation only ever splits off the trailing unaligned
     * chunk, so the middle-segments loop is skipped entirely. */
    if (partial_seg) {
        if (dp_packet_gso_partial_nr_segs(p) != 1) {
            goto last_seg;
        }
        goto first_seg;
    }

    /* Create the middle segments (full software segmentation only). */
    for (unsigned int i = 1; i < n_segs - 1; i++) {
        seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + i * tso_segsz,
                                    tso_segsz);
        dp_packet_gso_update_segment(seg, i, n_segs, tso_segsz, udp_tnl,
                                     gre_tnl);

        if (dp_packet_batch_is_full(curr_batch)) {
            curr_batch++;
        }
        dp_packet_batch_add(curr_batch, seg);
    }

last_seg:
    /* Create the last segment. */
    seg = dp_packet_gso_seg_new(p, hdr_len, hdr_len + (n_segs - 1) * tso_segsz,
                                data_len - (n_segs - 1) * tso_segsz);
    dp_packet_gso_update_segment(seg, n_segs - 1, n_segs, tso_segsz, udp_tnl,
                                 gre_tnl);

    if (dp_packet_batch_is_full(curr_batch)) {
        curr_batch++;
    }
    dp_packet_batch_add(curr_batch, seg);

first_seg:
    if (partial_seg) {
        if (dp_packet_gso_partial_nr_segs(p) != 1) {
            /* Trim the trailing chunk off 'p'; the rest is left for
             * hardware segmentation. */
            dp_packet_set_size(p, hdr_len + (n_segs - 1) * tso_segsz);
            if (n_segs == 2) {
                /* No need to ask HW segmentation, we already did the job. */
                dp_packet_set_tso_segsz(p, 0);
            }
        }
    } else {
        /* Trim the first segment and reset TSO. */
        dp_packet_set_size(p, hdr_len + tso_segsz);
        dp_packet_set_tso_segsz(p, 0);
    }
    dp_packet_gso_update_segment(p, 0, n_segs, tso_segsz, udp_tnl, gre_tnl);

out:
    *batches = curr_batch;
}
268 | | |
/* Perform software segmentation on packet 'p'.
 *
 * Segments packet 'p' into the array of preallocated batches in 'batches',
 * updating the 'batches' pointer as needed.
 *
 * Ownership of 'p' passes to the batches: 'p' is reused (trimmed) as the
 * first segment. */
void
dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
{
    dp_packet_gso__(p, batches, false);
}
278 | | |
/* Perform partial software segmentation on packet 'p'.
 *
 * For UDP tunnels, if the packet payload length is not aligned on the
 * segmentation size, segments the last segment of packet 'p' into the array
 * of preallocated batches in 'batches', updating the 'batches' pointer
 * as needed.  The rest of the data stays in 'p' (still marked for TSO
 * unless nothing remains to segment). */
void
dp_packet_gso_partial(struct dp_packet *p, struct dp_packet_batch **batches)
{
    dp_packet_gso__(p, batches, true);
}