Coverage Report

Created: 2025-07-01 06:50

/src/openvswitch/lib/dpif-netdev-private-extract.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2021 Intel.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include <errno.h>
19
#include <stdint.h>
20
#include <string.h>
21
22
#include "cpu.h"
23
#include "dp-packet.h"
24
#include "dpif-netdev-private-dpcls.h"
25
#include "dpif-netdev-private-extract.h"
26
#include "dpif-netdev-private-thread.h"
27
#include "flow.h"
28
#include "openvswitch/vlog.h"
29
#include "ovs-thread.h"
30
#include "util.h"
31
32
VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract);
33
34
/* Variable to hold the default MFEX implementation. */
35
static ATOMIC(miniflow_extract_func) default_mfex_func;
36
37
#if MFEX_IMPL_AVX512_CHECK
38
static int32_t
39
avx512_isa_probe(bool needs_vbmi)
40
{
41
    static enum ovs_cpu_isa isa_required[] = {
42
        OVS_CPU_ISA_X86_AVX512F,
43
        OVS_CPU_ISA_X86_AVX512BW,
44
        OVS_CPU_ISA_X86_BMI2,
45
    };
46
47
    for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) {
48
        if (!cpu_has_isa(isa_required[i])) {
49
            return -ENOTSUP;
50
        }
51
    }
52
53
    if (needs_vbmi && !cpu_has_isa(OVS_CPU_ISA_X86_AVX512VBMI)) {
54
        return -ENOTSUP;
55
    }
56
57
    return 0;
58
}
59
60
/* Probe functions to check ISA requirements. */
61
/* Probe for the plain AVX512 implementations, which do not need AVX512VBMI.
 * Returns whatever avx512_isa_probe() returns: 0 on success, -ENOTSUP
 * otherwise. */
static int32_t
mfex_avx512_probe(void)
{
    const bool needs_vbmi = false;

    return avx512_isa_probe(needs_vbmi);
}
66
67
#if HAVE_AVX512VBMI
68
/* Probe for the AVX512 implementations that additionally require
 * AVX512VBMI.  Returns whatever avx512_isa_probe() returns: 0 on success,
 * -ENOTSUP otherwise. */
static int32_t
mfex_avx512_vbmi_probe(void)
{
    const bool needs_vbmi = true;

    return avx512_isa_probe(needs_vbmi);
}
73
#endif
74
#endif
75
76
/* Implementations of available extract options and
77
 * the implementations are always in order of preference.
78
 */
79
static struct dpif_miniflow_extract_impl mfex_impls[] = {
80
81
    [MFEX_IMPL_AUTOVALIDATOR] = {
82
        .probe = NULL,
83
        .extract_func = dpif_miniflow_extract_autovalidator,
84
        .name = "autovalidator", },
85
86
    [MFEX_IMPL_SCALAR] = {
87
        .probe = NULL,
88
        .extract_func = NULL,
89
        .name = "scalar", },
90
91
    [MFEX_IMPL_STUDY] = {
92
        .probe = NULL,
93
        .extract_func = mfex_study_traffic,
94
        .name = "study", },
95
96
/* Compile in implementations only if the compiler ISA checks pass. */
97
#if MFEX_IMPL_AVX512_CHECK
98
#if HAVE_AVX512VBMI
99
    [MFEX_IMPL_VBMI_IPv4_UDP] = {
100
        .probe = mfex_avx512_vbmi_probe,
101
        .extract_func = mfex_avx512_vbmi_ip_udp,
102
        .name = "avx512_vbmi_ipv4_udp", },
103
#endif
104
    [MFEX_IMPL_IPv4_UDP] = {
105
        .probe = mfex_avx512_probe,
106
        .extract_func = mfex_avx512_ip_udp,
107
        .name = "avx512_ipv4_udp", },
108
#if HAVE_AVX512VBMI
109
    [MFEX_IMPL_VBMI_IPv4_TCP] = {
110
        .probe = mfex_avx512_vbmi_probe,
111
        .extract_func = mfex_avx512_vbmi_ip_tcp,
112
        .name = "avx512_vbmi_ipv4_tcp", },
113
#endif
114
    [MFEX_IMPL_IPv4_TCP] = {
115
        .probe = mfex_avx512_probe,
116
        .extract_func = mfex_avx512_ip_tcp,
117
        .name = "avx512_ipv4_tcp", },
118
#if HAVE_AVX512VBMI
119
    [MFEX_IMPL_VBMI_DOT1Q_IPv4_UDP] = {
120
        .probe = mfex_avx512_vbmi_probe,
121
        .extract_func = mfex_avx512_vbmi_dot1q_ip_udp,
122
        .name = "avx512_vbmi_dot1q_ipv4_udp", },
123
#endif
124
    [MFEX_IMPL_DOT1Q_IPv4_UDP] = {
125
        .probe = mfex_avx512_probe,
126
        .extract_func = mfex_avx512_dot1q_ip_udp,
127
        .name = "avx512_dot1q_ipv4_udp", },
128
#if HAVE_AVX512VBMI
129
    [MFEX_IMPL_VBMI_DOT1Q_IPv4_TCP] = {
130
        .probe = mfex_avx512_vbmi_probe,
131
        .extract_func = mfex_avx512_vbmi_dot1q_ip_tcp,
132
        .name = "avx512_vbmi_dot1q_ipv4_tcp", },
133
#endif
134
    [MFEX_IMPL_DOT1Q_IPv4_TCP] = {
135
        .probe = mfex_avx512_probe,
136
        .extract_func = mfex_avx512_dot1q_ip_tcp,
137
        .name = "avx512_dot1q_ipv4_tcp",
138
    },
139
#if HAVE_AVX512VBMI
140
    [MFEX_IMPL_VBMI_IPv6_UDP] = {
141
        .probe = mfex_avx512_vbmi_probe,
142
        .extract_func = mfex_avx512_vbmi_ipv6_udp,
143
        .name = "avx512_vbmi_ipv6_udp",
144
    },
145
#endif
146
    [MFEX_IMPL_IPv6_UDP] = {
147
        .probe = mfex_avx512_probe,
148
        .extract_func = mfex_avx512_ipv6_udp,
149
        .name = "avx512_ipv6_udp",
150
    },
151
#if HAVE_AVX512VBMI
152
    [MFEX_IMPL_VBMI_IPv6_TCP] = {
153
        .probe = mfex_avx512_vbmi_probe,
154
        .extract_func = mfex_avx512_vbmi_ipv6_tcp,
155
        .name = "avx512_vbmi_ipv6_tcp",
156
    },
157
#endif
158
    [MFEX_IMPL_IPv6_TCP] = {
159
        .probe = mfex_avx512_probe,
160
        .extract_func = mfex_avx512_ipv6_tcp,
161
        .name = "avx512_ipv6_tcp",
162
    },
163
#if HAVE_AVX512VBMI
164
    [MFEX_IMPL_VBMI_DOT1Q_IPv6_TCP] = {
165
        .probe = mfex_avx512_vbmi_probe,
166
        .extract_func = mfex_avx512_vbmi_dot1q_ipv6_tcp,
167
        .name = "avx512_vbmi_avx512_dot1q_ipv6_tcp",
168
    },
169
#endif
170
    [MFEX_IMPL_DOT1Q_IPv6_TCP] = {
171
        .probe = mfex_avx512_probe,
172
        .extract_func = mfex_avx512_dot1q_ipv6_tcp,
173
        .name = "avx512_dot1q_ipv6_tcp",
174
    },
175
#if HAVE_AVX512VBMI
176
    [MFEX_IMPL_VBMI_DOT1Q_IPv6_UDP] = {
177
        .probe = mfex_avx512_vbmi_probe,
178
        .extract_func = mfex_avx512_vbmi_dot1q_ipv6_udp,
179
        .name = "avx512_vbmi_avx512_dot1q_ipv6_udp",
180
    },
181
#endif
182
    [MFEX_IMPL_DOT1Q_IPv6_UDP] = {
183
        .probe = mfex_avx512_probe,
184
        .extract_func = mfex_avx512_dot1q_ipv6_udp,
185
        .name = "avx512_dot1q_ipv6_udp",
186
    },
187
#if HAVE_AVX512VBMI
188
    [MFEX_IMPL_VBMI_IPv4_NVGRE] = {
189
        .probe = mfex_avx512_vbmi_probe,
190
        .extract_func = mfex_avx512_vbmi_ip_nvgre,
191
        .name = "avx512_vbmi_ipv4_nvgre", },
192
#endif
193
    [MFEX_IMPL_IPv4_NVGRE] = {
194
        .probe = mfex_avx512_probe,
195
        .extract_func = mfex_avx512_ip_nvgre,
196
        .name = "avx512_ipv4_nvgre", },
197
#endif
198
};
199
200
BUILD_ASSERT_DECL(MFEX_IMPL_MAX == ARRAY_SIZE(mfex_impls));
201
202
void
203
dpif_miniflow_extract_init(void)
204
0
{
205
0
    atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
206
#ifdef MFEX_AUTOVALIDATOR_DEFAULT
207
    int mfex_idx = MFEX_IMPL_AUTOVALIDATOR;
208
#else
209
0
    int mfex_idx = MFEX_IMPL_SCALAR;
210
0
#endif
211
212
    /* Call probe on each impl, and cache the result. */
213
0
    for (int i = 0; i < MFEX_IMPL_MAX; i++) {
214
0
        bool avail = true;
215
0
        if (mfex_impls[i].probe) {
216
            /* Return zero is success, non-zero means error. */
217
0
            avail = (mfex_impls[i].probe() == 0);
218
0
        }
219
0
        VLOG_DBG("Miniflow Extract implementation '%s' %s available.",
220
0
                 mfex_impls[i].name, avail ? "is" : "is not");
221
0
        mfex_impls[i].available = avail;
222
0
    }
223
224
    /* For the first call, this will be choosen based on the
225
     * compile time flag.
226
     */
227
0
    VLOG_INFO("Default MFEX Extract implementation is %s.\n",
228
0
              mfex_impls[mfex_idx].name);
229
0
    atomic_store_relaxed(mfex_func, (uintptr_t) mfex_impls
230
0
                         [mfex_idx].extract_func);
231
0
}
232
233
miniflow_extract_func
234
dp_mfex_impl_get_default(void)
235
0
{
236
0
    miniflow_extract_func return_func;
237
0
    atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
238
239
0
    atomic_read_relaxed(mfex_func, (uintptr_t *) &return_func);
240
241
0
    return return_func;
242
0
}
243
244
int
245
dp_mfex_impl_set_default_by_name(const char *name)
246
0
{
247
0
    miniflow_extract_func new_default;
248
0
    atomic_uintptr_t *mfex_func = (void *)&default_mfex_func;
249
250
0
    int err = dp_mfex_impl_get_by_name(name, &new_default);
251
252
0
    if (!err) {
253
0
        atomic_store_relaxed(mfex_func, (uintptr_t) new_default);
254
0
    }
255
256
0
    return err;
257
258
0
}
259
260
void
261
dp_mfex_impl_get(struct ds *reply, struct dp_netdev_pmd_thread **pmd_list,
262
                 size_t pmd_list_size)
263
0
{
264
    /* Add all MFEX functions to reply string. */
265
0
    ds_put_cstr(reply, "Available MFEX implementations:\n");
266
267
0
    for (int i = 0; i < MFEX_IMPL_MAX; i++) {
268
0
        ds_put_format(reply, "  %s (available: %s pmds: ",
269
0
                      mfex_impls[i].name, mfex_impls[i].available ?
270
0
                      "True" : "False");
271
272
0
        for (size_t j = 0; j < pmd_list_size; j++) {
273
0
            struct dp_netdev_pmd_thread *pmd = pmd_list[j];
274
0
            if (pmd->core_id == NON_PMD_CORE_ID) {
275
0
                continue;
276
0
            }
277
278
0
            if (pmd->miniflow_extract_opt == mfex_impls[i].extract_func) {
279
0
                ds_put_format(reply, "%u,", pmd->core_id);
280
0
            }
281
0
        }
282
283
0
        ds_chomp(reply, ',');
284
285
0
        if (ds_last(reply) == ' ') {
286
0
            ds_put_cstr(reply, "none");
287
0
        }
288
289
0
        ds_put_cstr(reply, ")\n");
290
0
    }
291
292
0
}
293
294
/* This function checks all available MFEX implementations, and selects and
295
 * returns the function pointer to the one requested by "name". If nothing
296
 * is found it returns error.
297
 */
298
int
299
dp_mfex_impl_get_by_name(const char *name, miniflow_extract_func *out_func)
300
0
{
301
0
    if (!name || !out_func) {
302
0
        return -EINVAL;
303
0
    }
304
305
0
    for (int i = 0; i < MFEX_IMPL_MAX; i++) {
306
0
        if (strcmp(mfex_impls[i].name, name) == 0) {
307
            /* Check available is set before exec. */
308
0
            if (!mfex_impls[i].available) {
309
0
                *out_func = NULL;
310
0
                return -ENODEV;
311
0
            }
312
313
0
            *out_func = mfex_impls[i].extract_func;
314
0
            return 0;
315
0
        }
316
0
    }
317
318
0
    return -ENOENT;
319
0
}
320
321
struct dpif_miniflow_extract_impl *
322
0
dpif_mfex_impl_info_get(void) {
323
324
0
    return mfex_impls;
325
326
0
}
327
328
uint32_t
329
dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
330
                                    struct netdev_flow_key *keys,
331
                                    uint32_t keys_size, odp_port_t in_port,
332
                                    struct dp_netdev_pmd_thread *pmd_handle)
333
0
{
334
0
    const size_t cnt = dp_packet_batch_size(packets);
335
0
    uint16_t good_l2_5_ofs[NETDEV_MAX_BURST];
336
0
    uint16_t good_l3_ofs[NETDEV_MAX_BURST];
337
0
    uint16_t good_l4_ofs[NETDEV_MAX_BURST];
338
0
    uint16_t good_l2_pad_size[NETDEV_MAX_BURST];
339
0
    struct dp_packet *packet;
340
0
    struct dp_netdev_pmd_thread *pmd = pmd_handle;
341
0
    struct netdev_flow_key test_keys[NETDEV_MAX_BURST];
342
343
0
    if (keys_size < cnt) {
344
0
        atomic_store_relaxed(&pmd->miniflow_extract_opt, NULL);
345
0
        VLOG_ERR("Invalid key size supplied, Key_size: %d less than"
346
0
                 "batch_size:  %" PRIuSIZE"\n", keys_size, cnt);
347
0
        VLOG_ERR("Autovalidatior is disabled.\n");
348
0
        return 0;
349
0
    }
350
351
    /* Run scalar miniflow_extract to get default result. */
352
0
    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
353
0
        pkt_metadata_init(&packet->md, in_port);
354
0
        miniflow_extract(packet, &keys[i].mf);
355
356
        /* Store known good metadata to compare with optimized metadata. */
357
0
        good_l2_5_ofs[i] = packet->l2_5_ofs;
358
0
        good_l3_ofs[i] = packet->l3_ofs;
359
0
        good_l4_ofs[i] = packet->l4_ofs;
360
0
        good_l2_pad_size[i] = packet->l2_pad_size;
361
0
    }
362
363
0
    uint32_t batch_failed = 0;
364
    /* Iterate through each version of miniflow implementations. */
365
0
    for (int j = MFEX_IMPL_START_IDX; j < MFEX_IMPL_MAX; j++) {
366
0
        if (!mfex_impls[j].available) {
367
0
            continue;
368
0
        }
369
        /* Reset keys and offsets before each implementation. */
370
0
        memset(test_keys, 0, keys_size * sizeof(struct netdev_flow_key));
371
0
        DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
372
0
            dp_packet_reset_offsets(packet);
373
0
        }
374
        /* Call optimized miniflow for each batch of packet. */
375
0
        uint32_t hit_mask = mfex_impls[j].extract_func(packets, test_keys,
376
0
                                                       keys_size, in_port,
377
0
                                                       pmd_handle);
378
379
        /* Do a miniflow compare for bits, blocks and offsets for all the
380
         * classified packets in the hitmask marked by set bits. */
381
0
        while (hit_mask) {
382
            /* Index for the set bit. */
383
0
            uint32_t i = raw_ctz(hit_mask);
384
            /* Set the index in hitmask to Zero. */
385
0
            hit_mask &= (hit_mask - 1);
386
387
0
            uint32_t failed = 0;
388
389
0
            struct ds log_msg = DS_EMPTY_INITIALIZER;
390
0
            ds_put_format(&log_msg, "MFEX autovalidator pkt %d\n", i);
391
392
            /* Check miniflow bits are equal. */
393
0
            if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
394
0
                (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
395
0
                ds_put_format(&log_msg, "Autovalidation map failed\n"
396
0
                              "Good: 0x%llx 0x%llx    Test: 0x%llx 0x%llx\n",
397
0
                              keys[i].mf.map.bits[0],
398
0
                              keys[i].mf.map.bits[1],
399
0
                              test_keys[i].mf.map.bits[0],
400
0
                              test_keys[i].mf.map.bits[1]);
401
0
                failed = 1;
402
0
            }
403
404
0
            if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
405
0
                uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
406
0
                uint32_t test_block_cnt = miniflow_n_values(&test_keys[i].mf);
407
408
0
                ds_put_format(&log_msg, "Autovalidation blocks failed\n"
409
0
                              "Good hex:\n");
410
0
                ds_put_hex_dump(&log_msg, &keys[i].buf, block_cnt * 8, 0,
411
0
                                false);
412
0
                ds_put_format(&log_msg, "Test hex:\n");
413
0
                ds_put_hex_dump(&log_msg, &test_keys[i].buf,
414
0
                                test_block_cnt * 8, 0, false);
415
0
                failed = 1;
416
0
            }
417
418
0
            packet = packets->packets[i];
419
0
            if ((packet->l2_pad_size != good_l2_pad_size[i]) ||
420
0
                    (packet->l2_5_ofs != good_l2_5_ofs[i]) ||
421
0
                    (packet->l3_ofs != good_l3_ofs[i]) ||
422
0
                    (packet->l4_ofs != good_l4_ofs[i])) {
423
0
                ds_put_format(&log_msg,
424
0
                              "Autovalidation packet offsets failed\n");
425
0
                ds_put_format(&log_msg, "Good offsets: "
426
0
                              "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
427
0
                              "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
428
0
                              good_l2_pad_size[i], good_l2_5_ofs[i],
429
0
                              good_l3_ofs[i], good_l4_ofs[i]);
430
0
                ds_put_format(&log_msg, "Test offsets: "
431
0
                              "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
432
0
                              "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
433
0
                              packet->l2_pad_size, packet->l2_5_ofs,
434
0
                              packet->l3_ofs, packet->l4_ofs);
435
0
                failed = 1;
436
0
            }
437
438
0
            if (failed) {
439
0
                VLOG_ERR("Autovalidation for %s failed in pkt %d,"
440
0
                         " disabling.", mfex_impls[j].name, i);
441
0
                VLOG_ERR("Autovalidation failure details:\n%s",
442
0
                         ds_cstr(&log_msg));
443
0
                batch_failed = 1;
444
0
            }
445
0
            ds_destroy(&log_msg);
446
0
        }
447
0
    }
448
449
    /* Having dumped the debug info for the batch, disable autovalidator. */
450
0
    if (batch_failed) {
451
0
        atomic_store_relaxed(&pmd->miniflow_extract_opt, NULL);
452
0
    }
453
454
    /* Preserve packet correctness by storing back the good offsets in
455
     * packets back. */
456
0
    DP_PACKET_BATCH_FOR_EACH (i, packet, packets) {
457
0
        packet->l2_5_ofs = good_l2_5_ofs[i];
458
0
        packet->l3_ofs = good_l3_ofs[i];
459
0
        packet->l4_ofs = good_l4_ofs[i];
460
0
        packet->l2_pad_size = good_l2_pad_size[i];
461
0
    }
462
463
    /* Returning zero implies no packets were hit by autovalidation. This
464
     * simplifies unit-tests as changing --enable-mfex-default-autovalidator
465
     * would pass/fail. By always returning zero, autovalidator is a little
466
     * slower, but we gain consistency in testing. The auto-validator is only
467
     * meant to test different implementaions against a batch of packets
468
     * without incrementing hit counters.
469
     */
470
0
    return 0;
471
0
}