Coverage Report

Created: 2026-05-30 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/openvswitch/lib/dpif-netdev-perf.h
Line
Count
Source
1
/*
2
 * Copyright (c) 2017 Ericsson AB.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#ifndef DPIF_NETDEV_PERF_H
18
#define DPIF_NETDEV_PERF_H 1
19
20
#include <stdbool.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
#include <string.h>
24
#include <time.h>
25
#include <math.h>
26
27
#ifdef DPDK_NETDEV
28
#include <rte_config.h>
29
#include <rte_cycles.h>
30
#endif
31
32
#include "openvswitch/vlog.h"
33
#include "ovs-atomic.h"
34
#include "timeval.h"
35
#include "unixctl.h"
36
#include "util.h"
37
38
#ifdef  __cplusplus
39
extern "C" {
40
#endif
41
42
/* This module encapsulates data structures and functions to maintain basic PMD
43
 * performance metrics such as packet counters, execution cycles as well as
44
 * histograms and time series recording for more detailed PMD metrics.
45
 *
46
 * It provides a clean API for dpif-netdev to initialize, update and read and
47
 * reset these metrics.
48
 *
49
 * The basic set of PMD counters is implemented as atomic_uint64_t variables
50
 * to guarantee correct read also in 32-bit systems.
51
 *
52
 * The detailed PMD performance metrics are only supported on 64-bit systems
53
 * with atomic 64-bit read and store semantics for plain uint64_t counters.
54
 */
55
56
/* Counter types maintained in pmd_perf_stats.  The enumerators are used as
 * indices into the counter arrays, with PMD_N_STATS giving the array size. */
enum pmd_stat_type {
    /* Per-pass lookup outcomes.  These account for the total number of
     * packet passes through the datapath pipeline and should not overlap
     * with each other. */
    PMD_STAT_PHWOL_HIT = 0, /* Passes with a partial HWOL hit (phwol). */
    PMD_STAT_SIMPLE_HIT,    /* Passes with a simple match hit. */
    PMD_STAT_EXACT_HIT,     /* Passes with an exact match (emc). */
    PMD_STAT_SMC_HIT,       /* Passes with a sig match hit (SMC). */
    PMD_STAT_MASKED_HIT,    /* Passes that matched in the flow table. */
    PMD_STAT_MISS,          /* No match, and the upcall was ok. */
    PMD_STAT_LOST,          /* No match, and the upcall failed. */

    /* Subtable lookups performed for flow table hits.  Every MASKED_HIT
     * has at least one MASKED_LOOKUP. */
    PMD_STAT_MASKED_LOOKUP,

    /* Packet movement. */
    PMD_STAT_RECV,          /* Packets entering the pipeline from an
                             * interface. */
    PMD_STAT_RECIRC,        /* Packets reentering the pipeline due to
                             * recirculation. */
    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
    PMD_STAT_SENT_BATCHES,  /* Batches that have been sent. */

    /* Cycle and sleep accounting. */
    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
    PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
    PMD_SLEEP_ITER,         /* Iterations in which a sleep took place. */
    PMD_CYCLES_SLEEP,       /* Total cycles slept to save power. */

    PMD_N_STATS             /* Number of counter types; keep last. */
};
86
87
/* Array of PMD counters indexed by enum pmd_stat_type.
88
 * The n[] array contains the actual counter values since initialization
89
 * of the PMD. Counters are atomically updated from the PMD but are
90
 * read and cleared also from other processes. To clear the counters at
91
 * PMD run-time, the current counter values are copied over to the zero[]
92
 * array. To read counters we subtract zero[] value from n[]. */
93
94
struct pmd_counters {
95
    atomic_uint64_t n[PMD_N_STATS];     /* Value since _init(). */
96
    uint64_t zero[PMD_N_STATS];         /* Value at last _clear().  */
97
};
98
99
/* Histogram describing the statistical distribution of an integer
 * measurement.  wall[i] is the inclusive upper boundary of bin i and bin[i]
 * is the number of samples counted in that bin.  The walls are typically set
 * automatically by the helper functions of this module. */

#define NUM_BINS 32             /* Bins per histogram. */

struct histogram {
    uint32_t wall[NUM_BINS];    /* Inclusive upper bound of each bin. */
    uint64_t bin[NUM_BINS];     /* Sample count of each bin. */
};
111
112
/* Detailed PMD execution metrics for one iteration.  Samples are kept for a
 * history period of up to HISTORY_LEN iterations in a circular buffer.  The
 * same layout is also used to record up to HISTORY_LEN millisecond
 * averages/totals of these metrics, so every field describes either a single
 * iteration or a single millisecond. */
struct iter_stats {
    /* Iteration number, or the millisecond. */
    uint64_t timestamp;
    /* TSC cycles spent in the iteration or millisecond. */
    uint64_t cycles;
    /* Cycles spent in busy iterations or millisecond. */
    uint64_t busy_cycles;
    /* Iterations in the millisecond. */
    uint32_t iterations;
    /* Packets processed. */
    uint32_t pkts;
    /* Number of upcalls. */
    uint32_t upcalls;
    /* Cycles spent in upcalls. */
    uint32_t upcall_cycles;
    /* Number of rx batches. */
    uint32_t batches;
    /* Maximum fill level. */
    uint32_t max_vhost_qfill;
};
128
129
0
#define HISTORY_LEN 1000        /* Length of recorded history
130
                                   (iterations and ms). */
131
#define DEF_HIST_SHOW 20        /* Default number of history samples to
132
                                   display. */
133
134
struct history {
135
    size_t idx;                 /* Slot to which next call to history_store()
136
                                   will write. */
137
    struct iter_stats sample[HISTORY_LEN];
138
};
139
140
/* Container for all performance metrics of a PMD within the struct
141
 * dp_netdev_pmd_thread. The metrics must be updated from within the PMD
142
 * thread but can be read from any thread. The basic PMD counters in
143
 * struct pmd_counters can be read without protection against concurrent
144
 * clearing. The other metrics may only be safely read with the clear_mutex
145
 * held to protect against concurrent clearing. */
146
147
struct pmd_perf_stats {
148
    /* Prevents interference between PMD polling and stats clearing. */
149
    struct ovs_mutex stats_mutex;
150
    /* Set by CLI thread to order clearing of PMD stats. */
151
    volatile bool clear;
152
    /* Prevents stats retrieval while clearing is in progress. */
153
    struct ovs_mutex clear_mutex;
154
    /* Start of the current performance measurement period. */
155
    uint64_t start_ms;
156
    /* Counter for PMD iterations. */
157
    uint64_t iteration_cnt;
158
    /* Start of the current iteration. */
159
    uint64_t start_tsc;
160
    /* Latest TSC time stamp taken in PMD. */
161
    uint64_t last_tsc;
162
    /* Used to space certain checks in time. */
163
    uint64_t next_check_tsc;
164
    /* If non-NULL, outermost cycle timer currently running in PMD. */
165
    struct cycle_timer *cur_timer;
166
    /* Set of PMD counters with their zero offsets. */
167
    struct pmd_counters counters;
168
    /* Statistics of the current iteration. */
169
    struct iter_stats current;
170
    /* Totals for the current millisecond. */
171
    struct iter_stats totals;
172
    /* Histograms for the PMD metrics. */
173
    struct histogram cycles;
174
    struct histogram pkts;
175
    struct histogram cycles_per_pkt;
176
    struct histogram upcalls;
177
    struct histogram cycles_per_upcall;
178
    struct histogram pkts_per_batch;
179
    struct histogram max_vhost_qfill;
180
    /* Iteration history buffer. */
181
    struct history iterations;
182
    /* Millisecond history buffer. */
183
    struct history milliseconds;
184
    /* Suspicious iteration log. */
185
    uint32_t log_susp_it;
186
    /* Start of iteration range to log. */
187
    uint32_t log_begin_it;
188
    /* End of iteration range to log. */
189
    uint32_t log_end_it;
190
    /* Reason for logging suspicious iteration. */
191
    char *log_reason;
192
};
193
194
#ifdef __linux__
195
static inline uint64_t
196
rdtsc_syscall(struct pmd_perf_stats *s)
197
0
{
198
0
    struct timespec val;
199
0
    uint64_t v;
200
0
201
0
    if (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) {
202
0
       return s->last_tsc;
203
0
    }
204
0
205
0
    v  = val.tv_sec * UINT64_C(1000000000) + val.tv_nsec;
206
0
    return s->last_tsc = v;
207
0
}
Unexecuted instantiation: dpif-netdev.c:rdtsc_syscall
Unexecuted instantiation: dpif-netdev-dpcls.c:rdtsc_syscall
Unexecuted instantiation: dpif-netdev-perf.c:rdtsc_syscall
208
#endif
209
210
/* Support for accurate timing of PMD execution on TSC clock cycle level.
211
 * These functions are intended to be invoked in the context of pmd threads. */
212
213
/* Read the TSC cycle register and cache it. Any function not requiring clock
214
 * cycle accuracy should read the cached value using cycles_counter_get() to
215
 * avoid the overhead of reading the TSC register. */
216
217
static inline uint64_t
218
cycles_counter_update(struct pmd_perf_stats *s)
219
0
{
220
#ifdef DPDK_NETDEV
221
    return s->last_tsc = rte_get_tsc_cycles();
222
#elif defined(__x86_64__)
223
    uint32_t h, l;
224
0
    asm volatile("rdtsc" : "=a" (l), "=d" (h));
225
226
0
    return s->last_tsc = ((uint64_t) h << 32) | l;
227
#elif defined(__aarch64__)
228
    asm volatile("mrs %0, cntvct_el0" : "=r" (s->last_tsc));
229
230
    return s->last_tsc;
231
#elif defined(__linux__)
232
    return rdtsc_syscall(s);
233
#else
234
    return s->last_tsc = 0;
235
#endif
236
0
}
Unexecuted instantiation: dpif-netdev.c:cycles_counter_update
Unexecuted instantiation: dpif-netdev-dpcls.c:cycles_counter_update
Unexecuted instantiation: dpif-netdev-perf.c:cycles_counter_update
237
238
static inline uint64_t
239
cycles_counter_get(struct pmd_perf_stats *s)
240
0
{
241
0
    return s->last_tsc;
242
0
}
Unexecuted instantiation: dpif-netdev.c:cycles_counter_get
Unexecuted instantiation: dpif-netdev-dpcls.c:cycles_counter_get
Unexecuted instantiation: dpif-netdev-perf.c:cycles_counter_get
243
244
void pmd_perf_estimate_tsc_frequency(void);
245
246
/* A nestable timer for measuring execution time in TSC cycles.
247
 *
248
 * Usage:
249
 * struct cycle_timer timer;
250
 *
251
 * cycle_timer_start(pmd, &timer);
252
 * <Timed execution>
253
 * uint64_t cycles = cycle_timer_stop(pmd, &timer);
254
 *
255
 * The caller must guarantee that a call to cycle_timer_start() is always
256
 * paired with a call to cycle_timer_stop().
257
 *
258
 * It is possible to have nested cycle timers within the timed code. The
259
 * execution time measured by the nested timers is excluded from the time
260
 * measured by the embracing timer.
261
 */
262
263
struct cycle_timer {
    /* Cycle count taken when the timer was started.  It is moved forward
     * when a nested timer stops, by the cycles that nested timer ran. */
    uint64_t start;
    /* Cycle count at which a nested timer took over; 0 after start. */
    uint64_t suspended;
    /* Timer that was running when this one was started, or NULL. */
    struct cycle_timer *interrupted;
};
268
269
static inline void
270
cycle_timer_start(struct pmd_perf_stats *s,
271
                  struct cycle_timer *timer)
272
0
{
273
0
    struct cycle_timer *cur_timer = s->cur_timer;
274
0
    uint64_t now = cycles_counter_update(s);
275
276
0
    if (cur_timer) {
277
0
        cur_timer->suspended = now;
278
0
    }
279
0
    timer->interrupted = cur_timer;
280
0
    timer->start = now;
281
0
    timer->suspended = 0;
282
0
    s->cur_timer = timer;
283
0
}
Unexecuted instantiation: dpif-netdev.c:cycle_timer_start
Unexecuted instantiation: dpif-netdev-dpcls.c:cycle_timer_start
Unexecuted instantiation: dpif-netdev-perf.c:cycle_timer_start
284
285
static inline uint64_t
286
cycle_timer_stop(struct pmd_perf_stats *s,
287
                 struct cycle_timer *timer)
288
0
{
289
    /* Assert that this is the current cycle timer. */
290
0
    ovs_assert(s->cur_timer == timer);
291
0
    uint64_t now = cycles_counter_update(s);
292
0
    struct cycle_timer *intr_timer = timer->interrupted;
293
294
0
    if (intr_timer) {
295
        /* Adjust the start offset by the suspended cycles. */
296
0
        intr_timer->start += now - intr_timer->suspended;
297
0
    }
298
    /* Restore suspended timer, if any. */
299
0
    s->cur_timer = intr_timer;
300
0
    return now - timer->start;
301
0
}
Unexecuted instantiation: dpif-netdev.c:cycle_timer_stop
Unexecuted instantiation: dpif-netdev-dpcls.c:cycle_timer_stop
Unexecuted instantiation: dpif-netdev-perf.c:cycle_timer_stop
302
303
/* Functions to initialize and reset the PMD performance metrics. */
304
305
void pmd_perf_stats_init(struct pmd_perf_stats *s);
306
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
307
void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);
308
309
/* Functions to read and update PMD counters. */
310
311
void pmd_perf_read_counters(struct pmd_perf_stats *s,
312
                            uint64_t stats[PMD_N_STATS]);
313
314
/* PMD performance counters are updated lock-less. For real PMDs
315
 * they are only updated from the PMD thread itself. In the case of the
316
 * NON-PMD they might be updated from multiple threads, but we can live
317
 * with losing a rare update as 100% accuracy is not required.
318
 * However, as counters are read for display from outside the PMD thread
319
 * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
320
 * operations are atomic also on 32-bit systems so that readers cannot
321
 * read garbage. On 64-bit systems this incurs no overhead. */
322
323
static inline void
324
pmd_perf_update_counter(struct pmd_perf_stats *s,
325
                        enum pmd_stat_type counter, int delta)
326
0
{
327
0
    uint64_t tmp;
328
0
    atomic_read_relaxed(&s->counters.n[counter], &tmp);
329
0
    tmp += delta;
330
0
    atomic_store_relaxed(&s->counters.n[counter], tmp);
331
0
}
Unexecuted instantiation: dpif-netdev.c:pmd_perf_update_counter
Unexecuted instantiation: dpif-netdev-dpcls.c:pmd_perf_update_counter
Unexecuted instantiation: dpif-netdev-perf.c:pmd_perf_update_counter
332
333
/* Functions to manipulate a sample history. */
334
335
static inline void
336
histogram_add_sample(struct histogram *hist, uint32_t val)
337
0
{
338
    /* TODO: Can do better with binary search? */
339
0
    for (int i = 0; i < NUM_BINS-1; i++) {
340
0
        if (val <= hist->wall[i]) {
341
0
            hist->bin[i]++;
342
0
            return;
343
0
        }
344
0
    }
345
0
    hist->bin[NUM_BINS-1]++;
346
0
}
Unexecuted instantiation: dpif-netdev.c:histogram_add_sample
Unexecuted instantiation: dpif-netdev-dpcls.c:histogram_add_sample
Unexecuted instantiation: dpif-netdev-perf.c:histogram_add_sample
347
348
uint64_t histogram_samples(const struct histogram *hist);
349
350
/* This function is used to advance the given history index by positive
351
 * offset in the circular history buffer. */
352
static inline uint32_t
353
history_add(uint32_t idx, uint32_t offset)
354
0
{
355
0
    return (idx + offset) % HISTORY_LEN;
356
0
}
Unexecuted instantiation: dpif-netdev.c:history_add
Unexecuted instantiation: dpif-netdev-dpcls.c:history_add
Unexecuted instantiation: dpif-netdev-perf.c:history_add
357
358
/* This function computes the difference between two indices into the
359
 * circular history buffer. The result is always positive in the range
360
 * 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
361
 * starting from idx2. It can also be used to retreat the history index
362
 * idx1 by idx2 steps. */
363
static inline uint32_t
364
history_sub(uint32_t idx1, uint32_t idx2)
365
0
{
366
0
    return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
367
0
}
Unexecuted instantiation: dpif-netdev.c:history_sub
Unexecuted instantiation: dpif-netdev-dpcls.c:history_sub
Unexecuted instantiation: dpif-netdev-perf.c:history_sub
368
369
static inline struct iter_stats *
370
history_current(struct history *h)
371
0
{
372
0
    return &h->sample[h->idx];
373
0
}
Unexecuted instantiation: dpif-netdev.c:history_current
Unexecuted instantiation: dpif-netdev-dpcls.c:history_current
Unexecuted instantiation: dpif-netdev-perf.c:history_current
374
375
static inline struct iter_stats *
376
history_next(struct history *h)
377
0
{
378
0
    size_t next_idx = history_add(h->idx, 1);
379
0
    struct iter_stats *next = &h->sample[next_idx];
380
381
0
    memset(next, 0, sizeof(*next));
382
0
    h->idx = next_idx;
383
0
    return next;
384
0
}
Unexecuted instantiation: dpif-netdev.c:history_next
Unexecuted instantiation: dpif-netdev-dpcls.c:history_next
Unexecuted instantiation: dpif-netdev-perf.c:history_next
385
386
static inline struct iter_stats *
387
history_store(struct history *h, struct iter_stats *is)
388
0
{
389
0
    if (is) {
390
0
        h->sample[h->idx] = *is;
391
0
    }
392
    /* Advance the history pointer */
393
0
    return history_next(h);
394
0
}
Unexecuted instantiation: dpif-netdev.c:history_store
Unexecuted instantiation: dpif-netdev-dpcls.c:history_store
Unexecuted instantiation: dpif-netdev-perf.c:history_store
395
396
/* Data and function related to logging of suspicious iterations. */
397
398
extern bool log_enabled;
399
extern bool log_extend;
400
extern uint32_t log_q_thr;
401
extern uint64_t iter_cycle_threshold;
402
403
void pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, char *reason);
404
void pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s);
405
406
/* Functions recording PMD metrics per iteration. */
407
408
void
409
pmd_perf_start_iteration(struct pmd_perf_stats *s);
410
void
411
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
412
                       int tx_packets, uint64_t sleep_cycles,
413
                       bool full_metrics);
414
415
/* Formatting the output of commands. */
416
417
/* Parameters for formatting command output.
 * NOTE(review): the field meanings below are inferred from the field names
 * only; confirm against the users of this struct. */
struct pmd_perf_params {
    int command_type;       /* Presumably selects the command to serve. */
    bool histograms;        /* Presumably enables histogram output. */
    size_t iter_hist_len;   /* Presumably the iteration history length. */
    size_t ms_hist_len;     /* Presumably the millisecond history length. */
};
423
424
void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
425
                                   double duration);
426
void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
427
void pmd_perf_format_iteration_history(struct ds *str,
428
                                       struct pmd_perf_stats *s,
429
                                       int n_iter);
430
void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
431
                                int n_ms);
432
void pmd_perf_log_set_cmd(struct unixctl_conn *conn,
433
                          int argc, const char *argv[],
434
                          void *aux OVS_UNUSED);
435
436
#ifdef  __cplusplus
437
}
438
#endif
439
440
#endif /* DPIF_NETDEV_PERF_H */