Coverage Report

Created: 2025-07-01 06:51

/src/openvswitch/lib/poll-loop.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include "openvswitch/poll-loop.h"
19
#include <errno.h>
20
#include <inttypes.h>
21
#include <poll.h>
22
#include <stdlib.h>
23
#include <string.h>
24
#include "coverage.h"
25
#include "openvswitch/dynamic-string.h"
26
#include "fatal-signal.h"
27
#include "openvswitch/list.h"
28
#include "ovs-thread.h"
29
#include "seq.h"
30
#include "socket-util.h"
31
#include "timeval.h"
32
#include "openvswitch/vlog.h"
33
#include "openvswitch/hmap.h"
34
#include "hash.h"
35
36
VLOG_DEFINE_THIS_MODULE(poll_loop);
37
38
COVERAGE_DEFINE(poll_create_node);
39
COVERAGE_DEFINE(poll_zero_timeout);
40
41
struct poll_node {
42
    struct hmap_node hmap_node;
43
    struct pollfd pollfd;       /* Events to pass to time_poll(). */
44
    HANDLE wevent;              /* Events for WaitForMultipleObjects(). */
45
    const char *where;          /* Where poll_node was created. */
46
};
47
48
struct poll_loop {
49
    /* All active poll waiters. */
50
    struct hmap poll_nodes;
51
52
    /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
53
     * wake up immediately, or LLONG_MAX to wait forever. */
54
    long long int timeout_when; /* In msecs as returned by time_msec(). */
55
    const char *timeout_where;  /* Where 'timeout_when' was set. */
56
};
57
58
static struct poll_loop *poll_loop(void);
59
60
/* Look up the node with same fd or wevent. */
61
static struct poll_node *
62
find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent)
63
0
{
64
0
    struct poll_node *node;
65
66
    /* Both 'fd' and 'wevent' cannot be set. */
67
0
    ovs_assert(!fd != !wevent);
68
69
0
    HMAP_FOR_EACH_WITH_HASH (node, hmap_node,
70
0
                             hash_2words(fd, (uint32_t)wevent),
71
0
                             &loop->poll_nodes) {
72
0
        if ((fd && node->pollfd.fd == fd)
73
0
            || (wevent && node->wevent == wevent)) {
74
0
            return node;
75
0
        }
76
0
    }
77
0
    return NULL;
78
0
}
79
80
/* On Unix based systems:
81
 *
82
 *     Registers 'fd' as waiting for the specified 'events' (which should be
83
 *     POLLIN or POLLOUT or POLLIN | POLLOUT).  The following call to
84
 *     poll_block() will wake up when 'fd' becomes ready for one or more of the
85
 *     requested events. The 'fd's are given to poll() function later.
86
 *
87
 * On Windows system:
88
 *
89
 *     If 'fd' is specified, create a new 'wevent'. Association of 'fd' and
90
 *     'wevent' for 'events' happens in poll_block(). If 'wevent' is specified,
91
 *     it is assumed that it is unrelated to any sockets and poll_block()
92
 *     will wake up on any event on that 'wevent'. It is an error to pass
93
 *     both 'wevent' and 'fd'.
94
 *
95
 * The event registration is one-shot: only the following call to
96
 * poll_block() is affected.  The event will need to be re-registered after
97
 * poll_block() is called if it is to persist.
98
 *
99
 * ('where' is used in debug logging.  Commonly one would use poll_fd_wait() to
100
 * automatically provide the caller's source file and line number for
101
 * 'where'.) */
102
static void
103
poll_create_node(int fd, HANDLE wevent, short int events, const char *where)
104
0
{
105
0
    struct poll_loop *loop = poll_loop();
106
0
    struct poll_node *node;
107
108
0
    COVERAGE_INC(poll_create_node);
109
110
    /* Both 'fd' and 'wevent' cannot be set. */
111
0
    ovs_assert(!fd != !wevent);
112
113
    /* Check for duplicate.  If found, "or" the events. */
114
0
    node = find_poll_node(loop, fd, wevent);
115
0
    if (node) {
116
0
        node->pollfd.events |= events;
117
0
    } else {
118
0
        node = xzalloc(sizeof *node);
119
0
        hmap_insert(&loop->poll_nodes, &node->hmap_node,
120
0
                    hash_2words(fd, (uint32_t)wevent));
121
0
        node->pollfd.fd = fd;
122
0
        node->pollfd.events = events;
123
#ifdef _WIN32
124
        if (!wevent) {
125
            wevent = CreateEvent(NULL, FALSE, FALSE, NULL);
126
        }
127
#endif
128
0
        node->wevent = wevent;
129
0
        node->where = where;
130
0
    }
131
0
}
132
133
/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
134
 * or POLLOUT or POLLIN | POLLOUT).  The following call to poll_block() will
135
 * wake up when 'fd' becomes ready for one or more of the requested events.
136
 *
137
 * On Windows, 'fd' must be a socket.
138
 *
139
 * The event registration is one-shot: only the following call to poll_block()
140
 * is affected.  The event will need to be re-registered after poll_block() is
141
 * called if it is to persist.
142
 *
143
 * ('where' is used in debug logging.  Commonly one would use poll_fd_wait() to
144
 * automatically provide the caller's source file and line number for
145
 * 'where'.) */
146
void
147
poll_fd_wait_at(int fd, short int events, const char *where)
148
0
{
149
0
    poll_create_node(fd, 0, events, where);
150
0
}
151
152
#ifdef _WIN32
/* Makes the next call to poll_block() wake up when 'wevent' is signaled.
 *
 * Registration is one-shot: it affects only the next poll_block() and must be
 * repeated afterwards if it is to persist.
 *
 * ('where' is used in debug logging.  Callers normally use
 * poll_wevent_wait(), which supplies the caller's source file and line number
 * as 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    /* No descriptor and no poll events: the waiter is the handle itself. */
    const int no_fd = 0;
    const short int no_events = 0;

    poll_create_node(no_fd, wevent, no_events, where);
}
#endif /* _WIN32 */
169
170
/* Limits the next call to poll_block() to blocking for at most 'msec'
 * milliseconds.  A nonpositive 'msec' makes the next poll_block() return
 * without blocking.
 *
 * The timer is one-shot: it affects only the next poll_block() and must be
 * registered again afterwards if it is to persist.
 *
 * ('where' is used in debug logging.  Callers normally use
 * poll_timer_wait(), which supplies the caller's source file and line number
 * as 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();
    /* Default covers the case where now + msec would overflow. */
    long long int deadline = LLONG_MAX;

    if (msec <= 0) {
        /* Wake up immediately. */
        deadline = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* Normal case: the sum is representable. */
        deadline = now + msec;
    }

    poll_timer_wait_until_at(deadline, where);
}
200
201
/* Causes the following call to poll_block() to wake up when the current time,
202
 * as returned by time_msec(), reaches 'when' or later.  If 'when' is earlier
203
 * than the current time, the following call to poll_block() will not block at
204
 * all.
205
 *
206
 * The timer registration is one-shot: only the following call to poll_block()
207
 * is affected.  The timer will need to be re-registered after poll_block() is
208
 * called if it is to persist.
209
 *
210
 * ('where' is used in debug logging.  Commonly one would use
211
 * poll_timer_wait_until() to automatically provide the caller's source file
212
 * and line number for 'where'.) */
213
void
214
poll_timer_wait_until_at(long long int when, const char *where)
215
0
{
216
0
    struct poll_loop *loop = poll_loop();
217
0
    if (when < loop->timeout_when) {
218
0
        loop->timeout_when = when;
219
0
        loop->timeout_where = where;
220
0
    }
221
0
}
222
223
/* Makes the next call to poll_block() return immediately instead of
 * blocking.
 *
 * ('where' is used in debug logging.  Callers normally use
 * poll_immediate_wake(), which supplies the caller's source file and line
 * number as 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* A zero-millisecond timer is treated as "wake up immediately". */
    const long long int no_delay = 0;

    poll_timer_wait_at(no_delay, where);
}
234
235
/* Logs, if appropriate, that the poll loop was awakened by an event
236
 * registered at 'where' (typically a source file and line number).  The other
237
 * arguments have two possible interpretations:
238
 *
239
 *   - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused
240
 *     the wakeup.  'timeout' is ignored.
241
 *
242
 *   - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after
243
 *     which the poll loop woke up.
244
 */
245
static void
246
log_wakeup(const char *where, const struct pollfd *pollfd, int timeout)
247
0
{
248
0
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
249
0
    enum vlog_level level;
250
0
    int cpu_usage;
251
0
    struct ds s;
252
253
0
    cpu_usage = get_cpu_usage();
254
0
    if (VLOG_IS_DBG_ENABLED()) {
255
0
        level = VLL_DBG;
256
0
    } else if (cpu_usage > 50
257
0
               && !thread_is_pmd()
258
0
               && !VLOG_DROP_INFO(&rl)) {
259
0
        level = VLL_INFO;
260
0
    } else {
261
0
        return;
262
0
    }
263
264
0
    ds_init(&s);
265
0
    ds_put_cstr(&s, "wakeup due to ");
266
0
    if (pollfd) {
267
0
        char *description = describe_fd(pollfd->fd);
268
0
        if (pollfd->revents & POLLIN) {
269
0
            ds_put_cstr(&s, "[POLLIN]");
270
0
        }
271
0
        if (pollfd->revents & POLLOUT) {
272
0
            ds_put_cstr(&s, "[POLLOUT]");
273
0
        }
274
0
        if (pollfd->revents & POLLERR) {
275
0
            ds_put_cstr(&s, "[POLLERR]");
276
0
        }
277
0
        if (pollfd->revents & POLLHUP) {
278
0
            ds_put_cstr(&s, "[POLLHUP]");
279
0
        }
280
0
        if (pollfd->revents & POLLNVAL) {
281
0
            ds_put_cstr(&s, "[POLLNVAL]");
282
0
        }
283
0
        ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description);
284
0
        free(description);
285
0
    } else {
286
0
        ds_put_format(&s, "%d-ms timeout", timeout);
287
0
    }
288
0
    if (where) {
289
0
        ds_put_format(&s, " at %s", where);
290
0
    }
291
0
    if (cpu_usage >= 0) {
292
0
        ds_put_format(&s, " (%d%% CPU usage)", cpu_usage);
293
0
    }
294
0
    VLOG(level, "%s", ds_cstr(&s));
295
0
    ds_destroy(&s);
296
0
}
297
298
static void
299
free_poll_nodes(struct poll_loop *loop)
300
0
{
301
0
    struct poll_node *node;
302
303
0
    HMAP_FOR_EACH_SAFE (node, hmap_node, &loop->poll_nodes) {
304
0
        hmap_remove(&loop->poll_nodes, &node->hmap_node);
305
#ifdef _WIN32
306
        if (node->wevent && node->pollfd.fd) {
307
            WSAEventSelect(node->pollfd.fd, NULL, 0);
308
            CloseHandle(node->wevent);
309
        }
310
#endif
311
0
        free(node);
312
0
    }
313
0
}
314
315
/* Blocks until one or more of the events registered with poll_fd_wait()
316
 * occurs, or until the minimum duration registered with poll_timer_wait()
317
 * elapses, or not at all if poll_immediate_wake() has been called. */
318
void
319
poll_block(void)
320
0
{
321
0
    struct poll_loop *loop = poll_loop();
322
0
    struct poll_node *node;
323
0
    struct pollfd *pollfds;
324
0
    HANDLE *wevents = NULL;
325
0
    int elapsed;
326
0
    int retval;
327
0
    int i;
328
329
    /* Register fatal signal events before actually doing any real work for
330
     * poll_block. */
331
0
    fatal_signal_wait();
332
333
0
    if (loop->timeout_when == LLONG_MIN) {
334
0
        COVERAGE_INC(poll_zero_timeout);
335
0
    }
336
337
0
    timewarp_run();
338
0
    pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
339
340
#ifdef _WIN32
341
    wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
342
#endif
343
344
    /* Populate with all the fds and events. */
345
0
    i = 0;
346
0
    HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
347
0
        pollfds[i] = node->pollfd;
348
#ifdef _WIN32
349
        wevents[i] = node->wevent;
350
        if (node->pollfd.fd && node->wevent) {
351
            short int wsa_events = 0;
352
            if (node->pollfd.events & POLLIN) {
353
                wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
354
            }
355
            if (node->pollfd.events & POLLOUT) {
356
                wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
357
            }
358
            WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
359
        }
360
#endif
361
0
        i++;
362
0
    }
363
364
0
    retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
365
0
                       loop->timeout_when, &elapsed);
366
0
    if (retval < 0) {
367
0
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
368
0
        VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
369
0
    } else if (!retval) {
370
0
        log_wakeup(loop->timeout_where, NULL, elapsed);
371
0
    } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
372
0
        i = 0;
373
0
        HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
374
0
            if (pollfds[i].revents) {
375
0
                log_wakeup(node->where, &pollfds[i], 0);
376
0
            }
377
0
            i++;
378
0
        }
379
0
    }
380
381
0
    free_poll_nodes(loop);
382
0
    loop->timeout_when = LLONG_MAX;
383
0
    loop->timeout_where = NULL;
384
0
    free(pollfds);
385
0
    free(wevents);
386
387
    /* Handle any pending signals before doing anything else. */
388
0
    fatal_signal_run();
389
390
0
    seq_woke();
391
0
}
392

393
static void
394
free_poll_loop(void *loop_)
395
0
{
396
0
    struct poll_loop *loop = loop_;
397
398
0
    free_poll_nodes(loop);
399
0
    hmap_destroy(&loop->poll_nodes);
400
0
    free(loop);
401
0
}
402
403
static struct poll_loop *
404
poll_loop(void)
405
0
{
406
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
407
0
    static pthread_key_t key;
408
0
    struct poll_loop *loop;
409
410
0
    if (ovsthread_once_start(&once)) {
411
0
        xpthread_key_create(&key, free_poll_loop);
412
0
        ovsthread_once_done(&once);
413
0
    }
414
415
0
    loop = pthread_getspecific(key);
416
0
    if (!loop) {
417
0
        loop = xzalloc(sizeof *loop);
418
0
        loop->timeout_when = LLONG_MAX;
419
0
        hmap_init(&loop->poll_nodes);
420
0
        xpthread_setspecific(key, loop);
421
0
    }
422
0
    return loop;
423
0
}
424