/src/openvswitch/lib/poll-loop.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
3 | | * |
4 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | | * you may not use this file except in compliance with the License. |
6 | | * You may obtain a copy of the License at: |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | #include <config.h> |
18 | | #include "openvswitch/poll-loop.h" |
19 | | #include <errno.h> |
20 | | #include <inttypes.h> |
21 | | #include <poll.h> |
22 | | #include <stdlib.h> |
23 | | #include <string.h> |
24 | | #include "coverage.h" |
25 | | #include "openvswitch/dynamic-string.h" |
26 | | #include "fatal-signal.h" |
27 | | #include "openvswitch/list.h" |
28 | | #include "ovs-thread.h" |
29 | | #include "seq.h" |
30 | | #include "socket-util.h" |
31 | | #include "timeval.h" |
32 | | #include "openvswitch/vlog.h" |
33 | | #include "openvswitch/hmap.h" |
34 | | #include "hash.h" |
35 | | |
/* Declares "poll_loop" as the vlog module for the logging macros used in this
 * file. */
VLOG_DEFINE_THIS_MODULE(poll_loop);

/* Coverage counters: 'poll_create_node' is bumped by every call to
 * poll_create_node(), 'poll_zero_timeout' by poll_block() whenever the
 * pending timeout is LLONG_MIN. */
COVERAGE_DEFINE(poll_create_node);
COVERAGE_DEFINE(poll_zero_timeout);
40 | | |
41 | | struct poll_node { |
42 | | struct hmap_node hmap_node; |
43 | | struct pollfd pollfd; /* Events to pass to time_poll(). */ |
44 | | HANDLE wevent; /* Events for WaitForMultipleObjects(). */ |
45 | | const char *where; /* Where poll_node was created. */ |
46 | | }; |
47 | | |
48 | | struct poll_loop { |
49 | | /* All active poll waiters. */ |
50 | | struct hmap poll_nodes; |
51 | | |
52 | | /* Time at which to wake up the next call to poll_block(), LLONG_MIN to |
53 | | * wake up immediately, or LLONG_MAX to wait forever. */ |
54 | | long long int timeout_when; /* In msecs as returned by time_msec(). */ |
55 | | const char *timeout_where; /* Where 'timeout_when' was set. */ |
56 | | }; |
57 | | |
58 | | static struct poll_loop *poll_loop(void); |
59 | | |
60 | | /* Look up the node with same fd or wevent. */ |
61 | | static struct poll_node * |
62 | | find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent) |
63 | 0 | { |
64 | 0 | struct poll_node *node; |
65 | | |
66 | | /* Both 'fd' and 'wevent' cannot be set. */ |
67 | 0 | ovs_assert(!fd != !wevent); |
68 | |
|
69 | 0 | HMAP_FOR_EACH_WITH_HASH (node, hmap_node, |
70 | 0 | hash_2words(fd, (uint32_t)wevent), |
71 | 0 | &loop->poll_nodes) { |
72 | 0 | if ((fd && node->pollfd.fd == fd) |
73 | 0 | || (wevent && node->wevent == wevent)) { |
74 | 0 | return node; |
75 | 0 | } |
76 | 0 | } |
77 | 0 | return NULL; |
78 | 0 | } |
79 | | |
80 | | /* On Unix based systems: |
81 | | * |
82 | | * Registers 'fd' as waiting for the specified 'events' (which should be |
83 | | * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to |
84 | | * poll_block() will wake up when 'fd' becomes ready for one or more of the |
85 | | * requested events. The 'fd's are given to poll() function later. |
86 | | * |
87 | | * On Windows system: |
88 | | * |
89 | | * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and |
90 | | * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified, |
91 | | * it is assumed that it is unrelated to any sockets and poll_block() |
92 | | * will wake up on any event on that 'wevent'. It is an error to pass |
93 | | * both 'wevent' and 'fd'. |
94 | | * |
95 | | * The event registration is one-shot: only the following call to |
96 | | * poll_block() is affected. The event will need to be re-registered after |
97 | | * poll_block() is called if it is to persist. |
98 | | * |
99 | | * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to |
100 | | * automatically provide the caller's source file and line number for |
101 | | * 'where'.) */ |
102 | | static void |
103 | | poll_create_node(int fd, HANDLE wevent, short int events, const char *where) |
104 | 0 | { |
105 | 0 | struct poll_loop *loop = poll_loop(); |
106 | 0 | struct poll_node *node; |
107 | |
|
108 | 0 | COVERAGE_INC(poll_create_node); |
109 | | |
110 | | /* Both 'fd' and 'wevent' cannot be set. */ |
111 | 0 | ovs_assert(!fd != !wevent); |
112 | | |
113 | | /* Check for duplicate. If found, "or" the events. */ |
114 | 0 | node = find_poll_node(loop, fd, wevent); |
115 | 0 | if (node) { |
116 | 0 | node->pollfd.events |= events; |
117 | 0 | } else { |
118 | 0 | node = xzalloc(sizeof *node); |
119 | 0 | hmap_insert(&loop->poll_nodes, &node->hmap_node, |
120 | 0 | hash_2words(fd, (uint32_t)wevent)); |
121 | 0 | node->pollfd.fd = fd; |
122 | 0 | node->pollfd.events = events; |
123 | | #ifdef _WIN32 |
124 | | if (!wevent) { |
125 | | wevent = CreateEvent(NULL, FALSE, FALSE, NULL); |
126 | | } |
127 | | #endif |
128 | 0 | node->wevent = wevent; |
129 | 0 | node->where = where; |
130 | 0 | } |
131 | 0 | } |
132 | | |
133 | | /* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN |
134 | | * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will |
135 | | * wake up when 'fd' becomes ready for one or more of the requested events. |
136 | | * |
137 | | * On Windows, 'fd' must be a socket. |
138 | | * |
139 | | * The event registration is one-shot: only the following call to poll_block() |
140 | | * is affected. The event will need to be re-registered after poll_block() is |
141 | | * called if it is to persist. |
142 | | * |
143 | | * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to |
144 | | * automatically provide the caller's source file and line number for |
145 | | * 'where'.) */ |
146 | | void |
147 | | poll_fd_wait_at(int fd, short int events, const char *where) |
148 | 0 | { |
149 | 0 | poll_create_node(fd, 0, events, where); |
150 | 0 | } |
151 | | |
#ifdef _WIN32
/* Arranges for the next call to poll_block() to wake up when 'wevent' is
 * signaled.
 *
 * Registration is one-shot: it affects only the next poll_block() and must be
 * repeated afterwards if it is to persist.
 *
 * ('where' is used in debug logging.  Callers normally use
 * poll_wevent_wait(), which supplies the caller's source file and line number
 * as 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    const int no_fd = 0;            /* Handle-based waiter: no descriptor. */
    const short int no_events = 0;  /* No poll() event mask is requested. */

    poll_create_node(no_fd, wevent, no_events, where);
}
#endif /* _WIN32 */
169 | | |
/* Limits the next call to poll_block() to blocking for at most 'msec'
 * milliseconds.  A zero or negative 'msec' makes that call not block at all.
 *
 * Registration is one-shot: it affects only the next poll_block() and must be
 * repeated afterwards if it is to persist.
 *
 * ('where' is used in debug logging.  Callers normally use poll_timer_wait(),
 * which supplies the caller's source file and line number as 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    const long long int now = time_msec();

    /* Start from the saturated deadline, which is the right answer whenever
     * 'now + msec' cannot be represented. */
    long long int deadline = LLONG_MAX;

    if (msec <= 0) {
        /* Wake up immediately. */
        deadline = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* The sum fits; the test is done in unsigned arithmetic so that it
         * cannot itself overflow. */
        deadline = now + msec;
    }

    poll_timer_wait_until_at(deadline, where);
}
200 | | |
201 | | /* Causes the following call to poll_block() to wake up when the current time, |
202 | | * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier |
203 | | * than the current time, the following call to poll_block() will not block at |
204 | | * all. |
205 | | * |
206 | | * The timer registration is one-shot: only the following call to poll_block() |
207 | | * is affected. The timer will need to be re-registered after poll_block() is |
208 | | * called if it is to persist. |
209 | | * |
210 | | * ('where' is used in debug logging. Commonly one would use |
211 | | * poll_timer_wait_until() to automatically provide the caller's source file |
212 | | * and line number for 'where'.) */ |
213 | | void |
214 | | poll_timer_wait_until_at(long long int when, const char *where) |
215 | 0 | { |
216 | 0 | struct poll_loop *loop = poll_loop(); |
217 | 0 | if (when < loop->timeout_when) { |
218 | 0 | loop->timeout_when = when; |
219 | 0 | loop->timeout_where = where; |
220 | 0 | } |
221 | 0 | } |
222 | | |
/* Makes the next call to poll_block() return right away instead of blocking.
 *
 * ('where' is used in debug logging.  Callers normally use
 * poll_immediate_wake(), which supplies the caller's source file and line
 * number as 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* poll_timer_wait_at() treats a nonpositive delay as "do not block". */
    const long long int no_delay = 0;

    poll_timer_wait_at(no_delay, where);
}
234 | | |
235 | | /* Logs, if appropriate, that the poll loop was awakened by an event |
236 | | * registered at 'where' (typically a source file and line number). The other |
237 | | * arguments have two possible interpretations: |
238 | | * |
239 | | * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused |
240 | | * the wakeup. 'timeout' is ignored. |
241 | | * |
242 | | * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after |
243 | | * which the poll loop woke up. |
244 | | */ |
245 | | static void |
246 | | log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) |
247 | 0 | { |
248 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); |
249 | 0 | enum vlog_level level; |
250 | 0 | int cpu_usage; |
251 | 0 | struct ds s; |
252 | |
|
253 | 0 | cpu_usage = get_cpu_usage(); |
254 | 0 | if (VLOG_IS_DBG_ENABLED()) { |
255 | 0 | level = VLL_DBG; |
256 | 0 | } else if (cpu_usage > 50 |
257 | 0 | && !thread_is_pmd() |
258 | 0 | && !VLOG_DROP_INFO(&rl)) { |
259 | 0 | level = VLL_INFO; |
260 | 0 | } else { |
261 | 0 | return; |
262 | 0 | } |
263 | | |
264 | 0 | ds_init(&s); |
265 | 0 | ds_put_cstr(&s, "wakeup due to "); |
266 | 0 | if (pollfd) { |
267 | 0 | char *description = describe_fd(pollfd->fd); |
268 | 0 | if (pollfd->revents & POLLIN) { |
269 | 0 | ds_put_cstr(&s, "[POLLIN]"); |
270 | 0 | } |
271 | 0 | if (pollfd->revents & POLLOUT) { |
272 | 0 | ds_put_cstr(&s, "[POLLOUT]"); |
273 | 0 | } |
274 | 0 | if (pollfd->revents & POLLERR) { |
275 | 0 | ds_put_cstr(&s, "[POLLERR]"); |
276 | 0 | } |
277 | 0 | if (pollfd->revents & POLLHUP) { |
278 | 0 | ds_put_cstr(&s, "[POLLHUP]"); |
279 | 0 | } |
280 | 0 | if (pollfd->revents & POLLNVAL) { |
281 | 0 | ds_put_cstr(&s, "[POLLNVAL]"); |
282 | 0 | } |
283 | 0 | ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description); |
284 | 0 | free(description); |
285 | 0 | } else { |
286 | 0 | ds_put_format(&s, "%d-ms timeout", timeout); |
287 | 0 | } |
288 | 0 | if (where) { |
289 | 0 | ds_put_format(&s, " at %s", where); |
290 | 0 | } |
291 | 0 | if (cpu_usage >= 0) { |
292 | 0 | ds_put_format(&s, " (%d%% CPU usage)", cpu_usage); |
293 | 0 | } |
294 | 0 | VLOG(level, "%s", ds_cstr(&s)); |
295 | 0 | ds_destroy(&s); |
296 | 0 | } |
297 | | |
298 | | static void |
299 | | free_poll_nodes(struct poll_loop *loop) |
300 | 0 | { |
301 | 0 | struct poll_node *node; |
302 | |
|
303 | 0 | HMAP_FOR_EACH_SAFE (node, hmap_node, &loop->poll_nodes) { |
304 | 0 | hmap_remove(&loop->poll_nodes, &node->hmap_node); |
305 | | #ifdef _WIN32 |
306 | | if (node->wevent && node->pollfd.fd) { |
307 | | WSAEventSelect(node->pollfd.fd, NULL, 0); |
308 | | CloseHandle(node->wevent); |
309 | | } |
310 | | #endif |
311 | 0 | free(node); |
312 | 0 | } |
313 | 0 | } |
314 | | |
315 | | /* Blocks until one or more of the events registered with poll_fd_wait() |
316 | | * occurs, or until the minimum duration registered with poll_timer_wait() |
317 | | * elapses, or not at all if poll_immediate_wake() has been called. */ |
318 | | void |
319 | | poll_block(void) |
320 | 0 | { |
321 | 0 | struct poll_loop *loop = poll_loop(); |
322 | 0 | struct poll_node *node; |
323 | 0 | struct pollfd *pollfds; |
324 | 0 | HANDLE *wevents = NULL; |
325 | 0 | int elapsed; |
326 | 0 | int retval; |
327 | 0 | int i; |
328 | | |
329 | | /* Register fatal signal events before actually doing any real work for |
330 | | * poll_block. */ |
331 | 0 | fatal_signal_wait(); |
332 | |
|
333 | 0 | if (loop->timeout_when == LLONG_MIN) { |
334 | 0 | COVERAGE_INC(poll_zero_timeout); |
335 | 0 | } |
336 | |
|
337 | 0 | timewarp_run(); |
338 | 0 | pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); |
339 | |
|
340 | | #ifdef _WIN32 |
341 | | wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); |
342 | | #endif |
343 | | |
344 | | /* Populate with all the fds and events. */ |
345 | 0 | i = 0; |
346 | 0 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { |
347 | 0 | pollfds[i] = node->pollfd; |
348 | | #ifdef _WIN32 |
349 | | wevents[i] = node->wevent; |
350 | | if (node->pollfd.fd && node->wevent) { |
351 | | short int wsa_events = 0; |
352 | | if (node->pollfd.events & POLLIN) { |
353 | | wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; |
354 | | } |
355 | | if (node->pollfd.events & POLLOUT) { |
356 | | wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; |
357 | | } |
358 | | WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); |
359 | | } |
360 | | #endif |
361 | 0 | i++; |
362 | 0 | } |
363 | |
|
364 | 0 | retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, |
365 | 0 | loop->timeout_when, &elapsed); |
366 | 0 | if (retval < 0) { |
367 | 0 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); |
368 | 0 | VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); |
369 | 0 | } else if (!retval) { |
370 | 0 | log_wakeup(loop->timeout_where, NULL, elapsed); |
371 | 0 | } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) { |
372 | 0 | i = 0; |
373 | 0 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { |
374 | 0 | if (pollfds[i].revents) { |
375 | 0 | log_wakeup(node->where, &pollfds[i], 0); |
376 | 0 | } |
377 | 0 | i++; |
378 | 0 | } |
379 | 0 | } |
380 | |
|
381 | 0 | free_poll_nodes(loop); |
382 | 0 | loop->timeout_when = LLONG_MAX; |
383 | 0 | loop->timeout_where = NULL; |
384 | 0 | free(pollfds); |
385 | 0 | free(wevents); |
386 | | |
387 | | /* Handle any pending signals before doing anything else. */ |
388 | 0 | fatal_signal_run(); |
389 | |
|
390 | 0 | seq_woke(); |
391 | 0 | } |
392 | | |
393 | | static void |
394 | | free_poll_loop(void *loop_) |
395 | 0 | { |
396 | 0 | struct poll_loop *loop = loop_; |
397 | |
|
398 | 0 | free_poll_nodes(loop); |
399 | 0 | hmap_destroy(&loop->poll_nodes); |
400 | 0 | free(loop); |
401 | 0 | } |
402 | | |
403 | | static struct poll_loop * |
404 | | poll_loop(void) |
405 | 0 | { |
406 | 0 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
407 | 0 | static pthread_key_t key; |
408 | 0 | struct poll_loop *loop; |
409 | |
|
410 | 0 | if (ovsthread_once_start(&once)) { |
411 | 0 | xpthread_key_create(&key, free_poll_loop); |
412 | 0 | ovsthread_once_done(&once); |
413 | 0 | } |
414 | |
|
415 | 0 | loop = pthread_getspecific(key); |
416 | 0 | if (!loop) { |
417 | 0 | loop = xzalloc(sizeof *loop); |
418 | 0 | loop->timeout_when = LLONG_MAX; |
419 | 0 | hmap_init(&loop->poll_nodes); |
420 | 0 | xpthread_setspecific(key, loop); |
421 | 0 | } |
422 | 0 | return loop; |
423 | 0 | } |
424 | | |