/*
 * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
 * Copyright 2007-2012 Niels Provos, Nick Mathewson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "event2/event-config.h"
#include "evconfig-private.h"

#if defined EVENT__HAVE_EPOLL || defined EVENT__HAVE_WEPOLL

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>

#ifdef EVENT__HAVE_WEPOLL
#include "wepoll.h"
#define EPOLLET 0
#else
#include <sys/types.h>
#include <sys/resource.h>
#ifdef EVENT__HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/queue.h>
#include <sys/epoll.h>
#include <signal.h>
#include <unistd.h>
#include <errno.h>
#ifdef EVENT__HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef EVENT__HAVE_SYS_TIMERFD_H
#include <sys/timerfd.h>
#endif
#endif

#include "event-internal.h"
#include "evsignal-internal.h"
#include "event2/thread.h"
#include "evthread-internal.h"
#include "log-internal.h"
#include "evmap-internal.h"
#include "changelist-internal.h"
#include "time-internal.h"

/* Since Linux 2.6.17, epoll has been able to report a peer's half-closed
   connection via the special EPOLLRDHUP flag on a read event.
*/
#if !defined(EPOLLRDHUP)
#define EPOLLRDHUP 0
#define EARLY_CLOSE_IF_HAVE_RDHUP 0
#else
#define EARLY_CLOSE_IF_HAVE_RDHUP EV_FEATURE_EARLY_CLOSE
#endif
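
/* Illustrative sketch (an assumed caller, not part of this backend): an
 * application that wants early-close notification asks for EV_CLOSED on its
 * event, which this backend maps to EPOLLRDHUP below.  `on_peer_closed` is a
 * hypothetical user callback:
 *
 *	struct event *ev = event_new(base, fd, EV_CLOSED | EV_PERSIST,
 *	    on_peer_closed, NULL);
 *	event_add(ev, NULL);
 *
 * EV_CLOSED is only honored when the backend advertises
 * EV_FEATURE_EARLY_CLOSE, which is what EARLY_CLOSE_IF_HAVE_RDHUP selects.
 */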

#include "epolltable-internal.h"
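
/* Note: epolltable-internal.h supplies epoll_op_table[], which
 * epoll_apply_one_change() below indexes (via EPOLL_OP_TABLE_INDEX) to turn a
 * combination of old events and pending read/write/close changes into a
 * single epoll_ctl() op plus an EPOLL* event mask. */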

#if defined(EVENT__HAVE_SYS_TIMERFD_H) &&		  \
	defined(EVENT__HAVE_TIMERFD_CREATE) &&		  \
	defined(HAVE_POSIX_MONOTONIC) && defined(TFD_NONBLOCK) && \
	defined(TFD_CLOEXEC) && !defined(EVENT__HAVE_EPOLL_PWAIT2)
/* Note that we only use timerfd if TFD_NONBLOCK and TFD_CLOEXEC are available
   and working.  This means that we can't support it on 2.6.25 (where timerfd
   was introduced) or 2.6.26, since 2.6.27 introduced those flags.  On recent
   enough kernels (5.11 and newer), epoll_pwait2() already provides
   nanosecond-precision timeouts, so timerfd is not needed at all.
*/
#define USING_TIMERFD
#endif
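
/* For reference (a hedged aside; only relevant when EVENT__HAVE_EPOLL_PWAIT2
 * is defined): epoll_pwait2() takes a struct timespec rather than a
 * millisecond count, which is what makes timerfd unnecessary there:
 *
 *	int epoll_pwait2(int epfd, struct epoll_event *events, int maxevents,
 *	    const struct timespec *timeout, const sigset_t *sigmask);
 */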

#ifdef EVENT__HAVE_WEPOLL
typedef HANDLE epoll_handle;
#define INVALID_EPOLL_HANDLE NULL
static void close_epoll_handle(HANDLE h) { epoll_close(h); }
#else
typedef int epoll_handle;
#define INVALID_EPOLL_HANDLE -1
static void close_epoll_handle(int h) { close(h); }
#endif

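/* Per-event_base state for this backend: `events` is the buffer handed to
 * epoll_wait()/epoll_pwait2(), `nevents` is its current capacity, `epfd` is
 * the epoll instance, and (when USING_TIMERFD) `timerfd` is the descriptor
 * used for sub-millisecond timeouts, or -1 if unavailable. */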
struct epollop {
	struct epoll_event *events;
	int nevents;
	epoll_handle epfd;
#ifdef USING_TIMERFD
	int timerfd;
#endif
};

static void *epoll_init(struct event_base *);
static int epoll_dispatch(struct event_base *, struct timeval *);
static void epoll_dealloc(struct event_base *);

static const struct eventop epollops_changelist = {
	"epoll (with changelist)",
	epoll_init,
	event_changelist_add_,
	event_changelist_del_,
	epoll_dispatch,
	epoll_dealloc,
	1, /* need reinit */
	EV_FEATURE_ET|EV_FEATURE_O1|EARLY_CLOSE_IF_HAVE_RDHUP,
	EVENT_CHANGELIST_FDINFO_SIZE
};


static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p);
static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p);

#ifdef EVENT__HAVE_WEPOLL
const struct eventop wepollops = {
	"wepoll",
	epoll_init,
	epoll_nochangelist_add,
	epoll_nochangelist_del,
	epoll_dispatch,
	epoll_dealloc,
	1, /* need reinit */
	EV_FEATURE_O1|EV_FEATURE_EARLY_CLOSE,
	0
};
#else
const struct eventop epollops = {
	"epoll",
	epoll_init,
	epoll_nochangelist_add,
	epoll_nochangelist_del,
	epoll_dispatch,
	epoll_dealloc,
	1, /* need reinit */
	EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_EARLY_CLOSE,
	0
};
#endif


#define INITIAL_NEVENT 32
#define MAX_NEVENT 4096
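/* The `events` array starts with room for INITIAL_NEVENT results and is
 * doubled in epoll_dispatch() whenever a dispatch fills it completely, up to
 * MAX_NEVENT entries. */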

/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
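/* Worked out informally: with LONG_MAX = 2^31 - 1 = 2147483647 and HZ = 1000,
 * (LONG_MAX - 999) / HZ = 2147482 ms, i.e. about 35.79 minutes.  35*60*1000 =
 * 2100000 ms, which stays roughly 47 seconds under that limit. */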

static void *
epoll_init(struct event_base *base)
{
	epoll_handle epfd = INVALID_EPOLL_HANDLE;
	struct epollop *epollop;

#ifdef EVENT__HAVE_EPOLL_CREATE1
	/* First, try the shiny new epoll_create1 interface, if we have it. */
	epfd = epoll_create1(EPOLL_CLOEXEC);
#endif
	if (epfd == INVALID_EPOLL_HANDLE) {
		/* Initialize the kernel queue using the old interface.  (The
		   size field is ignored since 2.6.8.) */
		if ((epfd = epoll_create(32000)) == INVALID_EPOLL_HANDLE) {
			if (errno != ENOSYS)
				event_warn("epoll_create");
			return (NULL);
		}
#ifndef EVENT__HAVE_WEPOLL
		evutil_make_socket_closeonexec(epfd);
#endif
	}

	if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
		close_epoll_handle(epfd);
		return (NULL);
	}

	epollop->epfd = epfd;

	/* Initialize fields */
	epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
	if (epollop->events == NULL) {
		mm_free(epollop);
		close_epoll_handle(epfd);
		return (NULL);
	}
	epollop->nevents = INITIAL_NEVENT;

#ifndef EVENT__HAVE_WEPOLL
	if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
	    ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
		evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL)) {

		base->evsel = &epollops_changelist;
	}
#endif

#ifdef USING_TIMERFD
	/*
	  The epoll interface ordinarily gives us one-millisecond precision,
	  so on Linux it makes perfect sense to use the CLOCK_MONOTONIC_COARSE
	  timer.  But when the user has set the new PRECISE_TIMER flag for an
	  event_base, we can try to use timerfd to give them finer granularity.
	*/
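	/* A minimal caller-side sketch (assumed usage, not part of this file)
	 * of how that flag gets set:
	 *
	 *	struct event_config *cfg = event_config_new();
	 *	event_config_set_flag(cfg, EVENT_BASE_FLAG_PRECISE_TIMER);
	 *	struct event_base *base = event_base_new_with_config(cfg);
	 *	event_config_free(cfg);
	 */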
	if ((base->flags & EVENT_BASE_FLAG_PRECISE_TIMER) &&
	    !(base->flags & EVENT_BASE_FLAG_EPOLL_DISALLOW_TIMERFD) &&
	    base->monotonic_timer.monotonic_clock == CLOCK_MONOTONIC) {
		int fd;
		fd = epollop->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
		if (epollop->timerfd >= 0) {
			struct epoll_event epev;
			memset(&epev, 0, sizeof(epev));
			epev.data.fd = epollop->timerfd;
			epev.events = EPOLLIN;
			if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, fd, &epev) < 0) {
				event_warn("epoll_ctl(timerfd)");
				close(fd);
				epollop->timerfd = -1;
			}
		} else {
			if (errno != EINVAL && errno != ENOSYS) {
				/* These errors probably mean that we were
				 * compiled with timerfd/TFD_* support, but
				 * we're running on a kernel that lacks those.
				 */
				event_warn("timerfd_create");
			}
			epollop->timerfd = -1;
		}
	} else {
		epollop->timerfd = -1;
	}
#endif

	if (sigfd_init_(base) < 0)
		evsig_init_(base);

	return (epollop);
}

static const char *
change_to_string(int change)
{
	change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
	if (change == EV_CHANGE_ADD) {
		return "add";
	} else if (change == EV_CHANGE_DEL) {
		return "del";
	} else if (change == 0) {
		return "none";
	} else {
		return "???";
	}
}

static const char *
epoll_op_to_string(int op)
{
	return op == EPOLL_CTL_ADD?"ADD":
	    op == EPOLL_CTL_DEL?"DEL":
	    op == EPOLL_CTL_MOD?"MOD":
	    "???";
}

#define PRINT_CHANGES(op, events, ch, status)  \
	"Epoll %s(%d) on fd %d " status ". "       \
	"Old events were %d; "                     \
	"read change was %d (%s); "                \
	"write change was %d (%s); "               \
	"close change was %d (%s)",                \
	epoll_op_to_string(op),                    \
	events,                                    \
	ch->fd,                                    \
	ch->old_events,                            \
	ch->read_change,                           \
	change_to_string(ch->read_change),         \
	ch->write_change,                          \
	change_to_string(ch->write_change),        \
	ch->close_change,                          \
	change_to_string(ch->close_change)

static int
epoll_apply_one_change(struct event_base *base,
    struct epollop *epollop,
    const struct event_change *ch)
{
	struct epoll_event epev;
	int op, events = 0;
	int idx;

	idx = EPOLL_OP_TABLE_INDEX(ch);
	op = epoll_op_table[idx].op;
	events = epoll_op_table[idx].events;

	if (!events) {
		EVUTIL_ASSERT(op == 0);
		return 0;
	}

	if ((ch->read_change|ch->write_change|ch->close_change) & EV_CHANGE_ET)
		events |= EPOLLET;

	memset(&epev, 0, sizeof(epev));
#ifdef EVENT__HAVE_WEPOLL
	epev.data.sock = ch->fd;
#else
	epev.data.fd = ch->fd;
#endif
	epev.events = events;
	if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == 0) {
		event_debug((PRINT_CHANGES(op, epev.events, ch, "okay")));
		return 0;
	}

	switch (op) {
	case EPOLL_CTL_MOD:
		if (errno == ENOENT) {
			/* If a MOD operation fails with ENOENT, the
			 * fd was probably closed and re-opened.  We
			 * should retry the operation as an ADD.
			 */
			if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
				event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
				    (int)epev.events, ch->fd);
				return -1;
			} else {
				event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
					(int)epev.events,
					ch->fd));
				return 0;
			}
		}
		break;
	case EPOLL_CTL_ADD:
		if (errno == EEXIST) {
			/* If an ADD operation fails with EEXIST,
			 * either the operation was redundant (as with a
			 * precautionary add), or we ran into a fun
			 * kernel bug where using dup*() to duplicate the
			 * same file into the same fd gives you the same epitem
			 * rather than a fresh one.  For the second case,
			 * we must retry with MOD. */
			if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
				event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
				    (int)epev.events, ch->fd);
				return -1;
			} else {
				event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
					(int)epev.events,
					ch->fd));
				return 0;
			}
		}
		break;
	case EPOLL_CTL_DEL:
		if (errno == ENOENT || errno == EBADF || errno == EPERM) {
			/* If a delete fails with one of these errors,
			 * that's fine too: we closed the fd before we
			 * got around to calling epoll_dispatch. */
			event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
				(int)epev.events,
				ch->fd,
				strerror(errno)));
			return 0;
		}
		break;
	default:
		break;
	}

	event_warn(PRINT_CHANGES(op, epev.events, ch, "failed"));
	return -1;
}

static int
epoll_apply_changes(struct event_base *base)
{
	struct event_changelist *changelist = &base->changelist;
	struct epollop *epollop = base->evbase;
	struct event_change *ch;

	int r = 0;
	int i;

	for (i = 0; i < changelist->n_changes; ++i) {
		ch = &changelist->changes[i];
		if (epoll_apply_one_change(base, epollop, ch) < 0)
			r = -1;
	}

	return (r);
}

static int
epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p)
{
	struct event_change ch;
	ch.fd = fd;
	ch.old_events = old;
	ch.read_change = ch.write_change = ch.close_change = 0;
	if (events & EV_WRITE)
		ch.write_change = EV_CHANGE_ADD |
		    (events & EV_ET);
	if (events & EV_READ)
		ch.read_change = EV_CHANGE_ADD |
		    (events & EV_ET);
	if (events & EV_CLOSED)
		ch.close_change = EV_CHANGE_ADD |
		    (events & EV_ET);

	return epoll_apply_one_change(base, base->evbase, &ch);
}

static int
epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p)
{
	struct event_change ch;
	ch.fd = fd;
	ch.old_events = old;
	ch.read_change = ch.write_change = ch.close_change = 0;
	if (events & EV_WRITE)
		ch.write_change = EV_CHANGE_DEL |
		    (events & EV_ET);
	if (events & EV_READ)
		ch.read_change = EV_CHANGE_DEL |
		    (events & EV_ET);
	if (events & EV_CLOSED)
		ch.close_change = EV_CHANGE_DEL |
		    (events & EV_ET);

	return epoll_apply_one_change(base, base->evbase, &ch);
}

static int
epoll_dispatch(struct event_base *base, struct timeval *tv)
{
	struct epollop *epollop = base->evbase;
	struct epoll_event *events = epollop->events;
	int i, res;
#if defined(EVENT__HAVE_EPOLL_PWAIT2)
	struct timespec ts = { 0, 0 };
#else /* no epoll_pwait2() */
	long timeout = -1;
#endif /* EVENT__HAVE_EPOLL_PWAIT2 */

#ifdef USING_TIMERFD
	if (epollop->timerfd >= 0) {
		struct itimerspec is;
		is.it_interval.tv_sec = 0;
		is.it_interval.tv_nsec = 0;
		if (tv == NULL) {
			/* No timeout; disarm the timer. */
			is.it_value.tv_sec = 0;
			is.it_value.tv_nsec = 0;
		} else {
			if (tv->tv_sec == 0 && tv->tv_usec == 0) {
				/* we need to exit immediately; timerfd can't
				 * do that. */
				timeout = 0;
			}
			is.it_value.tv_sec = tv->tv_sec;
			is.it_value.tv_nsec = tv->tv_usec * 1000;
		}
		/* TODO: we could avoid unnecessary syscalls here by only
		   calling timerfd_settime when the top timeout changes, or
		   when we're called with a different timeval.
		*/
		if (timerfd_settime(epollop->timerfd, 0, &is, NULL) < 0) {
			event_warn("timerfd_settime");
		}
	} else
#endif
	if (tv != NULL) {
#if defined(EVENT__HAVE_EPOLL_PWAIT2)
		TIMEVAL_TO_TIMESPEC(tv, &ts);
#else /* no epoll_pwait2() */
		timeout = evutil_tv_to_msec_(tv);
		if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
			/* Linux kernels can wait forever if the timeout is
			 * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
			timeout = MAX_EPOLL_TIMEOUT_MSEC;
		}
#endif /* EVENT__HAVE_EPOLL_PWAIT2 */
	}

	epoll_apply_changes(base);
	event_changelist_remove_all_(&base->changelist, base);

	EVBASE_RELEASE_LOCK(base, th_base_lock);

#if defined(EVENT__HAVE_EPOLL_PWAIT2)
	res = epoll_pwait2(epollop->epfd, events, epollop->nevents, tv ? &ts : NULL, NULL);
#else /* no epoll_pwait2() */
	res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
#endif /* EVENT__HAVE_EPOLL_PWAIT2 */

	EVBASE_ACQUIRE_LOCK(base, th_base_lock);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("epoll_wait");
			return (-1);
		}

		return (0);
	}

	event_debug(("%s: epoll_wait reports %d", __func__, res));
	EVUTIL_ASSERT(res <= epollop->nevents);

	for (i = 0; i < res; i++) {
		int what = events[i].events;
		short ev = 0;
#ifdef USING_TIMERFD
		if (events[i].data.fd == epollop->timerfd)
			continue;
#endif

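		/* Translate epoll's result flags into libevent flags:
		 * EPOLLERR, or EPOLLHUP without EPOLLRDHUP, is reported as
		 * both readable and writable so the callback's next
		 * read()/write() surfaces the error; EPOLLRDHUP (the peer
		 * shut down its side) becomes EV_CLOSED. */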
		if (what & EPOLLERR) {
			ev = EV_READ | EV_WRITE;
		} else if ((what & EPOLLHUP) && !(what & EPOLLRDHUP)) {
			ev = EV_READ | EV_WRITE;
		} else {
			if (what & EPOLLIN)
				ev |= EV_READ;
			if (what & EPOLLOUT)
				ev |= EV_WRITE;
			if (what & EPOLLRDHUP)
				ev |= EV_CLOSED;
		}

		if (!ev)
			continue;

#ifdef EVENT__HAVE_WEPOLL
		evmap_io_active_(base, events[i].data.sock, ev);
#else
		evmap_io_active_(base, events[i].data.fd, ev | EV_ET);
#endif
	}

	if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
		/* We used all of the event space this time.  We should
		   be ready for more events next time. */
		int new_nevents = epollop->nevents * 2;
		struct epoll_event *new_events;

		new_events = mm_realloc(epollop->events,
		    new_nevents * sizeof(struct epoll_event));
		if (new_events) {
			epollop->events = new_events;
			epollop->nevents = new_nevents;
		}
	}

	return (0);
}


static void
epoll_dealloc(struct event_base *base)
{
	struct epollop *epollop = base->evbase;

	evsig_dealloc_(base);
	if (epollop->events)
		mm_free(epollop->events);
	if (epollop->epfd != INVALID_EPOLL_HANDLE)
		close_epoll_handle(epollop->epfd);
#ifdef USING_TIMERFD
	if (epollop->timerfd >= 0)
		close(epollop->timerfd);
#endif

	memset(epollop, 0, sizeof(struct epollop));
	mm_free(epollop);
}

#endif /* defined EVENT__HAVE_EPOLL || defined EVENT__HAVE_WEPOLL */