/src/dovecot/src/lib/ioloop-epoll.c
Line | Count | Source |
1 | | /* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "array.h" |
5 | | #include "sleep.h" |
6 | | #include "ioloop-private.h" |
7 | | #include "ioloop-iolist.h" |
8 | | |
9 | | #ifdef IOLOOP_EPOLL |
10 | | |
11 | | #include <sys/epoll.h> |
12 | | #include <unistd.h> |
13 | | |
/* State kept by the epoll I/O loop backend; one instance is stored in
   ioloop->handler_context. */
struct ioloop_handler_context {
	/* descriptor returned by epoll_create() */
	int epfd;

	/* how many slots in "events" were left behind by fds that have been
	   fully removed; io_loop_handle_add() consumes these before it grows
	   the array */
	unsigned int deleted_count;
	/* io_list pointers, indexed by file descriptor number */
	ARRAY(struct io_list *) fd_index;
	/* buffer that epoll_wait() fills in; a slot is appended for every
	   newly registered fd */
	ARRAY(struct epoll_event) events;
};
21 | | |
22 | | void io_loop_handler_init(struct ioloop *ioloop, unsigned int initial_fd_count) |
23 | 0 | { |
24 | 0 | struct ioloop_handler_context *ctx; |
25 | |
|
26 | 0 | ioloop->handler_context = ctx = i_new(struct ioloop_handler_context, 1); |
27 | |
|
28 | 0 | i_array_init(&ctx->events, initial_fd_count); |
29 | 0 | i_array_init(&ctx->fd_index, initial_fd_count); |
30 | |
|
31 | 0 | ctx->epfd = epoll_create(initial_fd_count); |
32 | 0 | if (ctx->epfd < 0) { |
33 | 0 | if (errno != EMFILE) |
34 | 0 | i_fatal("epoll_create(): %m"); |
35 | 0 | else { |
36 | 0 | i_fatal("epoll_create(): %m (you may need to increase " |
37 | 0 | "/proc/sys/fs/epoll/max_user_instances)"); |
38 | 0 | } |
39 | 0 | } |
40 | 0 | fd_close_on_exec(ctx->epfd, TRUE); |
41 | 0 | } |
42 | | |
43 | | void io_loop_handler_deinit(struct ioloop *ioloop) |
44 | 0 | { |
45 | 0 | struct ioloop_handler_context *ctx = ioloop->handler_context; |
46 | 0 | struct io_list **list; |
47 | 0 | unsigned int i, count; |
48 | |
|
49 | 0 | list = array_get_modifiable(&ctx->fd_index, &count); |
50 | 0 | for (i = 0; i < count; i++) |
51 | 0 | i_free(list[i]); |
52 | |
|
53 | 0 | if (close(ctx->epfd) < 0) |
54 | 0 | i_error("close(epoll) failed: %m"); |
55 | 0 | array_free(&ioloop->handler_context->fd_index); |
56 | 0 | array_free(&ioloop->handler_context->events); |
57 | 0 | i_free(ioloop->handler_context); |
58 | 0 | } |
59 | | |
60 | 0 | #define IO_EPOLL_ERROR (EPOLLERR | EPOLLHUP) |
61 | 0 | #define IO_EPOLL_INPUT (EPOLLIN | EPOLLPRI | IO_EPOLL_ERROR) |
62 | 0 | #define IO_EPOLL_OUTPUT (EPOLLOUT | IO_EPOLL_ERROR) |
63 | | |
64 | | static int epoll_event_mask(struct io_list *list) |
65 | 0 | { |
66 | 0 | int events = 0, i; |
67 | 0 | struct io_file *io; |
68 | |
|
69 | 0 | for (i = 0; i < IOLOOP_IOLIST_IOS_PER_FD; i++) { |
70 | 0 | io = list->ios[i]; |
71 | |
|
72 | 0 | if (io == NULL) |
73 | 0 | continue; |
74 | | |
75 | 0 | if ((io->io.condition & IO_READ) != 0) |
76 | 0 | events |= IO_EPOLL_INPUT; |
77 | 0 | if ((io->io.condition & IO_WRITE) != 0) |
78 | 0 | events |= IO_EPOLL_OUTPUT; |
79 | 0 | if ((io->io.condition & IO_ERROR) != 0) |
80 | 0 | events |= IO_EPOLL_ERROR; |
81 | 0 | } |
82 | |
|
83 | 0 | return events; |
84 | 0 | } |
85 | | |
86 | | void io_loop_handle_add(struct io_file *io) |
87 | 0 | { |
88 | 0 | struct ioloop_handler_context *ctx = io->io.ioloop->handler_context; |
89 | 0 | struct io_list **list; |
90 | 0 | struct epoll_event event; |
91 | 0 | int op; |
92 | 0 | bool first; |
93 | |
|
94 | 0 | list = array_idx_get_space(&ctx->fd_index, io->fd); |
95 | 0 | if (*list == NULL) |
96 | 0 | *list = i_new(struct io_list, 1); |
97 | |
|
98 | 0 | first = ioloop_iolist_add(*list, io); |
99 | |
|
100 | 0 | i_zero(&event); |
101 | 0 | event.data.ptr = *list; |
102 | 0 | event.events = epoll_event_mask(*list); |
103 | |
|
104 | 0 | op = first ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; |
105 | |
|
106 | 0 | if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) { |
107 | 0 | if (errno == EPERM && op == EPOLL_CTL_ADD) { |
108 | 0 | i_panic("epoll_ctl(add, %d) failed: %m " |
109 | 0 | "(fd doesn't support epoll%s)", io->fd, |
110 | 0 | io->fd != STDIN_FILENO ? "" : |
111 | 0 | " - instead of '<file', try 'cat file|'"); |
112 | 0 | } |
113 | 0 | i_panic("epoll_ctl(%s, %d) failed: %m", |
114 | 0 | op == EPOLL_CTL_ADD ? "add" : "mod", io->fd); |
115 | 0 | } |
116 | | |
117 | 0 | if (first) { |
118 | | /* allow epoll_wait() to return the maximum number of events |
119 | | by keeping space allocated for each file descriptor */ |
120 | 0 | if (ctx->deleted_count > 0) |
121 | 0 | ctx->deleted_count--; |
122 | 0 | else |
123 | 0 | array_append_zero(&ctx->events); |
124 | 0 | } |
125 | 0 | } |
126 | | |
127 | | void io_loop_handle_remove(struct io_file *io, bool closed) |
128 | 0 | { |
129 | 0 | struct ioloop_handler_context *ctx = io->io.ioloop->handler_context; |
130 | 0 | struct io_list **list; |
131 | 0 | struct epoll_event event; |
132 | 0 | int op; |
133 | 0 | bool last; |
134 | |
|
135 | 0 | list = array_idx_modifiable(&ctx->fd_index, io->fd); |
136 | 0 | last = ioloop_iolist_del(*list, io); |
137 | |
|
138 | 0 | if (!closed) { |
139 | 0 | i_zero(&event); |
140 | 0 | event.data.ptr = *list; |
141 | 0 | event.events = epoll_event_mask(*list); |
142 | |
|
143 | 0 | op = last ? EPOLL_CTL_DEL : EPOLL_CTL_MOD; |
144 | |
|
145 | 0 | if (epoll_ctl(ctx->epfd, op, io->fd, &event) < 0) { |
146 | 0 | const char *errstr = t_strdup_printf( |
147 | 0 | "epoll_ctl(%s, %d) failed: %m", |
148 | 0 | op == EPOLL_CTL_DEL ? "del" : "mod", io->fd); |
149 | 0 | if (errno != ENOSPC && errno != ENOMEM) |
150 | 0 | i_panic("%s", errstr); |
151 | 0 | else |
152 | 0 | i_error("%s", errstr); |
153 | 0 | } |
154 | 0 | } |
155 | 0 | if (last) { |
156 | | /* since we're not freeing memory in any case, just increase |
157 | | deleted counter so next handle_add() can just decrease it |
158 | | instead of appending to the events array */ |
159 | 0 | ctx->deleted_count++; |
160 | 0 | } |
161 | 0 | i_free(io); |
162 | 0 | } |
163 | | |
164 | | void io_loop_handler_run_internal(struct ioloop *ioloop) |
165 | 0 | { |
166 | 0 | struct ioloop_handler_context *ctx = ioloop->handler_context; |
167 | 0 | struct epoll_event *events; |
168 | 0 | const struct epoll_event *event; |
169 | 0 | struct io_list *list; |
170 | 0 | struct io_file *io; |
171 | 0 | struct timeval tv; |
172 | 0 | unsigned int events_count; |
173 | 0 | int msecs, ret, i, j; |
174 | 0 | bool call; |
175 | |
|
176 | 0 | i_assert(ctx != NULL); |
177 | | |
178 | | /* get the time left for next timeout task */ |
179 | 0 | msecs = io_loop_run_get_wait_time(ioloop, &tv); |
180 | |
|
181 | 0 | events = array_get_modifiable(&ctx->events, &events_count); |
182 | 0 | if (ioloop->io_files != NULL && events_count > ctx->deleted_count) { |
183 | 0 | ret = epoll_wait(ctx->epfd, events, events_count, msecs); |
184 | 0 | if (ret < 0 && errno != EINTR) |
185 | 0 | i_fatal("epoll_wait(): %m"); |
186 | 0 | } else { |
187 | | /* no I/Os, but we should have some timeouts. |
188 | | just wait for them. */ |
189 | 0 | i_assert(msecs >= 0); |
190 | 0 | i_sleep_intr_msecs(msecs); |
191 | 0 | ret = 0; |
192 | 0 | } |
193 | | |
194 | | /* execute timeout handlers */ |
195 | 0 | io_loop_handle_timeouts(ioloop); |
196 | |
|
197 | 0 | if (!ioloop->running) |
198 | 0 | return; |
199 | | |
200 | 0 | for (i = 0; i < ret; i++) { |
201 | | /* io_loop_handle_add() may cause events array reallocation, |
202 | | so we have use array_idx() */ |
203 | 0 | event = array_idx(&ctx->events, i); |
204 | 0 | list = event->data.ptr; |
205 | |
|
206 | 0 | for (j = 0; j < IOLOOP_IOLIST_IOS_PER_FD; j++) { |
207 | 0 | io = list->ios[j]; |
208 | 0 | if (io == NULL) |
209 | 0 | continue; |
210 | | |
211 | 0 | call = FALSE; |
212 | 0 | if ((event->events & (EPOLLHUP | EPOLLERR)) != 0) |
213 | 0 | call = TRUE; |
214 | 0 | else if ((io->io.condition & IO_READ) != 0) |
215 | 0 | call = (event->events & EPOLLIN) != 0; |
216 | 0 | else if ((io->io.condition & IO_WRITE) != 0) |
217 | 0 | call = (event->events & EPOLLOUT) != 0; |
218 | 0 | else if ((io->io.condition & IO_ERROR) != 0) |
219 | 0 | call = (event->events & IO_EPOLL_ERROR) != 0; |
220 | |
|
221 | 0 | if (call) { |
222 | 0 | io_loop_call_io(&io->io); |
223 | 0 | if (!ioloop->running) |
224 | 0 | return; |
225 | 0 | } |
226 | 0 | } |
227 | 0 | } |
228 | 0 | } |
229 | | |
230 | | #endif /* IOLOOP_EPOLL */ |