This source file includes the following definitions.
- epoll_init
- change_to_string
- epoll_op_to_string
- epoll_apply_one_change
- epoll_apply_changes
- epoll_nochangelist_add
- epoll_nochangelist_del
- epoll_dispatch
- epoll_dealloc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "event2/event-config.h"
28
29 #include <stdint.h>
30 #include <sys/types.h>
31 #include <sys/resource.h>
32 #ifdef _EVENT_HAVE_SYS_TIME_H
33 #include <sys/time.h>
34 #endif
35 #include <sys/queue.h>
36 #include <sys/epoll.h>
37 #include <signal.h>
38 #include <limits.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 #include <errno.h>
44 #ifdef _EVENT_HAVE_FCNTL_H
45 #include <fcntl.h>
46 #endif
47
48 #include "event-internal.h"
49 #include "evsignal-internal.h"
50 #include "event2/thread.h"
51 #include "evthread-internal.h"
52 #include "log-internal.h"
53 #include "evmap-internal.h"
54 #include "changelist-internal.h"
55
/* Per-event_base state owned by the epoll backend. */
struct epollop {
	/* Buffer handed to epoll_wait() to receive ready events. */
	struct epoll_event *events;
	/* Capacity of `events`, counted in struct epoll_event entries. */
	int nevents;
	/* Descriptor obtained from epoll_create(). */
	int epfd;
};
61
/* Backend entry points referenced by both eventop tables in this file. */
static void *epoll_init(struct event_base *base);
static int epoll_dispatch(struct event_base *base, struct timeval *tv);
static void epoll_dealloc(struct event_base *base);
65
66 static const struct eventop epollops_changelist = {
67 "epoll (with changelist)",
68 epoll_init,
69 event_changelist_add,
70 event_changelist_del,
71 epoll_dispatch,
72 epoll_dealloc,
73 1,
74 EV_FEATURE_ET|EV_FEATURE_O1,
75 EVENT_CHANGELIST_FDINFO_SIZE
76 };
77
78
79 static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
80 short old, short events, void *p);
81 static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
82 short old, short events, void *p);
83
84 const struct eventop epollops = {
85 "epoll",
86 epoll_init,
87 epoll_nochangelist_add,
88 epoll_nochangelist_del,
89 epoll_dispatch,
90 epoll_dealloc,
91 1,
92 EV_FEATURE_ET|EV_FEATURE_O1,
93 0
94 };
95
/* Number of epoll_event slots allocated up front by epoll_init(). */
#define INITIAL_NEVENT 32
/* Once the buffer has this many slots, epoll_dispatch() stops doubling it. */
#define MAX_NEVENT 4096
98
99
100
101
102
103
104
/*
 * Longest timeout, in milliseconds, that epoll_dispatch() will pass to
 * epoll_wait(): 35 minutes.  Longer (or negative) computed timeouts are
 * clamped to this value.
 * NOTE(review): presumably this works around older Linux kernels that
 * mishandle very large epoll timeouts -- confirm.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35 * 60 * 1000)
106
107 static void *
108 epoll_init(struct event_base *base)
109 {
110 int epfd;
111 struct epollop *epollop;
112
113
114
115 if ((epfd = epoll_create(32000)) == -1) {
116 if (errno != ENOSYS)
117 event_warn("epoll_create");
118 return (NULL);
119 }
120
121 evutil_make_socket_closeonexec(epfd);
122
123 if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
124 close(epfd);
125 return (NULL);
126 }
127
128 epollop->epfd = epfd;
129
130
131 epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
132 if (epollop->events == NULL) {
133 mm_free(epollop);
134 close(epfd);
135 return (NULL);
136 }
137 epollop->nevents = INITIAL_NEVENT;
138
139 if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
140 ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
141 evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL))
142 base->evsel = &epollops_changelist;
143
144 evsig_init(base);
145
146 return (epollop);
147 }
148
149 static const char *
150 change_to_string(int change)
151 {
152 change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
153 if (change == EV_CHANGE_ADD) {
154 return "add";
155 } else if (change == EV_CHANGE_DEL) {
156 return "del";
157 } else if (change == 0) {
158 return "none";
159 } else {
160 return "???";
161 }
162 }
163
/*
 * Map an epoll_ctl() opcode to a short static name for log messages.
 * Returns "ADD", "DEL" or "MOD", and "???" for any other value.
 */
static const char *
epoll_op_to_string(int op)
{
	switch (op) {
	case EPOLL_CTL_ADD:
		return "ADD";
	case EPOLL_CTL_DEL:
		return "DEL";
	case EPOLL_CTL_MOD:
		return "MOD";
	default:
		return "???";
	}
}
172
173 static int
174 epoll_apply_one_change(struct event_base *base,
175 struct epollop *epollop,
176 const struct event_change *ch)
177 {
178 struct epoll_event epev;
179 int op, events = 0;
180
181 if (1) {
182
183
184
185
186
187
188
189
190
191
192
193
194 if ((ch->read_change & EV_CHANGE_ADD) ||
195 (ch->write_change & EV_CHANGE_ADD)) {
196
197
198 events = 0;
199 op = EPOLL_CTL_ADD;
200 if (ch->read_change & EV_CHANGE_ADD) {
201 events |= EPOLLIN;
202 } else if (ch->read_change & EV_CHANGE_DEL) {
203 ;
204 } else if (ch->old_events & EV_READ) {
205 events |= EPOLLIN;
206 }
207 if (ch->write_change & EV_CHANGE_ADD) {
208 events |= EPOLLOUT;
209 } else if (ch->write_change & EV_CHANGE_DEL) {
210 ;
211 } else if (ch->old_events & EV_WRITE) {
212 events |= EPOLLOUT;
213 }
214 if ((ch->read_change|ch->write_change) & EV_ET)
215 events |= EPOLLET;
216
217 if (ch->old_events) {
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232 op = EPOLL_CTL_MOD;
233 }
234 } else if ((ch->read_change & EV_CHANGE_DEL) ||
235 (ch->write_change & EV_CHANGE_DEL)) {
236
237
238 op = EPOLL_CTL_DEL;
239
240 if (ch->read_change & EV_CHANGE_DEL) {
241 if (ch->write_change & EV_CHANGE_DEL) {
242 events = EPOLLIN|EPOLLOUT;
243 } else if (ch->old_events & EV_WRITE) {
244 events = EPOLLOUT;
245 op = EPOLL_CTL_MOD;
246 } else {
247 events = EPOLLIN;
248 }
249 } else if (ch->write_change & EV_CHANGE_DEL) {
250 if (ch->old_events & EV_READ) {
251 events = EPOLLIN;
252 op = EPOLL_CTL_MOD;
253 } else {
254 events = EPOLLOUT;
255 }
256 }
257 }
258
259 if (!events)
260 return 0;
261
262 memset(&epev, 0, sizeof(epev));
263 epev.data.fd = ch->fd;
264 epev.events = events;
265 if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == -1) {
266 if (op == EPOLL_CTL_MOD && errno == ENOENT) {
267
268
269
270
271 if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
272 event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
273 (int)epev.events, ch->fd);
274 return -1;
275 } else {
276 event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
277 (int)epev.events,
278 ch->fd));
279 }
280 } else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
281
282
283
284
285
286
287
288 if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
289 event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
290 (int)epev.events, ch->fd);
291 return -1;
292 } else {
293 event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
294 (int)epev.events,
295 ch->fd));
296 }
297 } else if (op == EPOLL_CTL_DEL &&
298 (errno == ENOENT || errno == EBADF ||
299 errno == EPERM)) {
300
301
302
303 event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
304 (int)epev.events,
305 ch->fd,
306 strerror(errno)));
307 } else {
308 event_warn("Epoll %s(%d) on fd %d failed. Old events were %d; read change was %d (%s); write change was %d (%s)",
309 epoll_op_to_string(op),
310 (int)epev.events,
311 ch->fd,
312 ch->old_events,
313 ch->read_change,
314 change_to_string(ch->read_change),
315 ch->write_change,
316 change_to_string(ch->write_change));
317 return -1;
318 }
319 } else {
320 event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
321 epoll_op_to_string(op),
322 (int)epev.events,
323 (int)ch->fd,
324 ch->old_events,
325 ch->read_change,
326 ch->write_change));
327 }
328 }
329 return 0;
330 }
331
332 static int
333 epoll_apply_changes(struct event_base *base)
334 {
335 struct event_changelist *changelist = &base->changelist;
336 struct epollop *epollop = base->evbase;
337 struct event_change *ch;
338
339 int r = 0;
340 int i;
341
342 for (i = 0; i < changelist->n_changes; ++i) {
343 ch = &changelist->changes[i];
344 if (epoll_apply_one_change(base, epollop, ch) < 0)
345 r = -1;
346 }
347
348 return (r);
349 }
350
351 static int
352 epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
353 short old, short events, void *p)
354 {
355 struct event_change ch;
356 ch.fd = fd;
357 ch.old_events = old;
358 ch.read_change = ch.write_change = 0;
359 if (events & EV_WRITE)
360 ch.write_change = EV_CHANGE_ADD |
361 (events & EV_ET);
362 if (events & EV_READ)
363 ch.read_change = EV_CHANGE_ADD |
364 (events & EV_ET);
365
366 return epoll_apply_one_change(base, base->evbase, &ch);
367 }
368
369 static int
370 epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
371 short old, short events, void *p)
372 {
373 struct event_change ch;
374 ch.fd = fd;
375 ch.old_events = old;
376 ch.read_change = ch.write_change = 0;
377 if (events & EV_WRITE)
378 ch.write_change = EV_CHANGE_DEL;
379 if (events & EV_READ)
380 ch.read_change = EV_CHANGE_DEL;
381
382 return epoll_apply_one_change(base, base->evbase, &ch);
383 }
384
385 static int
386 epoll_dispatch(struct event_base *base, struct timeval *tv)
387 {
388 struct epollop *epollop = base->evbase;
389 struct epoll_event *events = epollop->events;
390 int i, res;
391 long timeout = -1;
392
393 if (tv != NULL) {
394 timeout = evutil_tv_to_msec(tv);
395 if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
396
397
398 timeout = MAX_EPOLL_TIMEOUT_MSEC;
399 }
400 }
401
402 epoll_apply_changes(base);
403 event_changelist_remove_all(&base->changelist, base);
404
405 EVBASE_RELEASE_LOCK(base, th_base_lock);
406
407 res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
408
409 EVBASE_ACQUIRE_LOCK(base, th_base_lock);
410
411 if (res == -1) {
412 if (errno != EINTR) {
413 event_warn("epoll_wait");
414 return (-1);
415 }
416
417 return (0);
418 }
419
420 event_debug(("%s: epoll_wait reports %d", __func__, res));
421 EVUTIL_ASSERT(res <= epollop->nevents);
422
423 for (i = 0; i < res; i++) {
424 int what = events[i].events;
425 short ev = 0;
426
427 if (what & (EPOLLHUP|EPOLLERR)) {
428 ev = EV_READ | EV_WRITE;
429 } else {
430 if (what & EPOLLIN)
431 ev |= EV_READ;
432 if (what & EPOLLOUT)
433 ev |= EV_WRITE;
434 }
435
436 if (!ev)
437 continue;
438
439 evmap_io_active(base, events[i].data.fd, ev | EV_ET);
440 }
441
442 if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
443
444
445 int new_nevents = epollop->nevents * 2;
446 struct epoll_event *new_events;
447
448 new_events = mm_realloc(epollop->events,
449 new_nevents * sizeof(struct epoll_event));
450 if (new_events) {
451 epollop->events = new_events;
452 epollop->nevents = new_nevents;
453 }
454 }
455
456 return (0);
457 }
458
459
460 static void
461 epoll_dealloc(struct event_base *base)
462 {
463 struct epollop *epollop = base->evbase;
464
465 evsig_dealloc(base);
466 if (epollop->events)
467 mm_free(epollop->events);
468 if (epollop->epfd >= 0)
469 close(epollop->epfd);
470
471 memset(epollop, 0, sizeof(struct epollop));
472 mm_free(epollop);
473 }