/* kqueue.c — libevent kqueue backend (revision 1.1.1.5) */
      1 /*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
      2 
      3 /*
      4  * Copyright 2000-2007 Niels Provos <provos (at) citi.umich.edu>
      5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the author may not be used to endorse or promote products
     16  *    derived from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 #include "event2/event-config.h"
     30 #include "evconfig-private.h"
     31 
     32 #ifdef EVENT__HAVE_KQUEUE
     33 
     34 #include <sys/types.h>
     35 #ifdef EVENT__HAVE_SYS_TIME_H
     36 #include <sys/time.h>
     37 #endif
     38 #include <sys/queue.h>
     39 #include <sys/event.h>
     40 #include <signal.h>
     41 #include <stdio.h>
     42 #include <stdlib.h>
     43 #include <string.h>
     44 #include <unistd.h>
     45 #include <errno.h>
     46 #ifdef EVENT__HAVE_INTTYPES_H
     47 #include <inttypes.h>
     48 #endif
     49 
     50 /* Some platforms apparently define the udata field of struct kevent as
     51  * intptr_t, whereas others define it as void*.  There doesn't seem to be an
     52  * easy way to tell them apart via autoconf, so we need to use OS macros. */
     53 #if defined(EVENT__HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
     54 #define PTR_TO_UDATA(x)	((intptr_t)(x))
     55 #define INT_TO_UDATA(x) ((intptr_t)(x))
     56 #else
     57 #define PTR_TO_UDATA(x)	(x)
     58 #define INT_TO_UDATA(x) ((void*)(x))
     59 #endif
     60 
     61 #include "event-internal.h"
     62 #include "log-internal.h"
     63 #include "evmap-internal.h"
     64 #include "event2/thread.h"
     65 #include "evthread-internal.h"
     66 #include "changelist-internal.h"
     67 
     68 #include "kqueue-internal.h"
     69 
     70 #define NEVENT		64
     71 
/* Per-base state for the kqueue backend: the pending change list handed
 * to kevent(), the buffer that receives triggered events, and the kqueue
 * descriptor itself. */
struct kqop {
	struct kevent *changes;		/* kevents queued for the next kevent() call */
	int changes_size;		/* allocated capacity of 'changes', in entries */

	struct kevent *events;		/* buffer kevent() fills with triggered events */
	int events_size;		/* allocated capacity of 'events', in entries */
	int kq;				/* the kqueue file descriptor */
	int notify_event_added;		/* nonzero once the EVFILT_USER wakeup event is registered */
	pid_t pid;			/* process that created 'kq'; kqop_free only closes
					 * the fd when still running in that process */
};
     82 
/* Forward declarations for the backend entry points defined below. */
static void kqop_free(struct kqop *kqop);

static void *kq_init(struct event_base *);
static int kq_sig_add(struct event_base *, int, short, short, void *);
static int kq_sig_del(struct event_base *, int, short, short, void *);
static int kq_dispatch(struct event_base *, struct timeval *);
static void kq_dealloc(struct event_base *);
     90 
     91 const struct eventop kqops = {
     92 	"kqueue",
     93 	kq_init,
     94 	event_changelist_add_,
     95 	event_changelist_del_,
     96 	kq_dispatch,
     97 	kq_dealloc,
     98 	1 /* need reinit */,
     99     EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS,
    100 	EVENT_CHANGELIST_FDINFO_SIZE
    101 };
    102 
/* Signal backend descriptor: signals are delivered through the kqueue
 * itself via EVFILT_SIGNAL, so only add/del hooks are needed here;
 * dispatching happens inside kq_dispatch. */
static const struct eventop kqsigops = {
	"kqueue_signal",
	NULL,
	kq_sig_add,
	kq_sig_del,
	NULL,
	NULL,
	1 /* need reinit */,
	0,
	0
};
    114 
/* Set up the kqueue backend for 'base'.  Allocates the kqop state,
 * creates the kqueue descriptor, sizes the initial change/event buffers,
 * and probes the kernel for a known-broken kqueue implementation.
 * Returns the new kqop on success; on any failure, frees everything via
 * kqop_free and returns NULL. */
static void *
kq_init(struct event_base *base)
{
	int kq = -1;
	struct kqop *kqueueop = NULL;

	if (!(kqueueop = mm_calloc(1, sizeof(struct kqop))))
		return (NULL);

/* Initialize the kernel queue */

	if ((kq = kqueue()) == -1) {
		event_warn("kqueue");
		goto err;
	}

	kqueueop->kq = kq;

	/* Record the creating process so kqop_free only closes the fd
	 * when running in that same process. */
	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->changes == NULL)
		goto err;
	kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->events == NULL)
		goto err;
	kqueueop->events_size = kqueueop->changes_size = NEVENT;

	/* Check for Mac OS X kqueue bug. */
	memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]);
	kqueueop->changes[0].ident = -1;
	kqueueop->changes[0].filter = EVFILT_READ;
	kqueueop->changes[0].flags = EV_ADD;
	/*
	 * If kqueue works, then kevent will succeed, and it will
	 * stick an error in events[0].  If kqueue is broken, then
	 * kevent will fail.
	 */
	if (kevent(kq,
		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
	    (int)kqueueop->events[0].ident != -1 ||
	    kqueueop->events[0].flags != EV_ERROR) {
		event_warn("%s: detected broken kqueue; not using.", __func__);
		goto err;
	}

	/* Route signal handling through this backend's EVFILT_SIGNAL
	 * support instead of the generic signal machinery. */
	base->evsigsel = &kqsigops;

	return (kqueueop);
err:
	if (kqueueop)
		kqop_free(kqueueop);

	return (NULL);
}
    171 
    172 #define ADD_UDATA 0x30303
    173 
    174 static void
    175 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change)
    176 {
    177 	memset(out, 0, sizeof(struct kevent));
    178 	out->ident = fd;
    179 	out->filter = filter;
    180 
    181 	if (change & EV_CHANGE_ADD) {
    182 		out->flags = EV_ADD;
    183 		/* We set a magic number here so that we can tell 'add'
    184 		 * errors from 'del' errors. */
    185 		out->udata = INT_TO_UDATA(ADD_UDATA);
    186 		if (change & EV_ET)
    187 			out->flags |= EV_CLEAR;
    188 #ifdef NOTE_EOF
    189 		/* Make it behave like select() and poll() */
    190 		if (filter == EVFILT_READ)
    191 			out->fflags = NOTE_EOF;
    192 #endif
    193 	} else {
    194 		EVUTIL_ASSERT(change & EV_CHANGE_DEL);
    195 		out->flags = EV_DELETE;
    196 	}
    197 }
    198 
    199 static int
    200 kq_build_changes_list(const struct event_changelist *changelist,
    201     struct kqop *kqop)
    202 {
    203 	int i;
    204 	int n_changes = 0;
    205 
    206 	for (i = 0; i < changelist->n_changes; ++i) {
    207 		struct event_change *in_ch = &changelist->changes[i];
    208 		struct kevent *out_ch;
    209 		if (n_changes >= kqop->changes_size - 1) {
    210 			int newsize = kqop->changes_size * 2;
    211 			struct kevent *newchanges;
    212 
    213 			newchanges = mm_realloc(kqop->changes,
    214 			    newsize * sizeof(struct kevent));
    215 			if (newchanges == NULL) {
    216 				event_warn("%s: realloc", __func__);
    217 				return (-1);
    218 			}
    219 			kqop->changes = newchanges;
    220 			kqop->changes_size = newsize;
    221 		}
    222 		if (in_ch->read_change) {
    223 			out_ch = &kqop->changes[n_changes++];
    224 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ,
    225 			    in_ch->read_change);
    226 		}
    227 		if (in_ch->write_change) {
    228 			out_ch = &kqop->changes[n_changes++];
    229 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE,
    230 			    in_ch->write_change);
    231 		}
    232 	}
    233 	return n_changes;
    234 }
    235 
    236 static int
    237 kq_grow_events(struct kqop *kqop, size_t new_size)
    238 {
    239 	struct kevent *newresult;
    240 
    241 	newresult = mm_realloc(kqop->events,
    242 	    new_size * sizeof(struct kevent));
    243 
    244 	if (newresult) {
    245 		kqop->events = newresult;
    246 		kqop->events_size = new_size;
    247 		return 0;
    248 	} else {
    249 		return -1;
    250 	}
    251 }
    252 
/* Flush pending changes to the kernel and wait up to 'tv' (NULL means
 * wait forever) for events; activate every triggered event on 'base'.
 * Returns 0 on success (including EINTR and timeout), -1 on error.
 * Called with the base lock held; the lock is released only around the
 * blocking kevent() call. */
static int
kq_dispatch(struct event_base *base, struct timeval *tv)
{
	struct kqop *kqop = base->evbase;
	struct kevent *events = kqop->events;
	struct kevent *changes;
	struct timespec ts, *ts_p = NULL;
	int i, n_changes, res;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		ts_p = &ts;
	}

	/* Build "changes" from "base->changes" */
	EVUTIL_ASSERT(kqop->changes);
	n_changes = kq_build_changes_list(&base->changelist, kqop);
	if (n_changes < 0)
		return -1;

	event_changelist_remove_all_(&base->changelist, base);

	/* steal the changes array in case some broken code tries to call
	 * dispatch twice at once. */
	changes = kqop->changes;
	kqop->changes = NULL;

	/* Make sure that 'events' is at least as long as the list of changes:
	 * otherwise errors in the changes can get reported as a -1 return
	 * value from kevent() rather than as EV_ERROR events in the events
	 * array.
	 *
	 * (We could instead handle -1 return values from kevent() by
	 * retrying with a smaller changes array or a larger events array,
	 * but this approach seems less risky for now.)
	 */
	if (kqop->events_size < n_changes) {
		int new_size = kqop->events_size;
		do {
			new_size *= 2;
		} while (new_size < n_changes);

		/* NOTE(review): the return value is ignored here; on
		 * allocation failure kevent() runs with the old, smaller
		 * events array, whose -1 result is handled below. */
		kq_grow_events(kqop, new_size);
		events = kqop->events;
	}

	/* Drop the base lock only for the blocking syscall. */
	EVBASE_RELEASE_LOCK(base, th_base_lock);

	res = kevent(kqop->kq, changes, n_changes,
	    events, kqop->events_size, ts_p);

	EVBASE_ACQUIRE_LOCK(base, th_base_lock);

	/* Return the stolen changes array for reuse by the next call. */
	EVUTIL_ASSERT(kqop->changes == NULL);
	kqop->changes = changes;

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("kevent");
			return (-1);
		}

		/* Interrupted by a signal: not an error. */
		return (0);
	}

	event_debug(("%s: kevent reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;

		if (events[i].flags & EV_ERROR) {
			switch (events[i].data) {

			/* Can occur on delete if we are not currently
			 * watching any events on this fd.  That can
			 * happen when the fd was closed and another
			 * file was opened with that fd. */
			case ENOENT:
			/* Can occur for reasons not fully understood
			 * on FreeBSD. */
			case EINVAL:
				continue;
#if defined(__FreeBSD__) && defined(ENOTCAPABLE)
			/*
			 * This currently occurs if an FD is closed
			 * before the EV_DELETE makes it out via kevent().
			 * The FreeBSD capabilities code sees the blank
			 * capability set and rejects the request to
			 * modify an event.
			 *
			 * To be strictly correct - when an FD is closed,
			 * all the registered events are also removed.
			 * Queuing EV_DELETE to a closed FD is wrong.
			 * The event(s) should just be deleted from
			 * the pending changelist.
			 */
			case ENOTCAPABLE:
				continue;
#endif

			/* Can occur on a delete if the fd is closed. */
			case EBADF:
				/* XXXX On NetBSD, we can also get EBADF if we
				 * try to add the write side of a pipe, but
				 * the read side has already been closed.
				 * Other BSDs call this situation 'EPIPE'. It
				 * would be good if we had a way to report
				 * this situation. */
				continue;
			/* These two can occur on an add if the fd was one side
			 * of a pipe, and the other side was closed. */
			case EPERM:
			case EPIPE:
				/* Report read events, if we're listening for
				 * them, so that the user can learn about any
				 * add errors.  (If the operation was a
				 * delete, then udata should be cleared.) */
				if (events[i].udata) {
					/* The operation was an add:
					 * report the error as a read. */
					which |= EV_READ;
					break;
				} else {
					/* The operation was a del:
					 * report nothing. */
					continue;
				}

			/* Other errors shouldn't occur. */
			default:
				errno = events[i].data;
				return (-1);
			}
		} else if (events[i].filter == EVFILT_READ) {
			which |= EV_READ;
		} else if (events[i].filter == EVFILT_WRITE) {
			which |= EV_WRITE;
		} else if (events[i].filter == EVFILT_SIGNAL) {
			which |= EV_SIGNAL;
#ifdef EVFILT_USER
		} else if (events[i].filter == EVFILT_USER) {
			/* Wakeup from event_kq_notify_base_; nothing to
			 * activate, just clear the pending flag. */
			base->is_notify_pending = 0;
#endif
		}

		if (!which)
			continue;

		if (events[i].filter == EVFILT_SIGNAL) {
			evmap_signal_active_(base, events[i].ident, 1);
		} else {
			evmap_io_active_(base, events[i].ident, which | EV_ET);
		}
	}

	if (res == kqop->events_size) {
		/* We used all the events space that we have. Maybe we should
		   make it bigger. */
		kq_grow_events(kqop, kqop->events_size * 2);
	}

	return (0);
}
    416 
    417 static void
    418 kqop_free(struct kqop *kqop)
    419 {
    420 	if (kqop->changes)
    421 		mm_free(kqop->changes);
    422 	if (kqop->events)
    423 		mm_free(kqop->events);
    424 	if (kqop->kq >= 0 && kqop->pid == getpid())
    425 		close(kqop->kq);
    426 	memset(kqop, 0, sizeof(struct kqop));
    427 	mm_free(kqop);
    428 }
    429 
    430 static void
    431 kq_dealloc(struct event_base *base)
    432 {
    433 	struct kqop *kqop = base->evbase;
    434 	evsig_dealloc_(base);
    435 	kqop_free(kqop);
    436 }
    437 
    438 /* signal handling */
    439 static int
    440 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p)
    441 {
    442 	struct kqop *kqop = base->evbase;
    443 	struct kevent kev;
    444 	struct timespec timeout = { 0, 0 };
    445 	(void)p;
    446 
    447 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    448 
    449 	memset(&kev, 0, sizeof(kev));
    450 	kev.ident = nsignal;
    451 	kev.filter = EVFILT_SIGNAL;
    452 	kev.flags = EV_ADD;
    453 
    454 	/* Be ready for the signal if it is sent any
    455 	 * time between now and the next call to
    456 	 * kq_dispatch. */
    457 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    458 		return (-1);
    459 
    460         /* We can set the handler for most signals to SIG_IGN and
    461          * still have them reported to us in the queue.  However,
    462          * if the handler for SIGCHLD is SIG_IGN, the system reaps
    463          * zombie processes for us, and we don't get any notification.
    464          * This appears to be the only signal with this quirk. */
    465 	if (evsig_set_handler_(base, nsignal,
    466                                nsignal == SIGCHLD ? SIG_DFL : SIG_IGN) == -1)
    467 		return (-1);
    468 
    469 	return (0);
    470 }
    471 
    472 static int
    473 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p)
    474 {
    475 	struct kqop *kqop = base->evbase;
    476 	struct kevent kev;
    477 
    478 	struct timespec timeout = { 0, 0 };
    479 	(void)p;
    480 
    481 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    482 
    483 	memset(&kev, 0, sizeof(kev));
    484 	kev.ident = nsignal;
    485 	kev.filter = EVFILT_SIGNAL;
    486 	kev.flags = EV_DELETE;
    487 
    488 	/* Because we insert signal events
    489 	 * immediately, we need to delete them
    490 	 * immediately, too */
    491 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    492 		return (-1);
    493 
    494 	if (evsig_restore_handler_(base, nsignal) == -1)
    495 		return (-1);
    496 
    497 	return (0);
    498 }
    499 
    500 
    501 /* OSX 10.6 and FreeBSD 8.1 add support for EVFILT_USER, which we can use
    502  * to wake up the event loop from another thread. */
    503 
    504 /* Magic number we use for our filter ID. */
    505 #define NOTIFY_IDENT 42
    506 
    507 int
    508 event_kq_add_notify_event_(struct event_base *base)
    509 {
    510 	struct kqop *kqop = base->evbase;
    511 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    512 	struct kevent kev;
    513 	struct timespec timeout = { 0, 0 };
    514 #endif
    515 
    516 	if (kqop->notify_event_added)
    517 		return 0;
    518 
    519 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    520 	memset(&kev, 0, sizeof(kev));
    521 	kev.ident = NOTIFY_IDENT;
    522 	kev.filter = EVFILT_USER;
    523 	kev.flags = EV_ADD | EV_CLEAR;
    524 
    525 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    526 		event_warn("kevent: adding EVFILT_USER event");
    527 		return -1;
    528 	}
    529 
    530 	kqop->notify_event_added = 1;
    531 
    532 	return 0;
    533 #else
    534 	return -1;
    535 #endif
    536 }
    537 
    538 int
    539 event_kq_notify_base_(struct event_base *base)
    540 {
    541 	struct kqop *kqop = base->evbase;
    542 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    543 	struct kevent kev;
    544 	struct timespec timeout = { 0, 0 };
    545 #endif
    546 	if (! kqop->notify_event_added)
    547 		return -1;
    548 
    549 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    550 	memset(&kev, 0, sizeof(kev));
    551 	kev.ident = NOTIFY_IDENT;
    552 	kev.filter = EVFILT_USER;
    553 	kev.fflags = NOTE_TRIGGER;
    554 
    555 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    556 		event_warn("kevent: triggering EVFILT_USER event");
    557 		return -1;
    558 	}
    559 
    560 	return 0;
    561 #else
    562 	return -1;
    563 #endif
    564 }
    565 
    566 #endif /* EVENT__HAVE_KQUEUE */
    567