Home | History | Annotate | Line # | Download | only in libevent
kqueue.c revision 1.2.8.2
      1 /*	$NetBSD: kqueue.c,v 1.2.8.2 2015/01/07 04:45:37 msaitoh Exp $	*/
      2 
      3 /*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
      4 
      5 /*
      6  * Copyright 2000-2007 Niels Provos <provos (at) citi.umich.edu>
      7  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. The name of the author may not be used to endorse or promote products
     18  *    derived from this software without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 #include "event2/event-config.h"
     32 #include "evconfig-private.h"
     33 
     34 #ifdef EVENT__HAVE_KQUEUE
     35 
     36 #include <sys/types.h>
     37 #ifdef EVENT__HAVE_SYS_TIME_H
     38 #include <sys/time.h>
     39 #endif
     40 #include <sys/queue.h>
     41 #include <sys/event.h>
     42 #include <signal.h>
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 #include <unistd.h>
     47 #include <errno.h>
#include <limits.h>
     48 #ifdef EVENT__HAVE_INTTYPES_H
     49 #include <inttypes.h>
     50 #endif
     51 
     52 /* Some platforms apparently define the udata field of struct kevent as
     53  * intptr_t, whereas others define it as void*.  There doesn't seem to be an
     54  * easy way to tell them apart via autoconf, so we need to use OS macros. */
/* Integer flavour: selected when <inttypes.h> is available and the target is
 * none of OpenBSD, FreeBSD or Darwin/Apple.  Both helpers cast to intptr_t. */
     55 #if defined(EVENT__HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
     56 #define PTR_TO_UDATA(x)	((intptr_t)(x))
     57 #define INT_TO_UDATA(x) ((intptr_t)(x))
     58 #else
/* Pointer flavour: pointers are passed through unchanged and integers are
 * cast to void*. */
     59 #define PTR_TO_UDATA(x)	(x)
     60 #define INT_TO_UDATA(x) ((void*)(x))
     61 #endif
     62 
     63 #include "event-internal.h"
     64 #include "log-internal.h"
     65 #include "evmap-internal.h"
     66 #include "event2/thread.h"
     67 #include "evthread-internal.h"
     68 #include "changelist-internal.h"
     69 
     70 #include "kqueue-internal.h"
     71 
/* Initial capacity, in entries, that kq_init() gives to both the 'changes'
 * and the 'events' arrays of a struct kqop. */
     72 #define NEVENT		64
     73 
/* State of one kqueue backend instance; kq_init() returns it and the other
 * functions in this file read it back from base->evbase. */
     74 struct kqop {
	/* Array of pending kevent changes, filled by kq_build_changes_list()
	 * and handed to kevent() as the changelist in kq_dispatch().
	 * Temporarily set to NULL while kq_dispatch() is inside kevent(). */
     75 	struct kevent *changes;
	/* Capacity of 'changes', in entries. */
     76 	int changes_size;
     77 
	/* Array that receives the results of kevent() in kq_dispatch(). */
     78 	struct kevent *events;
	/* Capacity of 'events', in entries. */
     79 	int events_size;
	/* Descriptor returned by kqueue(). */
     80 	int kq;
	/* Set to 1 once event_kq_add_notify_event_() has registered the
	 * EVFILT_USER event; event_kq_notify_base_() refuses to run without
	 * it. */
     81 	int notify_event_added;
	/* getpid() at kq_init() time; kqop_free() only closes 'kq' when the
	 * current pid still matches. */
     82 	pid_t pid;
     83 };
     84 
     85 static void kqop_free(struct kqop *kqop);
     86 
     87 static void *kq_init(struct event_base *);
     88 static int kq_sig_add(struct event_base *, int, short, short, void *);
     89 static int kq_sig_del(struct event_base *, int, short, short, void *);
     90 static int kq_dispatch(struct event_base *, struct timeval *);
     91 static void kq_dealloc(struct event_base *);
     92 
/*
 * Backend descriptor for kqueue-based I/O dispatch.
 *
 * This is a positional initializer for struct eventop, which is declared
 * outside this file.  NOTE(review): judging by the entries, the slots are
 * name, init, add, del, dispatch, dealloc, need-reinit flag, feature mask and
 * per-fd info size -- confirm against the struct eventop declaration.
 * Adds and deletes go through the shared changelist helpers rather than
 * backend-specific functions; kq_dispatch() later turns the accumulated
 * changelist into kevent changes.
 */
     93 const struct eventop kqops = {
     94 	"kqueue",
     95 	kq_init,
     96 	event_changelist_add_,
     97 	event_changelist_del_,
     98 	kq_dispatch,
     99 	kq_dealloc,
    100 	1 /* need reinit */,
    101     EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS,
    102 	EVENT_CHANGELIST_FDINFO_SIZE
    103 };
    104 
/*
 * Backend descriptor for kqueue-based signal handling; kq_init() installs it
 * as base->evsigsel.
 *
 * Only the two slots holding kq_sig_add and kq_sig_del are populated; the
 * other function slots are NULL.  NOTE(review): slot meanings follow struct
 * eventop, which is declared outside this file -- confirm the field order
 * there.
 */
    105 static const struct eventop kqsigops = {
    106 	"kqueue_signal",
    107 	NULL,
    108 	kq_sig_add,
    109 	kq_sig_del,
    110 	NULL,
    111 	NULL,
    112 	1 /* need reinit */,
    113 	0,
    114 	0
    115 };
    116 
    117 static void *
    118 kq_init(struct event_base *base)
    119 {
    120 	int kq = -1;
    121 	struct kqop *kqueueop = NULL;
    122 
    123 	if (!(kqueueop = mm_calloc(1, sizeof(struct kqop))))
    124 		return (NULL);
    125 
    126 /* Initialize the kernel queue */
    127 
    128 	if ((kq = kqueue()) == -1) {
    129 		event_warn("kqueue");
    130 		goto err;
    131 	}
    132 
    133 	kqueueop->kq = kq;
    134 
    135 	kqueueop->pid = getpid();
    136 
    137 	/* Initialize fields */
    138 	kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent));
    139 	if (kqueueop->changes == NULL)
    140 		goto err;
    141 	kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent));
    142 	if (kqueueop->events == NULL)
    143 		goto err;
    144 	kqueueop->events_size = kqueueop->changes_size = NEVENT;
    145 
    146 	/* Check for Mac OS X kqueue bug. */
    147 	memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]);
    148 	kqueueop->changes[0].ident = -1;
    149 	kqueueop->changes[0].filter = EVFILT_READ;
    150 	kqueueop->changes[0].flags = EV_ADD;
    151 	/*
    152 	 * If kqueue works, then kevent will succeed, and it will
    153 	 * stick an error in events[0].  If kqueue is broken, then
    154 	 * kevent will fail.
    155 	 */
    156 	if (kevent(kq,
    157 		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
    158 	    (int)kqueueop->events[0].ident != -1 ||
    159 	    kqueueop->events[0].flags != EV_ERROR) {
    160 		event_warn("%s: detected broken kqueue; not using.", __func__);
    161 		goto err;
    162 	}
    163 
    164 	base->evsigsel = &kqsigops;
    165 
    166 	return (kqueueop);
    167 err:
    168 	if (kqueueop)
    169 		kqop_free(kqueueop);
    170 
    171 	return (NULL);
    172 }
    173 
    174 #define ADD_UDATA 0x30303
    175 
    176 static void
    177 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change)
    178 {
    179 	memset(out, 0, sizeof(struct kevent));
    180 	out->ident = fd;
    181 	out->filter = filter;
    182 
    183 	if (change & EV_CHANGE_ADD) {
    184 		out->flags = EV_ADD;
    185 		/* We set a magic number here so that we can tell 'add'
    186 		 * errors from 'del' errors. */
    187 		out->udata = INT_TO_UDATA(ADD_UDATA);
    188 		if (change & EV_ET)
    189 			out->flags |= EV_CLEAR;
    190 #ifdef NOTE_EOF
    191 		/* Make it behave like select() and poll() */
    192 		if (filter == EVFILT_READ)
    193 			out->fflags = NOTE_EOF;
    194 #endif
    195 	} else {
    196 		EVUTIL_ASSERT(change & EV_CHANGE_DEL);
    197 		out->flags = EV_DELETE;
    198 	}
    199 }
    200 
    201 static int
    202 kq_build_changes_list(const struct event_changelist *changelist,
    203     struct kqop *kqop)
    204 {
    205 	int i;
    206 	int n_changes = 0;
    207 
    208 	for (i = 0; i < changelist->n_changes; ++i) {
    209 		struct event_change *in_ch = &changelist->changes[i];
    210 		struct kevent *out_ch;
    211 		if (n_changes >= kqop->changes_size - 1) {
    212 			int newsize = kqop->changes_size * 2;
    213 			struct kevent *newchanges;
    214 
    215 			newchanges = mm_realloc(kqop->changes,
    216 			    newsize * sizeof(struct kevent));
    217 			if (newchanges == NULL) {
    218 				event_warn("%s: realloc", __func__);
    219 				return (-1);
    220 			}
    221 			kqop->changes = newchanges;
    222 			kqop->changes_size = newsize;
    223 		}
    224 		if (in_ch->read_change) {
    225 			out_ch = &kqop->changes[n_changes++];
    226 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ,
    227 			    in_ch->read_change);
    228 		}
    229 		if (in_ch->write_change) {
    230 			out_ch = &kqop->changes[n_changes++];
    231 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE,
    232 			    in_ch->write_change);
    233 		}
    234 	}
    235 	return n_changes;
    236 }
    237 
    238 static int
    239 kq_grow_events(struct kqop *kqop, size_t new_size)
    240 {
    241 	struct kevent *newresult;
    242 
    243 	newresult = mm_realloc(kqop->events,
    244 	    new_size * sizeof(struct kevent));
    245 
    246 	if (newresult) {
    247 		kqop->events = newresult;
    248 		kqop->events_size = new_size;
    249 		return 0;
    250 	} else {
    251 		return -1;
    252 	}
    253 }
    254 
    255 static int
    256 kq_dispatch(struct event_base *base, struct timeval *tv)
    257 {
    258 	struct kqop *kqop = base->evbase;
    259 	struct kevent *events = kqop->events;
    260 	struct kevent *changes;
    261 	struct timespec ts, *ts_p = NULL;
    262 	int i, n_changes, res;
    263 
    264 	if (tv != NULL) {
    265 		TIMEVAL_TO_TIMESPEC(tv, &ts);
    266 		ts_p = &ts;
    267 	}
    268 
    269 	/* Build "changes" from "base->changes" */
    270 	EVUTIL_ASSERT(kqop->changes);
    271 	n_changes = kq_build_changes_list(&base->changelist, kqop);
    272 	if (n_changes < 0)
    273 		return -1;
    274 
    275 	event_changelist_remove_all_(&base->changelist, base);
    276 
    277 	/* steal the changes array in case some broken code tries to call
    278 	 * dispatch twice at once. */
    279 	changes = kqop->changes;
    280 	kqop->changes = NULL;
    281 
    282 	/* Make sure that 'events' is at least as long as the list of changes:
    283 	 * otherwise errors in the changes can get reported as a -1 return
    284 	 * value from kevent() rather than as EV_ERROR events in the events
    285 	 * array.
    286 	 *
    287 	 * (We could instead handle -1 return values from kevent() by
    288 	 * retrying with a smaller changes array or a larger events array,
    289 	 * but this approach seems less risky for now.)
    290 	 */
    291 	if (kqop->events_size < n_changes) {
    292 		int new_size = kqop->events_size;
    293 		do {
    294 			new_size *= 2;
    295 		} while (new_size < n_changes);
    296 
    297 		kq_grow_events(kqop, new_size);
    298 		events = kqop->events;
    299 	}
    300 
    301 	EVBASE_RELEASE_LOCK(base, th_base_lock);
    302 
    303 	res = kevent(kqop->kq, changes, n_changes,
    304 	    events, kqop->events_size, ts_p);
    305 
    306 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    307 
    308 	EVUTIL_ASSERT(kqop->changes == NULL);
    309 	kqop->changes = changes;
    310 
    311 	if (res == -1) {
    312 		if (errno != EINTR) {
    313 			event_warn("kevent");
    314 			return (-1);
    315 		}
    316 
    317 		return (0);
    318 	}
    319 
    320 	event_debug(("%s: kevent reports %d", __func__, res));
    321 
    322 	for (i = 0; i < res; i++) {
    323 		int which = 0;
    324 
    325 		if (events[i].flags & EV_ERROR) {
    326 			switch (events[i].data) {
    327 
    328 			/* Can occur on delete if we are not currently
    329 			 * watching any events on this fd.  That can
    330 			 * happen when the fd was closed and another
    331 			 * file was opened with that fd. */
    332 			case ENOENT:
    333 			/* Can occur for reasons not fully understood
    334 			 * on FreeBSD. */
    335 			case EINVAL:
    336 				continue;
    337 
    338 			/* Can occur on a delete if the fd is closed. */
    339 			case EBADF:
    340 				/* XXXX On NetBSD, we can also get EBADF if we
    341 				 * try to add the write side of a pipe, but
    342 				 * the read side has already been closed.
    343 				 * Other BSDs call this situation 'EPIPE'. It
    344 				 * would be good if we had a way to report
    345 				 * this situation. */
    346 				continue;
    347 			/* These two can occur on an add if the fd was one side
    348 			 * of a pipe, and the other side was closed. */
    349 			case EPERM:
    350 			case EPIPE:
    351 				/* Report read events, if we're listening for
    352 				 * them, so that the user can learn about any
    353 				 * add errors.  (If the operation was a
    354 				 * delete, then udata should be cleared.) */
    355 				if (events[i].udata) {
    356 					/* The operation was an add:
    357 					 * report the error as a read. */
    358 					which |= EV_READ;
    359 					break;
    360 				} else {
    361 					/* The operation was a del:
    362 					 * report nothing. */
    363 					continue;
    364 				}
    365 
    366 			/* Other errors shouldn't occur. */
    367 			default:
    368 				errno = events[i].data;
    369 				return (-1);
    370 			}
    371 		} else if (events[i].filter == EVFILT_READ) {
    372 			which |= EV_READ;
    373 		} else if (events[i].filter == EVFILT_WRITE) {
    374 			which |= EV_WRITE;
    375 		} else if (events[i].filter == EVFILT_SIGNAL) {
    376 			which |= EV_SIGNAL;
    377 #ifdef EVFILT_USER
    378 		} else if (events[i].filter == EVFILT_USER) {
    379 			base->is_notify_pending = 0;
    380 #endif
    381 		}
    382 
    383 		if (!which)
    384 			continue;
    385 
    386 		if (events[i].filter == EVFILT_SIGNAL) {
    387 			evmap_signal_active_(base, events[i].ident, 1);
    388 		} else {
    389 			evmap_io_active_(base, events[i].ident, which | EV_ET);
    390 		}
    391 	}
    392 
    393 	if (res == kqop->events_size) {
    394 		/* We used all the events space that we have. Maybe we should
    395 		   make it bigger. */
    396 		kq_grow_events(kqop, kqop->events_size * 2);
    397 	}
    398 
    399 	return (0);
    400 }
    401 
    402 static void
    403 kqop_free(struct kqop *kqop)
    404 {
    405 	if (kqop->changes)
    406 		mm_free(kqop->changes);
    407 	if (kqop->events)
    408 		mm_free(kqop->events);
    409 	if (kqop->kq >= 0 && kqop->pid == getpid())
    410 		close(kqop->kq);
    411 	memset(kqop, 0, sizeof(struct kqop));
    412 	mm_free(kqop);
    413 }
    414 
    415 static void
    416 kq_dealloc(struct event_base *base)
    417 {
    418 	struct kqop *kqop = base->evbase;
    419 	evsig_dealloc_(base);
    420 	kqop_free(kqop);
    421 }
    422 
    423 /* signal handling */
    424 static int
    425 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p)
    426 {
    427 	struct kqop *kqop = base->evbase;
    428 	struct kevent kev;
    429 	struct timespec timeout = { 0, 0 };
    430 	(void)p;
    431 
    432 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    433 
    434 	memset(&kev, 0, sizeof(kev));
    435 	kev.ident = nsignal;
    436 	kev.filter = EVFILT_SIGNAL;
    437 	kev.flags = EV_ADD;
    438 
    439 	/* Be ready for the signal if it is sent any
    440 	 * time between now and the next call to
    441 	 * kq_dispatch. */
    442 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    443 		return (-1);
    444 
    445         /* We can set the handler for most signals to SIG_IGN and
    446          * still have them reported to us in the queue.  However,
    447          * if the handler for SIGCHLD is SIG_IGN, the system reaps
    448          * zombie processes for us, and we don't get any notification.
    449          * This appears to be the only signal with this quirk. */
    450 	if (evsig_set_handler_(base, nsignal,
    451                                nsignal == SIGCHLD ? SIG_DFL : SIG_IGN) == -1)
    452 		return (-1);
    453 
    454 	return (0);
    455 }
    456 
    457 static int
    458 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p)
    459 {
    460 	struct kqop *kqop = base->evbase;
    461 	struct kevent kev;
    462 
    463 	struct timespec timeout = { 0, 0 };
    464 	(void)p;
    465 
    466 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    467 
    468 	memset(&kev, 0, sizeof(kev));
    469 	kev.ident = nsignal;
    470 	kev.filter = EVFILT_SIGNAL;
    471 	kev.flags = EV_DELETE;
    472 
    473 	/* Because we insert signal events
    474 	 * immediately, we need to delete them
    475 	 * immediately, too */
    476 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    477 		return (-1);
    478 
    479 	if (evsig_restore_handler_(base, nsignal) == -1)
    480 		return (-1);
    481 
    482 	return (0);
    483 }
    484 
    485 
    486 /* OSX 10.6 and FreeBSD 8.1 add support for EVFILT_USER, which we can use
    487  * to wake up the event loop from another thread. */
    488 
    489 /* Magic number we use for our filter ID. */
    490 #define NOTIFY_IDENT 42
    491 
    492 int
    493 event_kq_add_notify_event_(struct event_base *base)
    494 {
    495 	struct kqop *kqop = base->evbase;
    496 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    497 	struct kevent kev;
    498 	struct timespec timeout = { 0, 0 };
    499 #endif
    500 
    501 	if (kqop->notify_event_added)
    502 		return 0;
    503 
    504 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    505 	memset(&kev, 0, sizeof(kev));
    506 	kev.ident = NOTIFY_IDENT;
    507 	kev.filter = EVFILT_USER;
    508 	kev.flags = EV_ADD | EV_CLEAR;
    509 
    510 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    511 		event_warn("kevent: adding EVFILT_USER event");
    512 		return -1;
    513 	}
    514 
    515 	kqop->notify_event_added = 1;
    516 
    517 	return 0;
    518 #else
    519 	return -1;
    520 #endif
    521 }
    522 
    523 int
    524 event_kq_notify_base_(struct event_base *base)
    525 {
    526 	struct kqop *kqop = base->evbase;
    527 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    528 	struct kevent kev;
    529 	struct timespec timeout = { 0, 0 };
    530 #endif
    531 	if (! kqop->notify_event_added)
    532 		return -1;
    533 
    534 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    535 	memset(&kev, 0, sizeof(kev));
    536 	kev.ident = NOTIFY_IDENT;
    537 	kev.filter = EVFILT_USER;
    538 	kev.fflags = NOTE_TRIGGER;
    539 
    540 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    541 		event_warn("kevent: triggering EVFILT_USER event");
    542 		return -1;
    543 	}
    544 
    545 	return 0;
    546 #else
    547 	return -1;
    548 #endif
    549 }
    550 
    551 #endif /* EVENT__HAVE_KQUEUE */
    552