Home | History | Annotate | Line # | Download | only in src
      1 /* SPDX-License-Identifier: BSD-2-Clause */
      2 /*
      3  * eloop - portable event based main loop.
      4  * Copyright (c) 2006-2025 Roy Marples <roy (at) marples.name>
      5  * All rights reserved.
      6 
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /* NOTES:
     30  * Basically for a small number of fd's (total, not max fd)
     31  * of say a few hundred, ppoll(2) performs just fine, if not faster than others.
     32  * It also has the smallest memory and binary size footprint.
     33  * ppoll(2) is available on all modern OS my software runs on and should be
     34  * an up and coming POSIX standard interface.
     35  * If ppoll is not available, then pselect(2) can be used instead which has
     36  * even smaller memory and binary size footprint.
     37  * However, this difference is quite tiny and the ppoll API is superior.
     38  * pselect cannot return error conditions such as EOF for example.
     39  *
     40  * Both epoll(7) and kqueue(2) require an extra fd per process to manage
     41  * their respective list of interest AND syscalls to manage it.
     42  * So for a small number of fd's, these are more resource intensive,
     43  * especially when used with more than one process.
     44  *
     45  * epoll avoids the resource limit RLIMIT_NOFILE Linux poll stupidly applies.
     46  * kqueue avoids the same limit on OpenBSD.
     47  * ppoll can still be secured in both by using SEECOMP or pledge.
     48  *
 * kqueue can avoid the signal trick we use here, so that we can call
 * functions other than those listed as async-signal-safe in sigaction(2)
 * from our signal handlers; it is probably more robust than our approach
 * at surviving a signal storm.
     52  * signalfd(2) is available for Linux which probably works in a similar way
     53  * but it's yet another fd to use.
     54  *
     55  * Taking this all into account, ppoll(2) is the default mechanism used here.
     56  */
     57 
     58 #if (defined(__unix__) || defined(unix)) && !defined(USG)
     59 #include <sys/param.h>
     60 #endif
     61 #include <sys/time.h>
     62 
     63 #include <assert.h>
     64 #include <errno.h>
     65 #include <fcntl.h>
     66 #include <limits.h>
     67 #include <stdbool.h>
     68 #include <signal.h>
     69 #include <stdarg.h>
     70 #include <stdint.h>
     71 #include <stdlib.h>
     72 #include <string.h>
     73 #include <unistd.h>
     74 
     75 /* config.h should define HAVE_PPOLL, etc. */
     76 #if defined(HAVE_CONFIG_H) && !defined(NO_CONFIG_H)
     77 #include "config.h"
     78 #endif
     79 
     80 /* Prioritise which mechanism we want to use.*/
     81 #if defined(HAVE_PPOLL)
     82 #undef HAVE_EPOLL
     83 #undef HAVE_KQUEUE
     84 #undef HAVE_PSELECT
     85 #elif defined(HAVE_POLLTS)
     86 #define HAVE_PPOLL
     87 #define ppoll pollts
     88 #undef HAVE_EPOLL
     89 #undef HAVE_KQUEUE
     90 #undef HAVE_PSELECT
     91 #elif defined(HAVE_KQUEUE)
     92 #undef HAVE_EPOLL
     93 #undef HAVE_PSELECT
     94 #elif defined(HAVE_EPOLL)
     95 #undef HAVE_KQUEUE
     96 #undef HAVE_PSELECT
     97 #elif !defined(HAVE_PSELECT)
     98 #define HAVE_PPOLL
     99 #endif
    100 
    101 #if defined(HAVE_KQUEUE)
    102 #include <sys/event.h>
    103 #if defined(__DragonFly__) || defined(__FreeBSD__)
    104 #define	_kevent(kq, cl, ncl, el, nel, t) \
    105 	kevent((kq), (cl), (int)(ncl), (el), (int)(nel), (t))
    106 #else
    107 #define	_kevent kevent
    108 #endif
    109 #define NFD 2
    110 #elif defined(HAVE_EPOLL)
    111 #include <sys/epoll.h>
    112 #define	NFD 1
    113 #elif defined(HAVE_PPOLL)
    114 #include <poll.h>
    115 #define NFD 1
    116 #elif defined(HAVE_PSELECT)
    117 #include <sys/select.h>
    118 #endif
    119 
    120 #include "eloop.h"
    121 
    122 #ifndef UNUSED
    123 #define UNUSED(a) (void)((a))
    124 #endif
    125 #ifndef __unused
    126 #ifdef __GNUC__
    127 #define __unused   __attribute__((__unused__))
    128 #else
    129 #define __unused
    130 #endif
    131 #endif
    132 
    133 /* Our structures require TAILQ macros, which really every libc should
    134  * ship as they are useful beyond belief.
    135  * Sadly some libc's don't have sys/queue.h and some that do don't have
    136  * the TAILQ_FOREACH macro. For those that don't, the application using
    137  * this implementation will need to ship a working queue.h somewhere.
    138  * If we don't have sys/queue.h found in config.h, then
    139  * allow QUEUE_H to override loading queue.h in the current directory. */
    140 #ifndef TAILQ_FOREACH
    141 #ifdef HAVE_SYS_QUEUE_H
    142 #include <sys/queue.h>
    143 #elif defined(QUEUE_H)
    144 #define __QUEUE_HEADER(x) #x
    145 #define _QUEUE_HEADER(x) __QUEUE_HEADER(x)
    146 #include _QUEUE_HEADER(QUEUE_H)
    147 #else
    148 #include "queue.h"
    149 #endif
    150 #endif
    151 
    152 #ifdef ELOOP_DEBUG
    153 #include <stdio.h>
    154 #endif
    155 
    156 #ifndef __arraycount
    157 #  define __arraycount(__x)       (sizeof(__x) / sizeof(__x[0]))
    158 #endif
    159 
    160 /*
    161  * Allow a backlog of signals.
    162  * If you use many eloops in the same process, they should all
    163  * use the same signal handler or have the signal handler unset.
    164  * Otherwise the signal might not behave as expected.
    165  */
    166 #define ELOOP_NSIGNALS	5
    167 
    168 /*
    169  * time_t is a signed integer of an unspecified size.
    170  * To adjust for time_t wrapping, we need to work the maximum signed
    171  * value and use that as a maximum.
    172  */
    173 #ifndef TIME_MAX
    174 #define	TIME_MAX	((1ULL << (sizeof(time_t) * NBBY - 1)) - 1)
    175 #endif
    176 /* The unsigned maximum is then simple - multiply by two and add one. */
    177 #ifndef UTIME_MAX
    178 #define	UTIME_MAX	(TIME_MAX * 2) + 1
    179 #endif
    180 
    181 struct eloop_event {
    182 	TAILQ_ENTRY(eloop_event) next;
    183 	int fd;
    184 	void (*cb)(void *, unsigned short);
    185 	void *cb_arg;
    186 	unsigned short events;
    187 #ifdef HAVE_PPOLL
    188 	struct pollfd *pollfd;
    189 #endif
    190 };
    191 
    192 struct eloop_timeout {
    193 	TAILQ_ENTRY(eloop_timeout) next;
    194 	unsigned int seconds;
    195 	unsigned int nseconds;
    196 	void (*callback)(void *);
    197 	void *arg;
    198 	int queue;
    199 };
    200 
    201 struct eloop {
    202 	TAILQ_HEAD (event_head, eloop_event) events;
    203 	size_t nevents;
    204 	struct event_head free_events;
    205 
    206 	struct timespec now;
    207 	TAILQ_HEAD (timeout_head, eloop_timeout) timeouts;
    208 	struct timeout_head free_timeouts;
    209 
    210 	const int *signals;
    211 	size_t nsignals;
    212 	void (*signal_cb)(int, void *);
    213 	void *signal_cb_ctx;
    214 
    215 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
    216 	int fd;
    217 #endif
    218 #if defined(HAVE_KQUEUE)
    219 	struct kevent *fds;
    220 #elif defined(HAVE_EPOLL)
    221 	struct epoll_event *fds;
    222 #elif defined(HAVE_PPOLL)
    223 	struct pollfd *fds;
    224 #endif
    225 #if !defined(HAVE_PSELECT)
    226 	size_t nfds;
    227 #endif
    228 
    229 	int exitcode;
    230 	bool exitnow;
    231 	bool events_need_setup;
    232 	bool cleared;
    233 };
    234 
    235 #ifdef HAVE_REALLOCARRAY
    236 #define	eloop_realloca	reallocarray
    237 #else
    238 /* Handy routing to check for potential overflow.
    239  * reallocarray(3) and reallocarr(3) are not portable. */
#define SQRT_SIZE_MAX (((size_t)1) << (sizeof(size_t) * CHAR_BIT / 2))
/*
 * Overflow-checked array reallocation, a stand-in for the non-portable
 * reallocarray(3)/reallocarr(3).
 * Returns the reallocated pointer, or NULL with errno set to EOVERFLOW
 * if n * size would overflow size_t.
 */
static void *
eloop_realloca(void *ptr, size_t n, size_t size)
{

	/* If both operands fit in half the bits of size_t the product
	 * cannot overflow, so the division is skipped in the common case.
	 * Guard size != 0 so we never divide by zero. */
	if ((n | size) >= SQRT_SIZE_MAX && size != 0 && n > SIZE_MAX / size) {
		errno = EOVERFLOW;
		return NULL;
	}
	return realloc(ptr, n * size);
}
    251 #endif
    252 
    253 
/*
 * Rebuild the backend's per-event bookkeeping from the event list.
 * Events marked deleted (fd == -1) are unlinked and recycled here.
 * For the ppoll backend each live event is (re)pointed at its slot in
 * the pollfd array, which is grown (never shrunk) to fit.
 * Returns 0 on success or -1 if the array cannot be grown.
 */
static int
eloop_event_setup_fds(struct eloop *eloop)
{
	struct eloop_event *e, *ne;
#if defined(HAVE_KQUEUE)
	struct kevent *pfd;
	size_t nfds = eloop->nsignals;
#elif defined(HAVE_EPOLL)
	struct epoll_event *pfd;
	size_t nfds = 0;
#elif defined(HAVE_PPOLL)
	struct pollfd *pfd;
	size_t nfds = 0;
#endif

#ifndef HAVE_PSELECT
	/* Ensure the results array can hold every event (NFD slots each,
	 * plus the signal slots for kqueue counted above). */
	nfds += eloop->nevents * NFD;
	if (eloop->nfds < nfds) {
		pfd = eloop_realloca(eloop->fds, nfds, sizeof(*pfd));
		if (pfd == NULL)
			return -1;
		eloop->fds = pfd;
		eloop->nfds = nfds;
	}
#endif

#ifdef HAVE_PPOLL
	pfd = eloop->fds;
#endif
	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
		/* Deleted events are moved to the free pool for reuse. */
		if (e->fd == -1) {
			TAILQ_REMOVE(&eloop->events, e, next);
			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
			continue;
		}
#ifdef HAVE_PPOLL
		/* Fill this event's pollfd slot from its event mask. */
		e->pollfd = pfd;
		pfd->fd = e->fd;
		pfd->events = 0;
		if (e->events & ELE_READ)
			pfd->events |= POLLIN;
		if (e->events & ELE_WRITE)
			pfd->events |= POLLOUT;
		pfd->revents = 0;
		pfd++;
#endif
	}

	eloop->events_need_setup = false;
	return 0;
}
    305 
/* Return the number of active (not yet deleted) events in the loop. */
size_t
eloop_event_count(const struct eloop *eloop)
{

	return eloop->nevents;
}
    312 
    313 int
    314 eloop_event_add(struct eloop *eloop, int fd, unsigned short events,
    315     void (*cb)(void *, unsigned short), void *cb_arg)
    316 {
    317 	struct eloop_event *e;
    318 	bool added;
    319 #if defined(HAVE_KQUEUE)
    320 	struct kevent ke[2], *kep = &ke[0];
    321 	size_t n;
    322 #elif defined(HAVE_EPOLL)
    323 	struct epoll_event epe;
    324 	int op;
    325 #endif
    326 
    327 	assert(eloop != NULL);
    328 	assert(cb != NULL && cb_arg != NULL);
    329 	if (fd == -1 || !(events & (ELE_READ | ELE_WRITE | ELE_HANGUP))) {
    330 		errno = EINVAL;
    331 		return -1;
    332 	}
    333 
    334 	TAILQ_FOREACH(e, &eloop->events, next) {
    335 		if (e->fd == fd)
    336 			break;
    337 	}
    338 
    339 	if (e == NULL) {
    340 		added = true;
    341 		e = TAILQ_FIRST(&eloop->free_events);
    342 		if (e != NULL)
    343 			TAILQ_REMOVE(&eloop->free_events, e, next);
    344 		else {
    345 			e = malloc(sizeof(*e));
    346 			if (e == NULL) {
    347 				return -1;
    348 			}
    349 		}
    350 		TAILQ_INSERT_HEAD(&eloop->events, e, next);
    351 		eloop->nevents++;
    352 		e->fd = fd;
    353 		e->events = 0;
    354 	} else
    355 		added = false;
    356 
    357 	e->cb = cb;
    358 	e->cb_arg = cb_arg;
    359 
    360 #if defined(HAVE_KQUEUE)
    361 	n = 2;
    362 	if (events & ELE_READ && !(e->events & ELE_READ))
    363 		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_ADD, 0, 0, e);
    364 	else if (!(events & ELE_READ) && e->events & ELE_READ)
    365 		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
    366 	else
    367 		n--;
    368 	if (events & ELE_WRITE && !(e->events & ELE_WRITE))
    369 		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_ADD, 0, 0, e);
    370 	else if (!(events & ELE_WRITE) && e->events & ELE_WRITE)
    371 		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
    372 	else
    373 		n--;
    374 #ifdef EVFILT_PROCDESC
    375 	if (events & ELE_HANGUP)
    376 		EV_SET(kep++, (uintptr_t)fd, EVFILT_PROCDESC, EV_ADD,
    377 		    NOTE_EXIT, 0, e);
    378 	else
    379 		n--;
    380 #endif
    381 	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1) {
    382 		if (added) {
    383 			TAILQ_REMOVE(&eloop->events, e, next);
    384 			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
    385 		}
    386 		return -1;
    387 	}
    388 #elif defined(HAVE_EPOLL)
    389 	memset(&epe, 0, sizeof(epe));
    390 	epe.data.ptr = e;
    391 	if (events & ELE_READ)
    392 		epe.events |= EPOLLIN;
    393 	if (events & ELE_WRITE)
    394 		epe.events |= EPOLLOUT;
    395 	op = added ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
    396 	if (epe.events != 0 && epoll_ctl(eloop->fd, op, fd, &epe) == -1) {
    397 		if (added) {
    398 			TAILQ_REMOVE(&eloop->events, e, next);
    399 			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
    400 		}
    401 		return -1;
    402 	}
    403 #elif defined(HAVE_PPOLL)
    404 	e->pollfd = NULL;
    405 	UNUSED(added);
    406 #else
    407 	UNUSED(added);
    408 #endif
    409 	e->events = events;
    410 	eloop->events_need_setup = true;
    411 	return 0;
    412 }
    413 
/*
 * Stop watching fd.
 * The event is only marked deleted (fd = -1) here; it is unlinked and
 * recycled by eloop_event_setup_fds() before the next wait.
 * kqueue/epoll kernel interest is dropped immediately.
 * Returns 1 on success, or -1 with errno set (EINVAL for a bad fd,
 * ENOENT if the fd is not being watched).
 */
int
eloop_event_delete(struct eloop *eloop, int fd)
{
	struct eloop_event *e;
#if defined(HAVE_KQUEUE)
	struct kevent ke[2], *kep = &ke[0];
	size_t n;
#endif

	assert(eloop != NULL);
	if (fd == -1) {
		errno = EINVAL;
		return -1;
	}

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == fd)
			break;
	}
	if (e == NULL) {
		errno = ENOENT;
		return -1;
	}

#if defined(HAVE_KQUEUE)
	/* Remove each filter that was registered for this fd. */
	n = 0;
	if (e->events & ELE_READ) {
		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
		n++;
	}
	if (e->events & ELE_WRITE) {
		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
		n++;
	}
	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1)
		return -1;
#elif defined(HAVE_EPOLL)
	if (epoll_ctl(eloop->fd, EPOLL_CTL_DEL, fd, NULL) == -1)
		return -1;
#endif
	/* Mark for removal; setup_fds() recycles the entry later. */
	e->fd = -1;
	eloop->nevents--;
	eloop->events_need_setup = true;
	return 1;
}
    459 
    460 unsigned long long
    461 eloop_timespec_diff(const struct timespec *tsp, const struct timespec *usp,
    462     unsigned int *nsp)
    463 {
    464 	unsigned long long tsecs, usecs, secs;
    465 	long nsecs;
    466 
    467 	if (tsp->tv_sec < 0) /* time wrapped */
    468 		tsecs = UTIME_MAX - (unsigned long long)(-tsp->tv_sec);
    469 	else
    470 		tsecs = (unsigned long long)tsp->tv_sec;
    471 	if (usp->tv_sec < 0) /* time wrapped */
    472 		usecs = UTIME_MAX - (unsigned long long)(-usp->tv_sec);
    473 	else
    474 		usecs = (unsigned long long)usp->tv_sec;
    475 
    476 	if (usecs > tsecs) /* time wrapped */
    477 		secs = (UTIME_MAX - usecs) + tsecs;
    478 	else
    479 		secs = tsecs - usecs;
    480 
    481 	nsecs = tsp->tv_nsec - usp->tv_nsec;
    482 	if (nsecs < 0) {
    483 		if (secs == 0)
    484 			nsecs = 0;
    485 		else {
    486 			secs--;
    487 			nsecs += NSEC_PER_SEC;
    488 		}
    489 	}
    490 	if (nsp != NULL)
    491 		*nsp = (unsigned int)nsecs;
    492 	return secs;
    493 }
    494 
/*
 * Subtract the time elapsed since the last call from every pending
 * timeout, clamping each at zero, then advance eloop->now.
 * Keeps relative timeouts accurate between waits and insertions.
 */
static void
eloop_reduce_timers(struct eloop *eloop)
{
	struct timespec now;
	unsigned long long secs;
	unsigned int nsecs;
	struct eloop_timeout *t;

	clock_gettime(CLOCK_MONOTONIC, &now);
	secs = eloop_timespec_diff(&now, &eloop->now, &nsecs);

	TAILQ_FOREACH(t, &eloop->timeouts, next) {
		if (secs > t->seconds) {
			/* Fully elapsed. */
			t->seconds = 0;
			t->nseconds = 0;
		} else {
			t->seconds -= (unsigned int)secs;
			/* Borrow a second for the nanosecond part,
			 * clamping at zero if none remain. */
			if (nsecs > t->nseconds) {
				if (t->seconds == 0)
					t->nseconds = 0;
				else {
					t->seconds--;
					t->nseconds = NSEC_PER_SEC
					    - (nsecs - t->nseconds);
				}
			} else
				t->nseconds -= nsecs;
		}
	}

	eloop->now = now;
}
    527 
    528 /*
    529  * This implementation should cope with UINT_MAX seconds on a system
    530  * where time_t is INT32_MAX. It should also cope with the monotonic timer
    531  * wrapping, although this is highly unlikely.
    532  * unsigned int should match or be greater than any on wire specified timeout.
    533  */
    534 static int
    535 eloop_q_timeout_add(struct eloop *eloop, int queue,
    536     unsigned int seconds, unsigned int nseconds,
    537     void (*callback)(void *), void *arg)
    538 {
    539 	struct eloop_timeout *t, *tt = NULL;
    540 
    541 	assert(eloop != NULL);
    542 	assert(callback != NULL);
    543 	assert(nseconds <= NSEC_PER_SEC);
    544 
    545 	/* Remove existing timeout if present. */
    546 	TAILQ_FOREACH(t, &eloop->timeouts, next) {
    547 		if (t->callback == callback && t->arg == arg) {
    548 			TAILQ_REMOVE(&eloop->timeouts, t, next);
    549 			break;
    550 		}
    551 	}
    552 
    553 	if (t == NULL) {
    554 		/* No existing, so allocate or grab one from the free pool. */
    555 		if ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
    556 			TAILQ_REMOVE(&eloop->free_timeouts, t, next);
    557 		} else {
    558 			if ((t = malloc(sizeof(*t))) == NULL)
    559 				return -1;
    560 		}
    561 	}
    562 
    563 	eloop_reduce_timers(eloop);
    564 
    565 	t->seconds = seconds;
    566 	t->nseconds = nseconds;
    567 	t->callback = callback;
    568 	t->arg = arg;
    569 	t->queue = queue;
    570 
    571 	/* The timeout list should be in chronological order,
    572 	 * soonest first. */
    573 	TAILQ_FOREACH(tt, &eloop->timeouts, next) {
    574 		if (t->seconds < tt->seconds ||
    575 		    (t->seconds == tt->seconds && t->nseconds < tt->nseconds))
    576 		{
    577 			TAILQ_INSERT_BEFORE(tt, t, next);
    578 			return 0;
    579 		}
    580 	}
    581 	TAILQ_INSERT_TAIL(&eloop->timeouts, t, next);
    582 	return 0;
    583 }
    584 
    585 int
    586 eloop_q_timeout_add_tv(struct eloop *eloop, int queue,
    587     const struct timespec *when, void (*callback)(void *), void *arg)
    588 {
    589 
    590 	if (when->tv_sec < 0 || (unsigned long)when->tv_sec > UINT_MAX) {
    591 		errno = EINVAL;
    592 		return -1;
    593 	}
    594 	if (when->tv_nsec < 0 || when->tv_nsec > NSEC_PER_SEC) {
    595 		errno = EINVAL;
    596 		return -1;
    597 	}
    598 
    599 	return eloop_q_timeout_add(eloop, queue,
    600 	    (unsigned int)when->tv_sec, (unsigned int)when->tv_sec,
    601 	    callback, arg);
    602 }
    603 
/* Add a timeout of whole seconds to the given queue. */
int
eloop_q_timeout_add_sec(struct eloop *eloop, int queue, unsigned int seconds,
    void (*callback)(void *), void *arg)
{

	return eloop_q_timeout_add(eloop, queue, seconds, 0, callback, arg);
}
    611 
    612 int
    613 eloop_q_timeout_add_msec(struct eloop *eloop, int queue, unsigned long when,
    614     void (*callback)(void *), void *arg)
    615 {
    616 	unsigned long seconds, nseconds;
    617 
    618 	seconds = when / MSEC_PER_SEC;
    619 	if (seconds > UINT_MAX) {
    620 		errno = EINVAL;
    621 		return -1;
    622 	}
    623 
    624 	nseconds = (when % MSEC_PER_SEC) * NSEC_PER_MSEC;
    625 	return eloop_q_timeout_add(eloop, queue,
    626 		(unsigned int)seconds, (unsigned int)nseconds, callback, arg);
    627 }
    628 
    629 int
    630 eloop_q_timeout_delete(struct eloop *eloop, int queue,
    631     void (*callback)(void *), void *arg)
    632 {
    633 	struct eloop_timeout *t, *tt;
    634 	int n;
    635 
    636 	assert(eloop != NULL);
    637 
    638 	n = 0;
    639 	TAILQ_FOREACH_SAFE(t, &eloop->timeouts, next, tt) {
    640 		if ((queue == 0 || t->queue == queue) &&
    641 		    t->arg == arg &&
    642 		    (!callback || t->callback == callback))
    643 		{
    644 			TAILQ_REMOVE(&eloop->timeouts, t, next);
    645 			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
    646 			n++;
    647 		}
    648 	}
    649 	return n;
    650 }
    651 
/* Request the running loop to stop and return the given exit code. */
void
eloop_exit(struct eloop *eloop, int code)
{

	assert(eloop != NULL);

	eloop->exitcode = code;
	eloop->exitnow = true;
}
    661 
/* Prepare the loop for (re-)running by clearing any pending exit
 * request. */
void
eloop_enter(struct eloop *eloop)
{

	assert(eloop != NULL);

	eloop->exitnow = false;
}
    670 
/* Must be called after fork(2).
 * Re-creates the kqueue/epoll backend fd (which is not inherited
 * usefully across fork) and re-registers all events and, for kqueue,
 * signal filters with the new fd.
 * Returns 0 on success or -1 on failure. */
int
eloop_forked(struct eloop *eloop)
{
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	struct eloop_event *e;
#if defined(HAVE_KQUEUE)
	struct kevent *pfds, *pfd;
	size_t i;
	int error;
#elif defined(HAVE_EPOLL)
	struct epoll_event epe = { .events = 0 };
#endif

	assert(eloop != NULL);
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	if (eloop->fd != -1)
		close(eloop->fd);
	if (eloop_open(eloop) == -1)
		return -1;
#endif

#ifdef HAVE_KQUEUE
	/* Scratch change list: one slot per signal plus NFD per event.
	 * Check the allocation and free it on every exit path. */
	pfds = malloc((eloop->nsignals + (eloop->nevents * NFD)) * sizeof(*pfds));
	if (pfds == NULL)
		return -1;
	pfd = pfds;

	if (eloop->signal_cb != NULL) {
		for (i = 0; i < eloop->nsignals; i++) {
			EV_SET(pfd++, (uintptr_t)eloop->signals[i],
			    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
		}
	} else
		i = 0;
#endif

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == -1)
			continue;
#if defined(HAVE_KQUEUE)
		if (e->events & ELE_READ) {
			EV_SET(pfd++, (uintptr_t)e->fd,
			    EVFILT_READ, EV_ADD, 0, 0, e);
			i++;
		}
		if (e->events & ELE_WRITE) {
			EV_SET(pfd++, (uintptr_t)e->fd,
			    EVFILT_WRITE, EV_ADD, 0, 0, e);
			i++;
		}
#elif defined(HAVE_EPOLL)
		memset(&epe, 0, sizeof(epe));
		epe.data.ptr = e;
		if (e->events & ELE_READ)
			epe.events |= EPOLLIN;
		if (e->events & ELE_WRITE)
			epe.events |= EPOLLOUT;
		if (epoll_ctl(eloop->fd, EPOLL_CTL_ADD, e->fd, &epe) == -1)
			return -1;
#endif
	}

#if defined(HAVE_KQUEUE)
	/* Register everything in one syscall, then release the list. */
	if (i == 0)
		error = 0;
	else
		error = _kevent(eloop->fd, pfds, i, NULL, 0, NULL);
	free(pfds);
	return error;
#else
	return 0;
#endif
#else
	UNUSED(eloop);
	return 0;
#endif
}
    743 
/*
 * Create the kqueue/epoll backend fd with close-on-exec set.
 * Returns the new fd, or 0 for backends that need no fd,
 * or -1 on error.
 */
int
eloop_open(struct eloop *eloop)
{
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	int fd;

	assert(eloop != NULL);
#if defined(HAVE_KQUEUE1)
	fd = kqueue1(O_CLOEXEC);
#elif defined(HAVE_KQUEUE)
	int flags;

	fd = kqueue();
	/* NOTE(review): this condition also fails when FD_CLOEXEC is
	 * already set on the new fd; kqueue(2) fds presumably start
	 * without it — confirm that is the intent. */
	flags = fcntl(fd, F_GETFD, 0);
	if (!(flags != -1 && !(flags & FD_CLOEXEC) &&
	    fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == 0))
	{
		close(fd);
		return -1;
	}
#elif defined(HAVE_EPOLL)
	fd = epoll_create1(EPOLL_CLOEXEC);
#endif

	eloop->fd = fd;
	return fd;
#else
	UNUSED(eloop);
	return 0;
#endif
}
    775 
/*
 * Replace the set of signals the loop listens for and the callback
 * invoked when one fires.  The signals array is referenced, not
 * copied, so it must outlive the loop.
 * For kqueue the old EVFILT_SIGNAL filters are removed and, unless
 * signal_cb is NULL, new ones are added.
 * Returns 0 on success or -1 on failure.
 */
int
eloop_signal_set_cb(struct eloop *eloop,
    const int *signals, size_t nsignals,
    void (*signal_cb)(int, void *), void *signal_cb_ctx)
{
#ifdef HAVE_KQUEUE
	size_t i;
	struct kevent *ke, *kes;
#endif
	int error = 0;

	assert(eloop != NULL);

#ifdef HAVE_KQUEUE
	/* One scratch buffer big enough for either the old or new set. */
	ke = kes = malloc(MAX(eloop->nsignals, nsignals) * sizeof(*kes));
	if (kes == NULL)
		return -1;
	for (i = 0; i < eloop->nsignals; i++) {
		EV_SET(ke++, (uintptr_t)eloop->signals[i],
		    EVFILT_SIGNAL, EV_DELETE, 0, 0, NULL);
	}
	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1) {
		error = -1;
		goto out;
	}
#endif

	eloop->signals = signals;
	eloop->nsignals = nsignals;
	eloop->signal_cb = signal_cb;
	eloop->signal_cb_ctx = signal_cb_ctx;

#ifdef HAVE_KQUEUE
	if (signal_cb == NULL)
		goto out;
	ke = kes;
	for (i = 0; i < eloop->nsignals; i++) {
		EV_SET(ke++, (uintptr_t)eloop->signals[i],
		    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	}
	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1)
		error = -1;
out:
	free(kes);
#endif

	return error;
}
    824 
    825 #ifndef HAVE_KQUEUE
    826 static volatile int _eloop_sig[ELOOP_NSIGNALS];
    827 static volatile size_t _eloop_nsig;
    828 
    829 static void
    830 eloop_signal3(int sig, __unused siginfo_t *siginfo, __unused void *arg)
    831 {
    832 
    833 	if (_eloop_nsig == __arraycount(_eloop_sig)) {
    834 #ifdef ELOOP_DEBUG
    835 		fprintf(stderr, "%s: signal storm, discarding signal %d\n",
    836 		    __func__, sig);
    837 #endif
    838 		return;
    839 	}
    840 
    841 	_eloop_sig[_eloop_nsig++] = sig;
    842 }
    843 #endif
    844 
    845 int
    846 eloop_signal_mask(struct eloop *eloop, sigset_t *oldset)
    847 {
    848 	sigset_t newset;
    849 	size_t i;
    850 #ifndef HAVE_KQUEUE
    851 	struct sigaction sa = {
    852 	    .sa_sigaction = eloop_signal3,
    853 	    .sa_flags = SA_SIGINFO,
    854 	};
    855 #endif
    856 
    857 	assert(eloop != NULL);
    858 
    859 	sigemptyset(&newset);
    860 	for (i = 0; i < eloop->nsignals; i++)
    861 		sigaddset(&newset, eloop->signals[i]);
    862 	if (sigprocmask(SIG_SETMASK, &newset, oldset) == -1)
    863 		return -1;
    864 
    865 #ifndef HAVE_KQUEUE
    866 	sigemptyset(&sa.sa_mask);
    867 
    868 	for (i = 0; i < eloop->nsignals; i++) {
    869 		if (sigaction(eloop->signals[i], &sa, NULL) == -1)
    870 			return -1;
    871 	}
    872 #endif
    873 
    874 	return 0;
    875 }
    876 
    877 struct eloop *
    878 eloop_new(void)
    879 {
    880 	struct eloop *eloop;
    881 
    882 	eloop = calloc(1, sizeof(*eloop));
    883 	if (eloop == NULL)
    884 		return NULL;
    885 
    886 	/* Check we have a working monotonic clock. */
    887 	if (clock_gettime(CLOCK_MONOTONIC, &eloop->now) == -1) {
    888 		free(eloop);
    889 		return NULL;
    890 	}
    891 
    892 	TAILQ_INIT(&eloop->events);
    893 	TAILQ_INIT(&eloop->free_events);
    894 	TAILQ_INIT(&eloop->timeouts);
    895 	TAILQ_INIT(&eloop->free_timeouts);
    896 	eloop->exitcode = EXIT_FAILURE;
    897 
    898 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
    899 	if (eloop_open(eloop) == -1) {
    900 		eloop_free(eloop);
    901 		return NULL;
    902 	}
    903 #endif
    904 
    905 	return eloop;
    906 }
    907 
/*
 * Remove and free all events and timeouts.
 * A -1 terminated list of fds to keep may be given; events on those
 * fds survive, every other event's fd is closed.
 * The backend buffer is released so it is rebuilt (and possibly
 * shrunk) before the next run.
 */
void
eloop_clear(struct eloop *eloop, ...)
{
	va_list va1, va2;
	int except_fd;
	struct eloop_event *e, *ne;
	struct eloop_timeout *t;

	if (eloop == NULL)
		return;

	va_start(va1, eloop);
	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
		/* Scan the exception list for this event's fd. */
		va_copy(va2, va1);
		do
			except_fd = va_arg(va2, int);
		while (except_fd != -1 && except_fd != e->fd);
		va_end(va2);
		if (e->fd == except_fd && e->fd != -1)
			continue;
		TAILQ_REMOVE(&eloop->events, e, next);
		if (e->fd != -1) {
			close(e->fd);
			eloop->nevents--;
		}
		free(e);
	}
	va_end(va1);

#if !defined(HAVE_PSELECT)
	/* Free the pollfd buffer and ensure it's re-created before
	 * the next run. This allows us to shrink it incase we use a lot less
	 * signals and fds to respond to after forking. */
	free(eloop->fds);
	eloop->fds = NULL;
	eloop->nfds = 0;
	eloop->events_need_setup = true;
#endif

	while ((e = TAILQ_FIRST(&eloop->free_events))) {
		TAILQ_REMOVE(&eloop->free_events, e, next);
		free(e);
	}
	while ((t = TAILQ_FIRST(&eloop->timeouts))) {
		TAILQ_REMOVE(&eloop->timeouts, t, next);
		free(t);
	}
	while ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
		TAILQ_REMOVE(&eloop->free_timeouts, t, next);
		free(t);
	}
	/* Tell any in-progress dispatch loop to stop touching events. */
	eloop->cleared = true;
}
    961 
/* Destroy the loop: release all events, timeouts, the backend fd and
 * the loop itself.  A NULL eloop is a no-op. */
void
eloop_free(struct eloop *eloop)
{

	if (eloop == NULL)
		return;

	eloop_clear(eloop, -1);
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	if (eloop->fd != -1)
		close(eloop->fd);
#endif
	free(eloop);
}
    973 
    974 #if defined(HAVE_KQUEUE)
/*
 * Wait for kernel events via kevent(2) and dispatch them.
 * Signals are routed to the signal callback; fd events are translated
 * into ELE_* flags for the event callback.
 * Returns the kevent(2) result: number of events, 0 on timeout,
 * or -1 on error.
 */
static int
eloop_run_kqueue(struct eloop *eloop, const struct timespec *ts)
{
	int n, nn;
	struct kevent *ke;
	struct eloop_event *e;
	unsigned short events;

	/* NOTE(review): the capacity passed is nevents although fds was
	 * sized for nsignals + nevents * NFD slots — confirm signal
	 * events cannot be starved by this smaller capacity. */
	n = _kevent(eloop->fd, NULL, 0, eloop->fds, eloop->nevents, ts);
	if (n == -1)
		return -1;

	for (nn = n, ke = eloop->fds; nn != 0; nn--, ke++) {
		/* Stop dispatching if a callback cleared or exited
		 * the loop. */
		if (eloop->cleared || eloop->exitnow)
			break;
		e = (struct eloop_event *)ke->udata;
		if (ke->filter == EVFILT_SIGNAL) {
			eloop->signal_cb((int)ke->ident,
			    eloop->signal_cb_ctx);
			continue;
		}
		if (ke->filter == EVFILT_READ)
			events = ELE_READ;
		else if (ke->filter == EVFILT_WRITE)
			events = ELE_WRITE;
#ifdef EVFILT_PROCDESC
		else if (ke->filter == EVFILT_PROCDESC &&
		    ke->fflags & NOTE_EXIT)
			/* exit status is in ke->data.
			 * As we default to using ppoll anyway
			 * we don't have to do anything with it right now. */
			events = ELE_HANGUP;
#endif
		else
			continue; /* assert? */
		if (ke->flags & EV_EOF)
			events |= ELE_HANGUP;
		if (ke->flags & EV_ERROR)
			events |= ELE_ERROR;
		e->cb(e->cb_arg, events);
	}
	return n;
}
   1018 
   1019 #elif defined(HAVE_EPOLL)
   1020 
   1021 static int
   1022 eloop_run_epoll(struct eloop *eloop,
   1023     const struct timespec *ts, const sigset_t *signals)
   1024 {
   1025 	int timeout, n, nn;
   1026 	struct epoll_event *epe;
   1027 	struct eloop_event *e;
   1028 	unsigned short events;
   1029 
   1030 	if (ts != NULL) {
   1031 		if (ts->tv_sec > INT_MAX / 1000 ||
   1032 		    (ts->tv_sec == INT_MAX / 1000 &&
   1033 		     ((ts->tv_nsec + 999999) / 1000000 > INT_MAX % 1000000)))
   1034 			timeout = INT_MAX;
   1035 		else
   1036 			timeout = (int)(ts->tv_sec * 1000 +
   1037 			    (ts->tv_nsec + 999999) / 1000000);
   1038 	} else
   1039 		timeout = -1;
   1040 
   1041 	if (signals != NULL)
   1042 		n = epoll_pwait(eloop->fd, eloop->fds,
   1043 		    (int)eloop->nevents, timeout, signals);
   1044 	else
   1045 		n = epoll_wait(eloop->fd, eloop->fds,
   1046 		    (int)eloop->nevents, timeout);
   1047 	if (n == -1)
   1048 		return -1;
   1049 
   1050 	for (nn = n, epe = eloop->fds; nn != 0; nn--, epe++) {
   1051 		if (eloop->cleared || eloop->exitnow)
   1052 			break;
   1053 		e = (struct eloop_event *)epe->data.ptr;
   1054 		if (e->fd == -1)
   1055 			continue;
   1056 		events = 0;
   1057 		if (epe->events & EPOLLIN)
   1058 			events |= ELE_READ;
   1059 		if (epe->events & EPOLLOUT)
   1060 			events |= ELE_WRITE;
   1061 		if (epe->events & EPOLLHUP)
   1062 			events |= ELE_HANGUP;
   1063 		if (epe->events & EPOLLERR)
   1064 			events |= ELE_ERROR;
   1065 		e->cb(e->cb_arg, events);
   1066 	}
   1067 	return n;
   1068 }
   1069 
   1070 #elif defined(HAVE_PPOLL)
   1071 
   1072 static int
   1073 eloop_run_ppoll(struct eloop *eloop,
   1074     const struct timespec *ts, const sigset_t *signals)
   1075 {
   1076 	int n, nn;
   1077 	struct eloop_event *e;
   1078 	struct pollfd *pfd;
   1079 	unsigned short events;
   1080 
   1081 	n = ppoll(eloop->fds, (nfds_t)eloop->nevents, ts, signals);
   1082 	if (n == -1 || n == 0)
   1083 		return n;
   1084 
   1085 	nn = n;
   1086 	TAILQ_FOREACH(e, &eloop->events, next) {
   1087 		if (eloop->cleared || eloop->exitnow)
   1088 			break;
   1089 		/* Skip freshly added events */
   1090 		if ((pfd = e->pollfd) == NULL)
   1091 			continue;
   1092 		if (e->pollfd->revents) {
   1093 			nn--;
   1094 			events = 0;
   1095 			if (pfd->revents & POLLIN)
   1096 				events |= ELE_READ;
   1097 			if (pfd->revents & POLLOUT)
   1098 				events |= ELE_WRITE;
   1099 			if (pfd->revents & POLLHUP)
   1100 				events |= ELE_HANGUP;
   1101 			if (pfd->revents & POLLERR)
   1102 				events |= ELE_ERROR;
   1103 			if (pfd->revents & POLLNVAL)
   1104 				events |= ELE_NVAL;
   1105 			if (events)
   1106 				e->cb(e->cb_arg, events);
   1107 		}
   1108 		if (nn == 0)
   1109 			break;
   1110 	}
   1111 	return n;
   1112 }
   1113 
   1114 #elif defined(HAVE_PSELECT)
   1115 
   1116 static int
   1117 eloop_run_pselect(struct eloop *eloop,
   1118     const struct timespec *ts, const sigset_t *sigmask)
   1119 {
   1120 	fd_set read_fds, write_fds;
   1121 	int maxfd, n;
   1122 	struct eloop_event *e;
   1123 	unsigned short events;
   1124 
   1125 	FD_ZERO(&read_fds);
   1126 	FD_ZERO(&write_fds);
   1127 	maxfd = 0;
   1128 	TAILQ_FOREACH(e, &eloop->events, next) {
   1129 		if (e->fd == -1)
   1130 			continue;
   1131 		if (e->events & ELE_READ) {
   1132 			FD_SET(e->fd, &read_fds);
   1133 			if (e->fd > maxfd)
   1134 				maxfd = e->fd;
   1135 		}
   1136 		if (e->events & ELE_WRITE) {
   1137 			FD_SET(e->fd, &write_fds);
   1138 			if (e->fd > maxfd)
   1139 				maxfd = e->fd;
   1140 		}
   1141 	}
   1142 
   1143 	/* except_fd's is for STREAMS devices which we don't use. */
   1144 	n = pselect(maxfd + 1, &read_fds, &write_fds, NULL, ts, sigmask);
   1145 	if (n == -1 || n == 0)
   1146 		return n;
   1147 
   1148 	TAILQ_FOREACH(e, &eloop->events, next) {
   1149 		if (eloop->cleared || eloop->exitnow)
   1150 			break;
   1151 		if (e->fd == -1)
   1152 			continue;
   1153 		events = 0;
   1154 		if (FD_ISSET(e->fd, &read_fds))
   1155 			events |= ELE_READ;
   1156 		if (FD_ISSET(e->fd, &write_fds))
   1157 			events |= ELE_WRITE;
   1158 		if (events)
   1159 			e->cb(e->cb_arg, events);
   1160 	}
   1161 
   1162 	return n;
   1163 }
   1164 #endif
   1165 
/*
 * Run the event loop until eloop_exit() is called or there is nothing
 * left to wait on (no timeouts and no registered events).
 * signals, where the polling mechanism supports it, is the signal mask
 * applied atomically while waiting; kqueue delivers signals as kevents
 * and ignores it.
 * Returns the exit code set by eloop_exit(), or -errno if the polling
 * call failed with anything other than EINTR.
 */
int
eloop_start(struct eloop *eloop, sigset_t *signals)
{
	int error;
	struct eloop_timeout *t;
	struct timespec ts, *tsp;

	assert(eloop != NULL);
#ifdef HAVE_KQUEUE
	UNUSED(signals);
#endif

	for (;;) {
		if (eloop->exitnow)
			break;

#ifndef HAVE_KQUEUE
		/* Drain one queued signal per iteration before polling
		 * again; _eloop_sig/_eloop_nsig are filled by the signal
		 * handler, which only records the signal number. */
		if (_eloop_nsig != 0) {
			int n = _eloop_sig[--_eloop_nsig];

			if (eloop->signal_cb != NULL)
				eloop->signal_cb(n, eloop->signal_cb_ctx);
			continue;
		}
#endif

		/* Nothing to wait on at all - leave the loop. */
		t = TAILQ_FIRST(&eloop->timeouts);
		if (t == NULL && eloop->nevents == 0)
			break;

		/* NOTE(review): presumably charges elapsed wall time
		 * against all pending timers - defined earlier in file. */
		if (t != NULL)
			eloop_reduce_timers(eloop);

		/* The soonest timer has expired: fire it and put the
		 * slot on the free list for reuse, then re-evaluate. */
		if (t != NULL && t->seconds == 0 && t->nseconds == 0) {
			TAILQ_REMOVE(&eloop->timeouts, t, next);
			t->callback(t->arg);
			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
			continue;
		}

		/* Build the poll timeout from the soonest timer,
		 * clamping the seconds to INT_MAX. */
		if (t != NULL) {
			if (t->seconds > INT_MAX) {
				ts.tv_sec = (time_t)INT_MAX;
				ts.tv_nsec = 0;
			} else {
				ts.tv_sec = (time_t)t->seconds;
				ts.tv_nsec = (long)t->nseconds;
			}
			tsp = &ts;
		} else
			tsp = NULL;	/* no timers - wait forever */

		/* Callbacks set cleared via eloop_clear(); reset it so
		 * the pollers can detect mid-batch invalidation. */
		eloop->cleared = false;
		if (eloop->events_need_setup)
			eloop_event_setup_fds(eloop);

		/* Dispatch to whichever polling mechanism was built in. */
#if defined(HAVE_KQUEUE)
		UNUSED(signals);
		error = eloop_run_kqueue(eloop, tsp);
#elif defined(HAVE_EPOLL)
		error = eloop_run_epoll(eloop, tsp, signals);
#elif defined(HAVE_PPOLL)
		error = eloop_run_ppoll(eloop, tsp, signals);
#elif defined(HAVE_PSELECT)
		error = eloop_run_pselect(eloop, tsp, signals);
#else
#error no polling mechanism to run!
#endif
		if (error == -1) {
			if (errno == EINTR)
				continue;	/* interrupted - retry */
			return -errno;
		}
	}

	return eloop->exitcode;
}
   1243