/*	$NetBSD: sys_epoll.c,v 1.4 2023/07/30 18:31:13 christos Exp $	*/

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2007 Roman Divacky
 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_epoll.c,v 1.4 2023/07/30 18:31:13 christos Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/bitops.h>
#include <sys/epoll.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/kmem.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <sys/vnode.h>

#include <sys/syscallargs.h>

#define	EPOLL_MAX_DEPTH		5

#define	EPOLL_EVRD	(EPOLLIN|EPOLLRDNORM)
#define	EPOLL_EVWR	(EPOLLOUT|EPOLLWRNORM)
#define	EPOLL_EVSUP	(EPOLLET|EPOLLONESHOT|EPOLLHUP|EPOLLERR|EPOLLPRI \
			|EPOLL_EVRD|EPOLL_EVWR|EPOLLRDHUP)

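/*
 * The epoll emulation stashes its bookkeeping in the kevent extension
 * words: the user's data word and the epfd/fd pair identifying the watch.
 */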
#define	kext_data	ext[0]
#define	kext_epfd	ext[1]
#define	kext_fd		ext[2]

#if DEBUG
#define	DPRINTF(x) uprintf x
#else
#define	DPRINTF(x) __nothing
#endif

struct epoll_edge {
	int epfd;
	int fd;
};

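/* Bitmap of file descriptors already visited by the loop-detection DFS. */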
__BITMAP_TYPE(epoll_seen, char, 1);

static int	epoll_to_kevent(int, int, struct epoll_event *, struct kevent *,
    int *);
static void	kevent_to_epoll(struct kevent *, struct epoll_event *);
static int	epoll_kev_put_events(void *, struct kevent *, struct kevent *,
    size_t, int);
static int	epoll_kev_fetch_changes(void *, const struct kevent *,
    struct kevent *, size_t, int);
static int	epoll_kev_fetch_timeout(const void *, void *, size_t);
static int	epoll_register_kevent(register_t *, int, int, int,
    unsigned int);
static int	epoll_fd_registered(register_t *, int, int);
static int	epoll_delete_all_events(register_t *, int, int);
static int	epoll_recover_watch_tree(struct epoll_edge *, size_t, size_t);
static int	epoll_dfs(struct epoll_edge *, size_t, struct epoll_seen *,
    size_t, int, int);
static int	epoll_check_loop_and_depth(struct lwp *, int, int);

/*
 * epoll_create1(2).  Parse the flags and then create a kqueue instance.
 */
int
sys_epoll_create1(struct lwp *l, const struct sys_epoll_create1_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
	} */
	struct sys_kqueue1_args kqa;

	if ((SCARG(uap, flags) & ~(EPOLL_CLOEXEC)) != 0)
		return EINVAL;

	SCARG(&kqa, flags) = 0;
	if (SCARG(uap, flags) & EPOLL_CLOEXEC)
		SCARG(&kqa, flags) |= O_CLOEXEC;

	return sys_kqueue1(l, &kqa, retval);
}

/*
 * Convert an epoll event into the kevent(s) needed to represent it.
 */
static int
epoll_to_kevent(int epfd, int fd, struct epoll_event *l_event,
    struct kevent *kevent, int *nkevents)
{
	uint32_t levents = l_event->events;
	uint32_t kev_flags = EV_ADD | EV_ENABLE;

	/* flags related to how event is registered */
	if ((levents & EPOLLONESHOT) != 0)
		kev_flags |= EV_DISPATCH;
	if ((levents & EPOLLET) != 0)
		kev_flags |= EV_CLEAR;
	if ((levents & EPOLLERR) != 0)
		kev_flags |= EV_ERROR;
	if ((levents & EPOLLRDHUP) != 0)
		kev_flags |= EV_EOF;

	/* flags related to what event is registered */
	if ((levents & EPOLL_EVRD) != 0) {
		EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
		kevent->kext_data = l_event->data;
		kevent->kext_epfd = epfd;
		kevent->kext_fd = fd;
		++kevent;
		++(*nkevents);
	}
	if ((levents & EPOLL_EVWR) != 0) {
		EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
		kevent->kext_data = l_event->data;
		kevent->kext_epfd = epfd;
		kevent->kext_fd = fd;
		++kevent;
		++(*nkevents);
	}
	/* zero event mask is legal */
	if ((levents & (EPOLL_EVRD | EPOLL_EVWR)) == 0) {
		EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
		++(*nkevents);
	}

	if ((levents & ~(EPOLL_EVSUP)) != 0) {
		return EINVAL;
	}

	return 0;
}

/*
 * Convert a kevent back into an epoll event.  If this is called for an
 * error during registration, we store the error in event->data and pick
 * it up later in sys_epoll_ctl().
 */
static void
kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
{

	l_event->data = kevent->kext_data;

	if ((kevent->flags & EV_ERROR) != 0) {
		l_event->events = EPOLLERR;
		return;
	}

	/* XXX EPOLLPRI, EPOLLHUP */
	switch (kevent->filter) {
	case EVFILT_READ:
		l_event->events = EPOLLIN;
		if ((kevent->flags & EV_EOF) != 0)
			l_event->events |= EPOLLRDHUP;
		break;
	case EVFILT_WRITE:
		l_event->events = EPOLLOUT;
		break;
	default:
		DPRINTF(("%s: unhandled kevent filter %d\n", __func__,
		    kevent->filter));
		break;
	}
}

/*
 * Copyout callback used by kevent.  Converts the returned kevents into
 * epoll events and stores them in the caller-supplied eventlist, which
 * actually points to an array of struct epoll_event.
 */
static int
epoll_kev_put_events(void *ctx, struct kevent *events,
    struct kevent *eventlist, size_t index, int n)
{
	int i;
	struct epoll_event *eep = (struct epoll_event *)eventlist;

	KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS);

	for (i = 0; i < n; i++)
		kevent_to_epoll(events + i, eep + index + i);

	return 0;
}

/*
 * Copyin callback used by kevent. This copies already
 * converted filters from kernel memory to the kevent
 * internal kernel memory. Hence the memcpy instead of
 * copyin.
 */
static int
epoll_kev_fetch_changes(void *ctx, const struct kevent *changelist,
    struct kevent *changes, size_t index, int n)
{
	KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS);

	memcpy(changes, changelist + index, n * sizeof(*changes));

	return 0;
}

/*
 * Timer copy callback used by kevent.  Copies a converted timeout
 * from kernel memory to kevent memory.  Hence the memcpy instead of
 * just using copyin.
 */
static int
epoll_kev_fetch_timeout(const void *src, void *dest, size_t size)
{
	memcpy(dest, src, size);

	return 0;
}

/*
 * Load an epoll filter, convert it to the corresponding kevent filter(s)
 * and load it into the kevent subsystem.
 *
 * event must point to kernel memory or be NULL.
 */
int
epoll_ctl_common(struct lwp *l, register_t *retval, int epfd, int op, int fd,
    struct epoll_event *event)
{
	struct kevent kev[2];
	struct kevent_ops k_ops = {
		.keo_private = NULL,
		.keo_fetch_timeout = NULL,
		.keo_fetch_changes = epoll_kev_fetch_changes,
		.keo_put_events = NULL,
	};
	file_t *epfp, *fp;
	int error = 0;
	int nchanges = 0;

	/*
	 * Need to validate epfd and fd separately from kevent1 to match
	 * Linux's errno behaviour.
	 */
	epfp = fd_getfile(epfd);
	if (epfp == NULL)
		return EBADF;
	if (epfp->f_type != DTYPE_KQUEUE)
		error = EINVAL;
	fd_putfile(epfd);
	if (error != 0)
		return error;

	fp = fd_getfile(fd);
	if (fp == NULL)
		return EBADF;
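	/*
	 * Vnode types that cannot be polled are rejected with EPERM,
	 * mirroring Linux, where epoll_ctl(2) fails with EPERM for files
	 * that do not support epoll (e.g. regular files and directories).
	 */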
	if (fp->f_type == DTYPE_VNODE) {
		switch (fp->f_vnode->v_type) {
		case VREG:
		case VDIR:
		case VBLK:
		case VLNK:
			error = EPERM;
			break;

		default:
			break;
		}
	}
	fd_putfile(fd);
	if (error != 0)
		return error;

	/* Linux disallows an epoll instance watching itself. */
	if (epfd == fd) {
		return EINVAL;
	}

	if (op != EPOLL_CTL_DEL) {
		error = epoll_to_kevent(epfd, fd, event, kev, &nchanges);
		if (error != 0)
			return error;
	}

	switch (op) {
	case EPOLL_CTL_MOD:
		error = epoll_delete_all_events(retval, epfd, fd);
		if (error != 0)
			return error;
		break;

	case EPOLL_CTL_ADD:
		if (epoll_fd_registered(retval, epfd, fd))
			return EEXIST;
		error = epoll_check_loop_and_depth(l, epfd, fd);
		if (error != 0)
			return error;
		break;

	case EPOLL_CTL_DEL:
		/* EPOLL_CTL_DEL means unregister this fd from this epoll. */
		return epoll_delete_all_events(retval, epfd, fd);

	default:
		DPRINTF(("%s: invalid op %d\n", __func__, op));
		return EINVAL;
	}

	error = kevent1(retval, epfd, kev, nchanges, NULL, 0, NULL, &k_ops);

	if (error == EOPNOTSUPP) {
		error = EPERM;
	}

	return error;
}

/*
 * epoll_ctl(2).  Copyin event if necessary and then call
 * epoll_ctl_common().
 */
int
sys_epoll_ctl(struct lwp *l, const struct sys_epoll_ctl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) epfd;
		syscallarg(int) op;
		syscallarg(int) fd;
		syscallarg(struct epoll_event *) event;
	} */
	struct epoll_event ee;
	struct epoll_event *eep;
	int error;

	if (SCARG(uap, op) != EPOLL_CTL_DEL) {
		error = copyin(SCARG(uap, event), &ee, sizeof(ee));
		if (error != 0)
			return error;

		eep = &ee;
	} else
		eep = NULL;

	return epoll_ctl_common(l, retval, SCARG(uap, epfd), SCARG(uap, op),
	    SCARG(uap, fd), eep);
}

/*
 * Wait for a filter to be triggered on the epoll file descriptor.
 * All of the epoll_*wait* syscalls eventually end up here.
 *
 * events, tsp, and nssp must point to kernel memory (or be NULL).
 */
int
epoll_wait_common(struct lwp *l, register_t *retval, int epfd,
    struct epoll_event *events, int maxevents, struct timespec *tsp,
    const sigset_t *nssp)
{
	struct kevent_ops k_ops = {
		.keo_private = NULL,
		.keo_fetch_timeout = epoll_kev_fetch_timeout,
		.keo_fetch_changes = NULL,
		.keo_put_events = epoll_kev_put_events,
	};
	struct proc *p = l->l_proc;
	file_t *epfp;
	sigset_t oss;
	int error = 0;

	if (maxevents <= 0 || maxevents > EPOLL_MAX_EVENTS)
		return EINVAL;

	/*
	 * Need to validate epfd separately from kevent1 to match
	 * Linux's errno behaviour.
	 */
	epfp = fd_getfile(epfd);
	if (epfp == NULL)
		return EBADF;
	if (epfp->f_type != DTYPE_KQUEUE)
		error = EINVAL;
	fd_putfile(epfd);
	if (error != 0)
		return error;

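	/*
	 * As with Linux epoll_pwait(), temporarily install the caller's
	 * signal mask for the duration of the wait.
	 */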
	if (nssp != NULL) {
		mutex_enter(p->p_lock);
		error = sigprocmask1(l, SIG_SETMASK, nssp, &oss);
		mutex_exit(p->p_lock);
		if (error != 0)
			return error;
	}

	error = kevent1(retval, epfd, NULL, 0, (struct kevent *)events,
	    maxevents, tsp, &k_ops);
	/*
	 * Since we're not registering any events, ENOMEM should not
	 * be possible for this specific kevent1 call.
	 */
	KASSERT(error != ENOMEM);

	if (nssp != NULL) {
		mutex_enter(p->p_lock);
		error = sigprocmask1(l, SIG_SETMASK, &oss, NULL);
		mutex_exit(p->p_lock);
	}

	return error;
}

/*
 * epoll_pwait2(2).
 */
int
sys_epoll_pwait2(struct lwp *l, const struct sys_epoll_pwait2_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) epfd;
		syscallarg(struct epoll_event *) events;
		syscallarg(int) maxevents;
		syscallarg(struct timespec *) timeout;
		syscallarg(sigset_t *) sigmask;
	} */
	struct epoll_event *events;
	struct timespec ts, *tsp;
	sigset_t ss, *ssp;
	int error;
	const int maxevents = SCARG(uap, maxevents);

	if (maxevents <= 0 || maxevents >= EPOLL_MAX_EVENTS)
		return EINVAL;

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error != 0)
			return error;

		tsp = &ts;
	} else
		tsp = NULL;

	if (SCARG(uap, sigmask) != NULL) {
		error = copyin(SCARG(uap, sigmask), &ss, sizeof(ss));
		if (error != 0)
			return error;

		ssp = &ss;
	} else
		ssp = NULL;

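	/*
	 * maxevents was range-checked above, so the allocation below is
	 * bounded by EPOLL_MAX_EVENTS entries.
	 */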
	events = kmem_alloc(maxevents * sizeof(*events), KM_SLEEP);

	error = epoll_wait_common(l, retval, SCARG(uap, epfd), events,
	    maxevents, tsp, ssp);
	if (error == 0)
		error = copyout(events, SCARG(uap, events),
		    *retval * sizeof(*events));

	kmem_free(events, maxevents * sizeof(*events));
	return error;
}

/*
 * Helper that registers a single kevent.
 */
static int
epoll_register_kevent(register_t *retval, int epfd, int fd, int filter,
    unsigned int flags)
{
	struct kevent kev;
	struct kevent_ops k_ops = {
		.keo_private = NULL,
		.keo_fetch_timeout = NULL,
		.keo_fetch_changes = epoll_kev_fetch_changes,
		.keo_put_events = NULL,
	};

	EV_SET(&kev, fd, filter, flags, 0, 0, 0);

	return kevent1(retval, epfd, &kev, 1, NULL, 0, NULL, &k_ops);
}

/*
 * Check if an fd is already registered in the kqueue referenced by epfd.
 */
static int
epoll_fd_registered(register_t *retval, int epfd, int fd)
{
	/*
	 * Set empty filter flags to avoid accidental modification of already
	 * registered events.  In the case of event re-registration:
	 * 1. If the event does not exist, kevent() does nothing and returns
	 *    ENOENT.
	 * 2. If the event does exist, its enabled/disabled state is
	 *    preserved, but the fflags, data and udata fields are
	 *    overwritten, so we cannot set socket low-water marks or store
	 *    the user's context pointer in udata.
	 */
	if (epoll_register_kevent(retval, epfd, fd, EVFILT_READ, 0) != ENOENT ||
	    epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE, 0) != ENOENT)
		return 1;

	return 0;
}

/*
 * Remove all events in the kqueue referenced by epfd that depend on
 * fd.
 */
static int
epoll_delete_all_events(register_t *retval, int epfd, int fd)
{
	int error1, error2;

	error1 = epoll_register_kevent(retval, epfd, fd, EVFILT_READ,
	    EV_DELETE);
	error2 = epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE,
	    EV_DELETE);

	/* Return 0 if at least one of the deletions succeeded. */
	return error1 == 0 ? 0 : error2;
}

/*
 * Iterate through all the knotes and recover the directed graph
 * describing which kqueues are watching each other.
 *
 * If edges is NULL, the number of edges is still counted but no graph
 * is assembled.
 */
static int
epoll_recover_watch_tree(struct epoll_edge *edges, size_t nedges, size_t nfds)
{
	file_t *currfp, *targetfp;
	struct knote *kn, *tmpkn;
	size_t i, nedges_so_far = 0;

	for (i = 0; i < nfds && (edges == NULL || nedges_so_far < nedges); i++)
	{
		currfp = fd_getfile(i);
		if (currfp == NULL)
			continue;
		if (currfp->f_type != DTYPE_KQUEUE)
			goto continue_count_outer;

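		/*
		 * Knotes registered through epoll record the watching epoll
		 * fd in kext_epfd; only count an edge when that fd really
		 * refers to another kqueue.
		 */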
		SLIST_FOREACH_SAFE(kn, &currfp->f_kqueue->kq_sel.sel_klist,
		    kn_selnext, tmpkn) {
			targetfp = fd_getfile(kn->kn_kevent.kext_epfd);
			if (targetfp == NULL)
				continue;
			if (targetfp->f_type == DTYPE_KQUEUE) {
				if (edges != NULL) {
					edges[nedges_so_far].epfd =
					    kn->kn_kevent.kext_epfd;
					edges[nedges_so_far].fd =
					    kn->kn_kevent.kext_fd;
				}
				nedges_so_far++;
			}

			fd_putfile(kn->kn_kevent.kext_epfd);
		}

continue_count_outer:
		fd_putfile(i);
	}

	return nedges_so_far;
}

/*
 * Run a DFS on the graph described by edges, checking for loops and
 * for a depth greater than EPOLL_MAX_DEPTH.
 */
static int
epoll_dfs(struct epoll_edge *edges, size_t nedges, struct epoll_seen *seen,
    size_t nseen, int currfd, int depth)
{
	int error;
	size_t i;

	KASSERT(edges != NULL);
	KASSERT(seen != NULL);
	KASSERT(nedges > 0);
	KASSERT(currfd < nseen);
	KASSERT(0 <= depth && depth <= EPOLL_MAX_DEPTH + 1);

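	/* An fd visited twice during the traversal is treated as a loop. */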
	if (__BITMAP_ISSET(currfd, seen))
		return ELOOP;

	__BITMAP_SET(currfd, seen);

	depth++;
	if (depth > EPOLL_MAX_DEPTH)
		return EINVAL;

	for (i = 0; i < nedges; i++) {
		if (edges[i].epfd != currfd)
			continue;

		error = epoll_dfs(edges, nedges, seen, nseen,
		    edges[i].fd, depth);
		if (error != 0)
			return error;
	}

	return 0;
}

/*
 * Check if adding fd to epfd would violate the maximum depth or
 * create a loop.
 */
static int
epoll_check_loop_and_depth(struct lwp *l, int epfd, int fd)
{
	int error;
	file_t *fp;
	struct epoll_edge *edges;
	struct epoll_seen *seen;
	size_t nedges, nfds, seen_size;
	bool fdirrelevant;

	/* If the target isn't another kqueue, we can skip this check */
	fp = fd_getfile(fd);
	if (fp == NULL)
		return 0;
	fdirrelevant = fp->f_type != DTYPE_KQUEUE;
	fd_putfile(fd);
	if (fdirrelevant)
		return 0;

	nfds = l->l_proc->p_fd->fd_lastfile + 1;

	/*
	 * We call epoll_recover_watch_tree twice, once to find the
	 * number of edges, and once to actually fill them in.  We add one
	 * because we want to include the edge epfd->fd.
	 */
	nedges = 1 + epoll_recover_watch_tree(NULL, 0, nfds);

	edges = kmem_zalloc(nedges * sizeof(*edges), KM_SLEEP);

	epoll_recover_watch_tree(edges + 1, nedges - 1, nfds);

	edges[0].epfd = epfd;
	edges[0].fd = fd;

	seen_size = __BITMAP_SIZE(char, nfds);
	seen = kmem_zalloc(seen_size, KM_SLEEP);

	error = epoll_dfs(edges, nedges, seen, nfds, epfd, 0);

	kmem_free(seen, seen_size);
	kmem_free(edges, nedges * sizeof(*edges));

	return error;
}
    683