1 1.4 christos /* $NetBSD: sys_epoll.c,v 1.4 2023/07/30 18:31:13 christos Exp $ */ 2 1.1 christos 3 1.1 christos /*- 4 1.1 christos * SPDX-License-Identifier: BSD-2-Clause 5 1.1 christos * 6 1.1 christos * Copyright (c) 2007 Roman Divacky 7 1.1 christos * Copyright (c) 2014 Dmitry Chagin <dchagin (at) FreeBSD.org> 8 1.1 christos * 9 1.1 christos * Redistribution and use in source and binary forms, with or without 10 1.1 christos * modification, are permitted provided that the following conditions 11 1.1 christos * are met: 12 1.1 christos * 1. Redistributions of source code must retain the above copyright 13 1.1 christos * notice, this list of conditions and the following disclaimer. 14 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright 15 1.1 christos * notice, this list of conditions and the following disclaimer in the 16 1.1 christos * documentation and/or other materials provided with the distribution. 17 1.1 christos * 18 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 1.1 christos * SUCH DAMAGE. 29 1.1 christos */ 30 1.1 christos #include <sys/cdefs.h> 31 1.4 christos __KERNEL_RCSID(0, "$NetBSD: sys_epoll.c,v 1.4 2023/07/30 18:31:13 christos Exp $"); 32 1.1 christos 33 1.1 christos 34 1.1 christos #include <sys/param.h> 35 1.1 christos #include <sys/types.h> 36 1.1 christos #include <sys/bitops.h> 37 1.1 christos #include <sys/epoll.h> 38 1.1 christos #include <sys/event.h> 39 1.1 christos #include <sys/eventvar.h> 40 1.1 christos #include <sys/errno.h> 41 1.1 christos #include <sys/file.h> 42 1.1 christos #include <sys/filedesc.h> 43 1.1 christos #include <sys/fcntl.h> 44 1.1 christos #include <sys/proc.h> 45 1.1 christos #include <sys/signal.h> 46 1.1 christos #include <sys/vnode.h> 47 1.1 christos 48 1.1 christos #include <sys/syscallargs.h> 49 1.1 christos 50 1.1 christos #define EPOLL_MAX_DEPTH 5 51 1.1 christos 52 1.1 christos #define EPOLL_EVRD (EPOLLIN|EPOLLRDNORM) 53 1.1 christos #define EPOLL_EVWR (EPOLLOUT|EPOLLWRNORM) 54 1.1 christos #define EPOLL_EVSUP (EPOLLET|EPOLLONESHOT|EPOLLHUP|EPOLLERR|EPOLLPRI \ 55 1.1 christos |EPOLL_EVRD|EPOLL_EVWR|EPOLLRDHUP) 56 1.1 christos 57 1.1 christos #define kext_data ext[0] 58 1.1 christos #define kext_epfd ext[1] 59 1.1 christos #define kext_fd ext[2] 60 1.1 christos 61 1.1 christos #if DEBUG 62 1.1 christos #define DPRINTF(x) uprintf x 63 1.1 christos #else 64 1.1 christos #define DPRINTF(x) __nothing 65 1.1 christos #endif 66 1.1 christos 67 1.1 christos struct epoll_edge { 68 1.1 christos int epfd; 69 1.1 christos int fd; 70 1.1 christos }; 71 1.1 christos 72 1.1 christos __BITMAP_TYPE(epoll_seen, char, 1); 73 1.1 christos 74 1.1 christos static int epoll_to_kevent(int, int, struct epoll_event *, struct kevent *, 75 1.1 christos int *); 76 1.1 christos static void kevent_to_epoll(struct kevent *, struct epoll_event *); 77 1.1 christos static int epoll_kev_put_events(void *, struct kevent *, struct kevent *, 78 1.1 christos size_t, int); 79 1.1 christos static int epoll_kev_fetch_changes(void *, const struct kevent *, 80 1.1 christos struct kevent *, size_t, int); 81 1.1 christos static int epoll_kev_fetch_timeout(const void *, void *, size_t); 82 1.1 christos static int epoll_register_kevent(register_t *, int, int, int, 83 1.1 christos unsigned int); 84 1.1 christos static int epoll_fd_registered(register_t *, int, int); 85 1.1 christos static int epoll_delete_all_events(register_t *, int, int); 86 1.1 christos static int epoll_recover_watch_tree(struct epoll_edge *, size_t, size_t); 87 1.1 christos static int epoll_dfs(struct epoll_edge *, size_t, struct epoll_seen *, 88 1.1 christos size_t, int, int); 89 1.1 christos static int epoll_check_loop_and_depth(struct lwp *, int, int); 90 1.1 christos 91 1.1 christos /* 92 1.1 christos * epoll_create1(2). Parse the flags and then create a kqueue instance. 93 1.1 christos */ 94 1.1 christos int 95 1.1 christos sys_epoll_create1(struct lwp *l, const struct sys_epoll_create1_args *uap, 96 1.1 christos register_t *retval) 97 1.1 christos { 98 1.1 christos /* { 99 1.1 christos syscallarg(int) flags; 100 1.1 christos } */ 101 1.1 christos struct sys_kqueue1_args kqa; 102 1.1 christos 103 1.4 christos if ((SCARG(uap, flags) & ~(EPOLL_CLOEXEC)) != 0) 104 1.1 christos return EINVAL; 105 1.1 christos 106 1.4 christos SCARG(&kqa, flags) = 0; 107 1.4 christos if (SCARG(uap, flags) & EPOLL_CLOEXEC) 108 1.4 christos SCARG(&kqa, flags) |= O_CLOEXEC; 109 1.1 christos 110 1.1 christos return sys_kqueue1(l, &kqa, retval); 111 1.1 christos } 112 1.1 christos 113 1.1 christos /* 114 1.1 christos * Structure converting function from epoll to kevent. 115 1.1 christos */ 116 1.1 christos static int 117 1.1 christos epoll_to_kevent(int epfd, int fd, struct epoll_event *l_event, 118 1.1 christos struct kevent *kevent, int *nkevents) 119 1.1 christos { 120 1.1 christos uint32_t levents = l_event->events; 121 1.1 christos uint32_t kev_flags = EV_ADD | EV_ENABLE; 122 1.1 christos 123 1.1 christos /* flags related to how event is registered */ 124 1.1 christos if ((levents & EPOLLONESHOT) != 0) 125 1.1 christos kev_flags |= EV_DISPATCH; 126 1.1 christos if ((levents & EPOLLET) != 0) 127 1.1 christos kev_flags |= EV_CLEAR; 128 1.1 christos if ((levents & EPOLLERR) != 0) 129 1.1 christos kev_flags |= EV_ERROR; 130 1.1 christos if ((levents & EPOLLRDHUP) != 0) 131 1.1 christos kev_flags |= EV_EOF; 132 1.1 christos 133 1.1 christos /* flags related to what event is registered */ 134 1.1 christos if ((levents & EPOLL_EVRD) != 0) { 135 1.1 christos EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 136 1.1 christos kevent->kext_data = l_event->data; 137 1.1 christos kevent->kext_epfd = epfd; 138 1.1 christos kevent->kext_fd = fd; 139 1.1 christos ++kevent; 140 1.1 christos ++(*nkevents); 141 1.1 christos } 142 1.1 christos if ((levents & EPOLL_EVWR) != 0) { 143 1.1 christos EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 144 1.1 christos kevent->kext_data = l_event->data; 145 1.1 christos kevent->kext_epfd = epfd; 146 1.1 christos kevent->kext_fd = fd; 147 1.1 christos ++kevent; 148 1.1 christos ++(*nkevents); 149 1.1 christos } 150 1.1 christos /* zero event mask is legal */ 151 1.1 christos if ((levents & (EPOLL_EVRD | EPOLL_EVWR)) == 0) { 152 1.1 christos EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 153 1.1 christos ++(*nkevents); 154 1.1 christos } 155 1.1 christos 156 1.1 christos if ((levents & ~(EPOLL_EVSUP)) != 0) { 157 1.1 christos return EINVAL; 158 1.1 christos } 159 1.1 christos 160 1.1 christos return 0; 161 1.1 christos } 162 1.1 christos 163 1.1 christos /* 164 1.1 christos * Structure converting function from kevent to epoll. In a case 165 1.1 christos * this is called on error in registration we store the error in 166 1.1 christos * event->data and pick it up later in sys_epoll_ctl(). 167 1.1 christos */ 168 1.1 christos static void 169 1.1 christos kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 170 1.1 christos { 171 1.1 christos 172 1.1 christos l_event->data = kevent->kext_data; 173 1.1 christos 174 1.1 christos if ((kevent->flags & EV_ERROR) != 0) { 175 1.1 christos l_event->events = EPOLLERR; 176 1.1 christos return; 177 1.1 christos } 178 1.1 christos 179 1.1 christos /* XXX EPOLLPRI, EPOLLHUP */ 180 1.1 christos switch (kevent->filter) { 181 1.1 christos case EVFILT_READ: 182 1.1 christos l_event->events = EPOLLIN; 183 1.1 christos if ((kevent->flags & EV_EOF) != 0) 184 1.1 christos l_event->events |= EPOLLRDHUP; 185 1.1 christos break; 186 1.1 christos case EVFILT_WRITE: 187 1.1 christos l_event->events = EPOLLOUT; 188 1.1 christos break; 189 1.1 christos default: 190 1.1 christos DPRINTF(("%s: unhandled kevent filter %d\n", __func__, 191 1.1 christos kevent->filter)); 192 1.1 christos break; 193 1.1 christos } 194 1.1 christos } 195 1.1 christos 196 1.1 christos /* 197 1.1 christos * Copyout callback used by kevent. This converts kevent events to 198 1.1 christos * epoll events that are located in args->eventlist. 199 1.1 christos */ 200 1.1 christos static int 201 1.1 christos epoll_kev_put_events(void *ctx, struct kevent *events, 202 1.1 christos struct kevent *eventlist, size_t index, int n) 203 1.1 christos { 204 1.1 christos int i; 205 1.1 christos struct epoll_event *eep = (struct epoll_event *)eventlist; 206 1.1 christos 207 1.1 christos KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS); 208 1.1 christos 209 1.1 christos for (i = 0; i < n; i++) 210 1.1 christos kevent_to_epoll(events + i, eep + index + i); 211 1.1 christos 212 1.1 christos return 0; 213 1.1 christos } 214 1.1 christos 215 1.1 christos /* 216 1.1 christos * Copyin callback used by kevent. This copies already 217 1.1 christos * converted filters from kernel memory to the kevent 218 1.1 christos * internal kernel memory. Hence the memcpy instead of 219 1.1 christos * copyin. 220 1.1 christos */ 221 1.1 christos static int 222 1.1 christos epoll_kev_fetch_changes(void *ctx, const struct kevent *changelist, 223 1.1 christos struct kevent *changes, size_t index, int n) 224 1.1 christos { 225 1.1 christos KASSERT(n >= 0 && n < EPOLL_MAX_EVENTS); 226 1.1 christos 227 1.1 christos memcpy(changes, changelist + index, n * sizeof(*changes)); 228 1.1 christos 229 1.1 christos return 0; 230 1.1 christos } 231 1.1 christos 232 1.1 christos /* 233 1.1 christos * Timer copy callback used by kevent. Copies a converted timeout 234 1.1 christos * from kernel memory to kevent memory. Hence the memcpy instead of 235 1.1 christos * just using copyin. 236 1.1 christos */ 237 1.1 christos static int 238 1.1 christos epoll_kev_fetch_timeout(const void *src, void *dest, size_t size) 239 1.1 christos { 240 1.1 christos memcpy(dest, src, size); 241 1.1 christos 242 1.1 christos return 0; 243 1.1 christos } 244 1.1 christos 245 1.1 christos /* 246 1.1 christos * Load epoll filter, convert it to kevent filter and load it into 247 1.1 christos * kevent subsystem. 248 1.1 christos * 249 1.1 christos * event must point to kernel memory or be NULL. 250 1.1 christos */ 251 1.1 christos int 252 1.1 christos epoll_ctl_common(struct lwp *l, register_t *retval, int epfd, int op, int fd, 253 1.1 christos struct epoll_event *event) 254 1.1 christos { 255 1.1 christos struct kevent kev[2]; 256 1.3 rin struct kevent_ops k_ops = { 257 1.1 christos .keo_private = NULL, 258 1.1 christos .keo_fetch_timeout = NULL, 259 1.1 christos .keo_fetch_changes = epoll_kev_fetch_changes, 260 1.1 christos .keo_put_events = NULL, 261 1.1 christos }; 262 1.1 christos file_t *epfp, *fp; 263 1.1 christos int error = 0; 264 1.1 christos int nchanges = 0; 265 1.1 christos 266 1.1 christos /* 267 1.1 christos * Need to validate epfd and fd separately from kevent1 to match 268 1.1 christos * Linux's errno behaviour. 269 1.1 christos */ 270 1.1 christos epfp = fd_getfile(epfd); 271 1.1 christos if (epfp == NULL) 272 1.1 christos return EBADF; 273 1.1 christos if (epfp->f_type != DTYPE_KQUEUE) 274 1.1 christos error = EINVAL; 275 1.1 christos fd_putfile(epfd); 276 1.1 christos if (error != 0) 277 1.1 christos return error; 278 1.1 christos 279 1.1 christos fp = fd_getfile(fd); 280 1.1 christos if (fp == NULL) 281 1.1 christos return EBADF; 282 1.1 christos if (fp->f_type == DTYPE_VNODE) { 283 1.1 christos switch (fp->f_vnode->v_type) { 284 1.1 christos case VREG: 285 1.1 christos case VDIR: 286 1.1 christos case VBLK: 287 1.1 christos case VLNK: 288 1.1 christos error = EPERM; 289 1.1 christos break; 290 1.1 christos 291 1.1 christos default: 292 1.1 christos break; 293 1.1 christos } 294 1.1 christos } 295 1.1 christos fd_putfile(fd); 296 1.1 christos if (error != 0) 297 1.1 christos return error; 298 1.1 christos 299 1.1 christos /* Linux disallows spying on himself */ 300 1.1 christos if (epfd == fd) { 301 1.1 christos return EINVAL; 302 1.1 christos } 303 1.1 christos 304 1.1 christos if (op != EPOLL_CTL_DEL) { 305 1.1 christos error = epoll_to_kevent(epfd, fd, event, kev, &nchanges); 306 1.1 christos if (error != 0) 307 1.1 christos return error; 308 1.1 christos } 309 1.1 christos 310 1.1 christos switch (op) { 311 1.1 christos case EPOLL_CTL_MOD: 312 1.1 christos error = epoll_delete_all_events(retval, epfd, fd); 313 1.1 christos if (error != 0) 314 1.1 christos return error; 315 1.1 christos break; 316 1.1 christos 317 1.1 christos case EPOLL_CTL_ADD: 318 1.1 christos if (epoll_fd_registered(retval, epfd, fd)) 319 1.1 christos return EEXIST; 320 1.1 christos error = epoll_check_loop_and_depth(l, epfd, fd); 321 1.1 christos if (error != 0) 322 1.1 christos return error; 323 1.1 christos break; 324 1.1 christos 325 1.1 christos case EPOLL_CTL_DEL: 326 1.1 christos /* CTL_DEL means unregister this fd with this epoll */ 327 1.1 christos return epoll_delete_all_events(retval, epfd, fd); 328 1.1 christos 329 1.1 christos default: 330 1.2 pgoyette DPRINTF(("%s: invalid op %d\n", __func__, op)); 331 1.1 christos return EINVAL; 332 1.1 christos } 333 1.1 christos 334 1.1 christos error = kevent1(retval, epfd, kev, nchanges, NULL, 0, NULL, &k_ops); 335 1.1 christos 336 1.1 christos if (error == EOPNOTSUPP) { 337 1.1 christos error = EPERM; 338 1.1 christos } 339 1.1 christos 340 1.1 christos return error; 341 1.1 christos } 342 1.1 christos 343 1.1 christos /* 344 1.1 christos * epoll_ctl(2). Copyin event if necessary and then call 345 1.1 christos * epoll_ctl_common(). 346 1.1 christos */ 347 1.1 christos int 348 1.1 christos sys_epoll_ctl(struct lwp *l, const struct sys_epoll_ctl_args *uap, 349 1.1 christos register_t *retval) 350 1.1 christos { 351 1.1 christos /* { 352 1.1 christos syscallarg(int) epfd; 353 1.1 christos syscallarg(int) op; 354 1.1 christos syscallarg(int) fd; 355 1.1 christos syscallarg(struct epoll_event *) event; 356 1.1 christos } */ 357 1.1 christos struct epoll_event ee; 358 1.1 christos struct epoll_event *eep; 359 1.1 christos int error; 360 1.1 christos 361 1.1 christos if (SCARG(uap, op) != EPOLL_CTL_DEL) { 362 1.1 christos error = copyin(SCARG(uap, event), &ee, sizeof(ee)); 363 1.1 christos if (error != 0) 364 1.1 christos return error; 365 1.1 christos 366 1.1 christos eep = ⅇ 367 1.1 christos } else 368 1.1 christos eep = NULL; 369 1.1 christos 370 1.1 christos return epoll_ctl_common(l, retval, SCARG(uap, epfd), SCARG(uap, op), 371 1.1 christos SCARG(uap, fd), eep); 372 1.1 christos } 373 1.1 christos 374 1.1 christos /* 375 1.1 christos * Wait for a filter to be triggered on the epoll file descriptor. 376 1.1 christos * All of the epoll_*wait* syscalls eventually end up here. 377 1.1 christos * 378 1.1 christos * events, nss, and ssp must point to kernel memory (or be NULL). 379 1.1 christos */ 380 1.1 christos int 381 1.1 christos epoll_wait_common(struct lwp *l, register_t *retval, int epfd, 382 1.1 christos struct epoll_event *events, int maxevents, struct timespec *tsp, 383 1.1 christos const sigset_t *nssp) 384 1.1 christos { 385 1.1 christos struct kevent_ops k_ops = { 386 1.1 christos .keo_private = NULL, 387 1.1 christos .keo_fetch_timeout = epoll_kev_fetch_timeout, 388 1.1 christos .keo_fetch_changes = NULL, 389 1.1 christos .keo_put_events = epoll_kev_put_events, 390 1.1 christos }; 391 1.1 christos struct proc *p = l->l_proc; 392 1.1 christos file_t *epfp; 393 1.1 christos sigset_t oss; 394 1.1 christos int error = 0; 395 1.1 christos 396 1.1 christos if (maxevents <= 0 || maxevents > EPOLL_MAX_EVENTS) 397 1.1 christos return EINVAL; 398 1.1 christos 399 1.1 christos /* 400 1.1 christos * Need to validate epfd separately from kevent1 to match 401 1.1 christos * Linux's errno behaviour. 402 1.1 christos */ 403 1.1 christos epfp = fd_getfile(epfd); 404 1.1 christos if (epfp == NULL) 405 1.1 christos return EBADF; 406 1.1 christos if (epfp->f_type != DTYPE_KQUEUE) 407 1.1 christos error = EINVAL; 408 1.1 christos fd_putfile(epfd); 409 1.1 christos if (error != 0) 410 1.1 christos return error; 411 1.1 christos 412 1.1 christos if (nssp != NULL) { 413 1.1 christos mutex_enter(p->p_lock); 414 1.1 christos error = sigprocmask1(l, SIG_SETMASK, nssp, &oss); 415 1.1 christos mutex_exit(p->p_lock); 416 1.1 christos if (error != 0) 417 1.1 christos return error; 418 1.1 christos } 419 1.1 christos 420 1.1 christos error = kevent1(retval, epfd, NULL, 0, (struct kevent *)events, 421 1.1 christos maxevents, tsp, &k_ops); 422 1.1 christos /* 423 1.1 christos * Since we're not registering nay events, ENOMEM should not 424 1.1 christos * be possible for this specific kevent1 call. 425 1.1 christos */ 426 1.1 christos KASSERT(error != ENOMEM); 427 1.1 christos 428 1.1 christos if (nssp != NULL) { 429 1.1 christos mutex_enter(p->p_lock); 430 1.1 christos error = sigprocmask1(l, SIG_SETMASK, &oss, NULL); 431 1.1 christos mutex_exit(p->p_lock); 432 1.1 christos } 433 1.1 christos 434 1.1 christos return error; 435 1.1 christos } 436 1.1 christos 437 1.1 christos /* 438 1.1 christos * epoll_pwait2(2). 439 1.1 christos */ 440 1.1 christos int 441 1.1 christos sys_epoll_pwait2(struct lwp *l, const struct sys_epoll_pwait2_args *uap, 442 1.1 christos register_t *retval) 443 1.1 christos { 444 1.1 christos /* { 445 1.1 christos syscallarg(int) epfd; 446 1.1 christos syscallarg(struct epoll_event *) events; 447 1.1 christos syscallarg(int) maxevents; 448 1.1 christos syscallarg(struct timespec *) timeout; 449 1.1 christos syscallarg(sigset_t *) sigmask; 450 1.1 christos } */ 451 1.1 christos struct epoll_event *events; 452 1.1 christos struct timespec ts, *tsp; 453 1.1 christos sigset_t ss, *ssp; 454 1.1 christos int error; 455 1.1 christos const int maxevents = SCARG(uap, maxevents); 456 1.1 christos 457 1.1 christos if (maxevents <= 0 || maxevents >= EPOLL_MAX_EVENTS) 458 1.1 christos return EINVAL; 459 1.1 christos 460 1.1 christos if (SCARG(uap, timeout) != NULL) { 461 1.1 christos error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 462 1.1 christos if (error != 0) 463 1.1 christos return error; 464 1.1 christos 465 1.1 christos tsp = &ts; 466 1.1 christos } else 467 1.1 christos tsp = NULL; 468 1.1 christos 469 1.1 christos if (SCARG(uap, sigmask) != NULL) { 470 1.1 christos error = copyin(SCARG(uap, sigmask), &ss, sizeof(ss)); 471 1.1 christos if (error != 0) 472 1.1 christos return error; 473 1.1 christos 474 1.1 christos ssp = &ss; 475 1.1 christos } else 476 1.1 christos ssp = NULL; 477 1.1 christos 478 1.1 christos events = kmem_alloc(maxevents * sizeof(*events), KM_SLEEP); 479 1.1 christos 480 1.1 christos error = epoll_wait_common(l, retval, SCARG(uap, epfd), events, 481 1.1 christos maxevents, tsp, ssp); 482 1.1 christos if (error == 0) 483 1.1 christos error = copyout(events, SCARG(uap, events), 484 1.1 christos *retval * sizeof(*events)); 485 1.1 christos 486 1.1 christos kmem_free(events, maxevents * sizeof(*events)); 487 1.1 christos return error; 488 1.1 christos } 489 1.1 christos 490 1.1 christos /* 491 1.1 christos * Helper that registers a single kevent. 492 1.1 christos */ 493 1.1 christos static int 494 1.1 christos epoll_register_kevent(register_t *retval, int epfd, int fd, int filter, 495 1.1 christos unsigned int flags) 496 1.1 christos { 497 1.1 christos struct kevent kev; 498 1.1 christos struct kevent_ops k_ops = { 499 1.1 christos .keo_private = NULL, 500 1.1 christos .keo_fetch_timeout = NULL, 501 1.1 christos .keo_fetch_changes = epoll_kev_fetch_changes, 502 1.1 christos .keo_put_events = NULL, 503 1.1 christos }; 504 1.1 christos 505 1.1 christos EV_SET(&kev, fd, filter, flags, 0, 0, 0); 506 1.1 christos 507 1.3 rin return kevent1(retval, epfd, &kev, 1, NULL, 0, NULL, &k_ops); 508 1.1 christos } 509 1.1 christos 510 1.1 christos /* 511 1.1 christos * Check if an fd is already registered in the kqueue referenced by epfd. 512 1.1 christos */ 513 1.1 christos static int 514 1.1 christos epoll_fd_registered(register_t *retval, int epfd, int fd) 515 1.1 christos { 516 1.1 christos /* 517 1.1 christos * Set empty filter flags to avoid accidental modification of already 518 1.1 christos * registered events. In the case of event re-registration: 519 1.1 christos * 1. If event does not exists kevent() does nothing and returns ENOENT 520 1.1 christos * 2. If event does exists, it's enabled/disabled state is preserved 521 1.1 christos * but fflags, data and udata fields are overwritten. So we can not 522 1.1 christos * set socket lowats and store user's context pointer in udata. 523 1.1 christos */ 524 1.1 christos if (epoll_register_kevent(retval, epfd, fd, EVFILT_READ, 0) != ENOENT || 525 1.1 christos epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE, 0) != ENOENT) 526 1.1 christos return 1; 527 1.1 christos 528 1.1 christos return 0; 529 1.1 christos } 530 1.1 christos 531 1.1 christos /* 532 1.1 christos * Remove all events in the kqueue referenced by epfd that depend on 533 1.1 christos * fd. 534 1.1 christos */ 535 1.1 christos static int 536 1.1 christos epoll_delete_all_events(register_t *retval, int epfd, int fd) 537 1.1 christos { 538 1.1 christos int error1, error2; 539 1.1 christos 540 1.1 christos error1 = epoll_register_kevent(retval, epfd, fd, EVFILT_READ, 541 1.1 christos EV_DELETE); 542 1.1 christos error2 = epoll_register_kevent(retval, epfd, fd, EVFILT_WRITE, 543 1.1 christos EV_DELETE); 544 1.1 christos 545 1.1 christos /* return 0 if at least one result positive */ 546 1.1 christos return error1 == 0 ? 0 : error2; 547 1.1 christos } 548 1.1 christos 549 1.1 christos /* 550 1.1 christos * Interate through all the knotes and recover a directed graph on 551 1.1 christos * which kqueues are watching each other. 552 1.1 christos * 553 1.1 christos * If edges is NULL, the number of edges is still counted but no graph 554 1.1 christos * is assembled. 555 1.1 christos */ 556 1.1 christos static int 557 1.1 christos epoll_recover_watch_tree(struct epoll_edge *edges, size_t nedges, size_t nfds) { 558 1.1 christos file_t *currfp, *targetfp; 559 1.1 christos struct knote *kn, *tmpkn; 560 1.1 christos size_t i, nedges_so_far = 0; 561 1.1 christos 562 1.1 christos for (i = 0; i < nfds && (edges == NULL || nedges_so_far < nedges); i++) 563 1.1 christos { 564 1.1 christos currfp = fd_getfile(i); 565 1.1 christos if (currfp == NULL) 566 1.1 christos continue; 567 1.1 christos if (currfp->f_type != DTYPE_KQUEUE) 568 1.1 christos goto continue_count_outer; 569 1.1 christos 570 1.1 christos SLIST_FOREACH_SAFE(kn, &currfp->f_kqueue->kq_sel.sel_klist, 571 1.1 christos kn_selnext, tmpkn) { 572 1.1 christos targetfp = fd_getfile(kn->kn_kevent.kext_epfd); 573 1.1 christos if (targetfp == NULL) 574 1.1 christos continue; 575 1.1 christos if (targetfp->f_type == DTYPE_KQUEUE) { 576 1.1 christos if (edges != NULL) { 577 1.1 christos edges[nedges_so_far].epfd = 578 1.1 christos kn->kn_kevent.kext_epfd; 579 1.1 christos edges[nedges_so_far].fd = 580 1.1 christos kn->kn_kevent.kext_fd; 581 1.1 christos } 582 1.1 christos nedges_so_far++; 583 1.1 christos } 584 1.1 christos 585 1.1 christos fd_putfile(kn->kn_kevent.kext_epfd); 586 1.1 christos } 587 1.1 christos 588 1.1 christos continue_count_outer: 589 1.1 christos fd_putfile(i); 590 1.1 christos } 591 1.1 christos 592 1.1 christos return nedges_so_far; 593 1.1 christos } 594 1.1 christos 595 1.1 christos /* 596 1.1 christos * Run dfs on the graph described by edges, checking for loops and a 597 1.1 christos * depth greater than EPOLL_MAX_DEPTH. 598 1.1 christos */ 599 1.1 christos static int 600 1.1 christos epoll_dfs(struct epoll_edge *edges, size_t nedges, struct epoll_seen *seen, 601 1.1 christos size_t nseen, int currfd, int depth) 602 1.1 christos { 603 1.1 christos int error; 604 1.1 christos size_t i; 605 1.1 christos 606 1.1 christos KASSERT(edges != NULL); 607 1.1 christos KASSERT(seen != NULL); 608 1.1 christos KASSERT(nedges > 0); 609 1.1 christos KASSERT(currfd < nseen); 610 1.1 christos KASSERT(0 <= depth && depth <= EPOLL_MAX_DEPTH + 1); 611 1.1 christos 612 1.1 christos if (__BITMAP_ISSET(currfd, seen)) 613 1.1 christos return ELOOP; 614 1.1 christos 615 1.1 christos __BITMAP_SET(currfd, seen); 616 1.1 christos 617 1.1 christos depth++; 618 1.1 christos if (depth > EPOLL_MAX_DEPTH) 619 1.1 christos return EINVAL; 620 1.1 christos 621 1.1 christos for (i = 0; i < nedges; i++) { 622 1.1 christos if (edges[i].epfd != currfd) 623 1.1 christos continue; 624 1.1 christos 625 1.1 christos error = epoll_dfs(edges, nedges, seen, nseen, 626 1.1 christos edges[i].fd, depth); 627 1.1 christos if (error != 0) 628 1.1 christos return error; 629 1.1 christos } 630 1.1 christos 631 1.1 christos return 0; 632 1.1 christos } 633 1.1 christos 634 1.1 christos /* 635 1.1 christos * Check if adding fd to epfd would violate the maximum depth or 636 1.1 christos * create a loop. 637 1.1 christos */ 638 1.1 christos static int 639 1.1 christos epoll_check_loop_and_depth(struct lwp *l, int epfd, int fd) 640 1.1 christos { 641 1.1 christos int error; 642 1.1 christos file_t *fp; 643 1.1 christos struct epoll_edge *edges; 644 1.1 christos struct epoll_seen *seen; 645 1.1 christos size_t nedges, nfds, seen_size; 646 1.1 christos bool fdirrelevant; 647 1.1 christos 648 1.1 christos /* If the target isn't another kqueue, we can skip this check */ 649 1.1 christos fp = fd_getfile(fd); 650 1.1 christos if (fp == NULL) 651 1.1 christos return 0; 652 1.1 christos fdirrelevant = fp->f_type != DTYPE_KQUEUE; 653 1.1 christos fd_putfile(fd); 654 1.1 christos if (fdirrelevant) 655 1.1 christos return 0; 656 1.1 christos 657 1.1 christos nfds = l->l_proc->p_fd->fd_lastfile + 1; 658 1.1 christos 659 1.1 christos /* 660 1.1 christos * We call epoll_recover_watch_tree twice, once to find the 661 1.1 christos * number of edges, and once to actually fill them in. We add one 662 1.1 christos * because we want to include the edge epfd->fd. 663 1.1 christos */ 664 1.3 rin nedges = 1 + epoll_recover_watch_tree(NULL, 0, nfds); 665 1.1 christos 666 1.1 christos edges = kmem_zalloc(nedges * sizeof(*edges), KM_SLEEP); 667 1.1 christos 668 1.1 christos epoll_recover_watch_tree(edges + 1, nedges - 1, nfds); 669 1.1 christos 670 1.1 christos edges[0].epfd = epfd; 671 1.1 christos edges[0].fd = fd; 672 1.1 christos 673 1.1 christos seen_size = __BITMAP_SIZE(char, nfds); 674 1.1 christos seen = kmem_zalloc(seen_size, KM_SLEEP); 675 1.1 christos 676 1.1 christos error = epoll_dfs(edges, nedges, seen, nfds, epfd, 0); 677 1.1 christos 678 1.1 christos kmem_free(seen, seen_size); 679 1.1 christos kmem_free(edges, nedges * sizeof(*edges)); 680 1.1 christos 681 1.1 christos return error; 682 1.1 christos } 683