Home | History | Annotate | Line # | Download | only in kern
kern_event.c revision 1.1.1.1
      1 /*-
      2  * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon (at) FreeBSD.org>
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     24  * SUCH DAMAGE.
     25  *
     26  * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $
     27  */
     28 
     29 #include <sys/param.h>
     30 #include <sys/systm.h>
     31 #include <sys/kernel.h>
     32 #include <sys/lock.h>
     33 #include <sys/mutex.h>
     34 #include <sys/proc.h>
     35 #include <sys/malloc.h>
     36 #include <sys/unistd.h>
     37 #include <sys/file.h>
     38 #include <sys/fcntl.h>
     39 #include <sys/selinfo.h>
     40 #include <sys/queue.h>
     41 #include <sys/event.h>
     42 #include <sys/eventvar.h>
     43 #include <sys/poll.h>
     44 #include <sys/protosw.h>
     45 #include <sys/socket.h>
     46 #include <sys/socketvar.h>
     47 #include <sys/stat.h>
     48 #include <sys/sysproto.h>
     49 #include <sys/uio.h>
     50 
     51 #include <vm/vm_zone.h>
     52 
     53 static int	kqueue_scan(struct file *fp, int maxevents,
     54 		    struct kevent *ulistp, const struct timespec *timeout,
     55 		    struct proc *p);
     56 static int 	kqueue_read(struct file *fp, struct uio *uio,
     57 		    struct ucred *cred, int flags, struct proc *p);
     58 static int	kqueue_write(struct file *fp, struct uio *uio,
     59 		    struct ucred *cred, int flags, struct proc *p);
     60 static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
     61 		    struct proc *p);
     62 static int 	kqueue_poll(struct file *fp, int events, struct ucred *cred,
     63 		    struct proc *p);
     64 static int 	kqueue_kqfilter(struct file *fp, struct knote *kn);
     65 static int 	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
     66 static int 	kqueue_close(struct file *fp, struct proc *p);
     67 static void 	kqueue_wakeup(struct kqueue *kq);
     68 
     69 static struct fileops kqueueops = {
     70 	kqueue_read,
     71 	kqueue_write,
     72 	kqueue_ioctl,
     73 	kqueue_poll,
     74 	kqueue_kqfilter,
     75 	kqueue_stat,
     76 	kqueue_close
     77 };
     78 
     79 static void 	knote_attach(struct knote *kn, struct filedesc *fdp);
     80 static void 	knote_drop(struct knote *kn, struct proc *p);
     81 static void 	knote_enqueue(struct knote *kn);
     82 static void 	knote_dequeue(struct knote *kn);
     83 static void 	knote_init(void);
     84 static struct 	knote *knote_alloc(void);
     85 static void 	knote_free(struct knote *kn);
     86 
     87 static void	filt_kqdetach(struct knote *kn);
     88 static int	filt_kqueue(struct knote *kn, long hint);
     89 static int	filt_procattach(struct knote *kn);
     90 static void	filt_procdetach(struct knote *kn);
     91 static int	filt_proc(struct knote *kn, long hint);
     92 static int	filt_fileattach(struct knote *kn);
     93 
     94 static struct filterops kqread_filtops =
     95 	{ 1, NULL, filt_kqdetach, filt_kqueue };
     96 static struct filterops proc_filtops =
     97 	{ 0, filt_procattach, filt_procdetach, filt_proc };
     98 static struct filterops file_filtops =
     99 	{ 1, filt_fileattach, NULL, NULL };
    100 
    101 static vm_zone_t	knote_zone;
    102 
/*
 * Mark a knote as active and, unless it is already queued or has been
 * disabled, put it on its kqueue's pending list.  The argument is
 * parenthesized at every use so any pointer-valued expression is safe.
 */
#define KNOTE_ACTIVATE(kn) do {						\
	(kn)->kn_status |= KN_ACTIVE;					\
	if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)	\
		knote_enqueue(kn);					\
} while (0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */

/*
 * Fold an identifier into a hash bucket index.  Both uses of val are
 * parenthesized: without that, an argument such as "a | b" would have
 * only its right operand shifted.
 */
#define	KN_HASH(val, mask)	(((val) ^ ((val) >> 8)) & (mask))
    111 
    112 extern struct filterops aio_filtops;
    113 extern struct filterops sig_filtops;
    114 
    115 /*
    116  * Table for for all system-defined filters.
    117  */
    118 static struct filterops *sysfilt_ops[] = {
    119 	&file_filtops,			/* EVFILT_READ */
    120 	&file_filtops,			/* EVFILT_WRITE */
    121 	&aio_filtops,			/* EVFILT_AIO */
    122 	&file_filtops,			/* EVFILT_VNODE */
    123 	&proc_filtops,			/* EVFILT_PROC */
    124 	&sig_filtops,			/* EVFILT_SIGNAL */
    125 };
    126 
    127 static int
    128 filt_fileattach(struct knote *kn)
    129 {
    130 
    131 	return (fo_kqfilter(kn->kn_fp, kn));
    132 }
    133 
    134 /*ARGSUSED*/
    135 static int
    136 kqueue_kqfilter(struct file *fp, struct knote *kn)
    137 {
    138 	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    139 
    140 	if (kn->kn_filter != EVFILT_READ)
    141 		return (1);
    142 
    143 	kn->kn_fop = &kqread_filtops;
    144 	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
    145 	return (0);
    146 }
    147 
    148 static void
    149 filt_kqdetach(struct knote *kn)
    150 {
    151 	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    152 
    153 	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
    154 }
    155 
    156 /*ARGSUSED*/
    157 static int
    158 filt_kqueue(struct knote *kn, long hint)
    159 {
    160 	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
    161 
    162 	kn->kn_data = kq->kq_count;
    163 	return (kn->kn_data > 0);
    164 }
    165 
    166 static int
    167 filt_procattach(struct knote *kn)
    168 {
    169 	struct proc *p;
    170 	int error;
    171 
    172 	p = pfind(kn->kn_id);
    173 	if (p == NULL)
    174 		return (ESRCH);
    175 	if ((error = p_cansee(curproc, p))) {
    176 		PROC_UNLOCK(p);
    177 		return (error);
    178 	}
    179 
    180 	kn->kn_ptr.p_proc = p;
    181 	kn->kn_flags |= EV_CLEAR;		/* automatically set */
    182 
    183 	/*
    184 	 * internal flag indicating registration done by kernel
    185 	 */
    186 	if (kn->kn_flags & EV_FLAG1) {
    187 		kn->kn_data = kn->kn_sdata;		/* ppid */
    188 		kn->kn_fflags = NOTE_CHILD;
    189 		kn->kn_flags &= ~EV_FLAG1;
    190 	}
    191 
    192 	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
    193 	PROC_UNLOCK(p);
    194 
    195 	return (0);
    196 }
    197 
    198 /*
    199  * The knote may be attached to a different process, which may exit,
    200  * leaving nothing for the knote to be attached to.  So when the process
    201  * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
    202  * it will be deleted when read out.  However, as part of the knote deletion,
    203  * this routine is called, so a check is needed to avoid actually performing
    204  * a detach, because the original process does not exist any more.
    205  */
    206 static void
    207 filt_procdetach(struct knote *kn)
    208 {
    209 	struct proc *p = kn->kn_ptr.p_proc;
    210 
    211 	if (kn->kn_status & KN_DETACHED)
    212 		return;
    213 
    214 	PROC_LOCK(p);
    215 	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
    216 	PROC_UNLOCK(p);
    217 }
    218 
    219 static int
    220 filt_proc(struct knote *kn, long hint)
    221 {
    222 	u_int event;
    223 
    224 	/*
    225 	 * mask off extra data
    226 	 */
    227 	event = (u_int)hint & NOTE_PCTRLMASK;
    228 
    229 	/*
    230 	 * if the user is interested in this event, record it.
    231 	 */
    232 	if (kn->kn_sfflags & event)
    233 		kn->kn_fflags |= event;
    234 
    235 	/*
    236 	 * process is gone, so flag the event as finished.
    237 	 */
    238 	if (event == NOTE_EXIT) {
    239 		kn->kn_status |= KN_DETACHED;
    240 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
    241 		return (1);
    242 	}
    243 
    244 	/*
    245 	 * process forked, and user wants to track the new process,
    246 	 * so attach a new knote to it, and immediately report an
    247 	 * event with the parent's pid.
    248 	 */
    249 	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
    250 		struct kevent kev;
    251 		int error;
    252 
    253 		/*
    254 		 * register knote with new process.
    255 		 */
    256 		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
    257 		kev.filter = kn->kn_filter;
    258 		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
    259 		kev.fflags = kn->kn_sfflags;
    260 		kev.data = kn->kn_id;			/* parent */
    261 		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
    262 		error = kqueue_register(kn->kn_kq, &kev, NULL);
    263 		if (error)
    264 			kn->kn_fflags |= NOTE_TRACKERR;
    265 	}
    266 
    267 	return (kn->kn_fflags != 0);
    268 }
    269 
    270 int
    271 kqueue(struct proc *p, struct kqueue_args *uap)
    272 {
    273 	struct filedesc *fdp = p->p_fd;
    274 	struct kqueue *kq;
    275 	struct file *fp;
    276 	int fd, error;
    277 
    278 	error = falloc(p, &fp, &fd);
    279 	if (error)
    280 		return (error);
    281 	fp->f_flag = FREAD | FWRITE;
    282 	fp->f_type = DTYPE_KQUEUE;
    283 	fp->f_ops = &kqueueops;
    284 	kq = malloc(sizeof(struct kqueue), M_TEMP, M_WAITOK | M_ZERO);
    285 	TAILQ_INIT(&kq->kq_head);
    286 	fp->f_data = (caddr_t)kq;
    287 	p->p_retval[0] = fd;
    288 	if (fdp->fd_knlistsize < 0)
    289 		fdp->fd_knlistsize = 0;		/* this process has a kq */
    290 	kq->kq_fdp = fdp;
    291 	return (error);
    292 }
    293 
    294 #ifndef _SYS_SYSPROTO_H_
    295 struct kevent_args {
    296 	int	fd;
    297 	const struct kevent *changelist;
    298 	int	nchanges;
    299 	struct	kevent *eventlist;
    300 	int	nevents;
    301 	const struct timespec *timeout;
    302 };
    303 #endif
    304 int
    305 kevent(struct proc *p, struct kevent_args *uap)
    306 {
    307 	struct filedesc* fdp = p->p_fd;
    308 	struct kevent *kevp;
    309 	struct kqueue *kq;
    310 	struct file *fp = NULL;
    311 	struct timespec ts;
    312 	int i, n, nerrors, error;
    313 
    314         if (((u_int)uap->fd) >= fdp->fd_nfiles ||
    315             (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
    316 	    (fp->f_type != DTYPE_KQUEUE))
    317 		return (EBADF);
    318 
    319 	fhold(fp);
    320 
    321 	if (uap->timeout != NULL) {
    322 		error = copyin(uap->timeout, &ts, sizeof(ts));
    323 		if (error)
    324 			goto done;
    325 		uap->timeout = &ts;
    326 	}
    327 
    328 	kq = (struct kqueue *)fp->f_data;
    329 	nerrors = 0;
    330 
    331 	while (uap->nchanges > 0) {
    332 		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
    333 		error = copyin(uap->changelist, kq->kq_kev,
    334 		    n * sizeof(struct kevent));
    335 		if (error)
    336 			goto done;
    337 		for (i = 0; i < n; i++) {
    338 			kevp = &kq->kq_kev[i];
    339 			kevp->flags &= ~EV_SYSFLAGS;
    340 			error = kqueue_register(kq, kevp, p);
    341 			if (error) {
    342 				if (uap->nevents != 0) {
    343 					kevp->flags = EV_ERROR;
    344 					kevp->data = error;
    345 					(void) copyout((caddr_t)kevp,
    346 					    (caddr_t)uap->eventlist,
    347 					    sizeof(*kevp));
    348 					uap->eventlist++;
    349 					uap->nevents--;
    350 					nerrors++;
    351 				} else {
    352 					goto done;
    353 				}
    354 			}
    355 		}
    356 		uap->nchanges -= n;
    357 		uap->changelist += n;
    358 	}
    359 	if (nerrors) {
    360         	p->p_retval[0] = nerrors;
    361 		error = 0;
    362 		goto done;
    363 	}
    364 
    365 	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, p);
    366 done:
    367 	if (fp != NULL)
    368 		fdrop(fp, p);
    369 	return (error);
    370 }
    371 
    372 int
    373 kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
    374 {
    375 	struct filedesc *fdp = kq->kq_fdp;
    376 	struct filterops *fops;
    377 	struct file *fp = NULL;
    378 	struct knote *kn = NULL;
    379 	int s, error = 0;
    380 
    381 	if (kev->filter < 0) {
    382 		if (kev->filter + EVFILT_SYSCOUNT < 0)
    383 			return (EINVAL);
    384 		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
    385 	} else {
    386 		/*
    387 		 * XXX
    388 		 * filter attach routine is responsible for insuring that
    389 		 * the identifier can be attached to it.
    390 		 */
    391 		printf("unknown filter: %d\n", kev->filter);
    392 		return (EINVAL);
    393 	}
    394 
    395 	if (fops->f_isfd) {
    396 		/* validate descriptor */
    397 		if ((u_int)kev->ident >= fdp->fd_nfiles ||
    398 		    (fp = fdp->fd_ofiles[kev->ident]) == NULL)
    399 			return (EBADF);
    400 		fhold(fp);
    401 
    402 		if (kev->ident < fdp->fd_knlistsize) {
    403 			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
    404 				if (kq == kn->kn_kq &&
    405 				    kev->filter == kn->kn_filter)
    406 					break;
    407 		}
    408 	} else {
    409 		if (fdp->fd_knhashmask != 0) {
    410 			struct klist *list;
    411 
    412 			list = &fdp->fd_knhash[
    413 			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
    414 			SLIST_FOREACH(kn, list, kn_link)
    415 				if (kev->ident == kn->kn_id &&
    416 				    kq == kn->kn_kq &&
    417 				    kev->filter == kn->kn_filter)
    418 					break;
    419 		}
    420 	}
    421 
    422 	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
    423 		error = ENOENT;
    424 		goto done;
    425 	}
    426 
    427 	/*
    428 	 * kn now contains the matching knote, or NULL if no match
    429 	 */
    430 	if (kev->flags & EV_ADD) {
    431 
    432 		if (kn == NULL) {
    433 			kn = knote_alloc();
    434 			if (kn == NULL) {
    435 				error = ENOMEM;
    436 				goto done;
    437 			}
    438 			kn->kn_fp = fp;
    439 			kn->kn_kq = kq;
    440 			kn->kn_fop = fops;
    441 
    442 			/*
    443 			 * apply reference count to knote structure, and
    444 			 * do not release it at the end of this routine.
    445 			 */
    446 			fp = NULL;
    447 
    448 			kn->kn_sfflags = kev->fflags;
    449 			kn->kn_sdata = kev->data;
    450 			kev->fflags = 0;
    451 			kev->data = 0;
    452 			kn->kn_kevent = *kev;
    453 
    454 			knote_attach(kn, fdp);
    455 			if ((error = fops->f_attach(kn)) != 0) {
    456 				knote_drop(kn, p);
    457 				goto done;
    458 			}
    459 		} else {
    460 			/*
    461 			 * The user may change some filter values after the
    462 			 * initial EV_ADD, but doing so will not reset any
    463 			 * filter which have already been triggered.
    464 			 */
    465 			kn->kn_sfflags = kev->fflags;
    466 			kn->kn_sdata = kev->data;
    467 			kn->kn_kevent.udata = kev->udata;
    468 		}
    469 
    470 		s = splhigh();
    471 		if (kn->kn_fop->f_event(kn, 0))
    472 			KNOTE_ACTIVATE(kn);
    473 		splx(s);
    474 
    475 	} else if (kev->flags & EV_DELETE) {
    476 		kn->kn_fop->f_detach(kn);
    477 		knote_drop(kn, p);
    478 		goto done;
    479 	}
    480 
    481 	if ((kev->flags & EV_DISABLE) &&
    482 	    ((kn->kn_status & KN_DISABLED) == 0)) {
    483 		s = splhigh();
    484 		kn->kn_status |= KN_DISABLED;
    485 		splx(s);
    486 	}
    487 
    488 	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
    489 		s = splhigh();
    490 		kn->kn_status &= ~KN_DISABLED;
    491 		if ((kn->kn_status & KN_ACTIVE) &&
    492 		    ((kn->kn_status & KN_QUEUED) == 0))
    493 			knote_enqueue(kn);
    494 		splx(s);
    495 	}
    496 
    497 done:
    498 	if (fp != NULL)
    499 		fdrop(fp, p);
    500 	return (error);
    501 }
    502 
    503 static int
    504 kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
    505 	const struct timespec *tsp, struct proc *p)
    506 {
    507 	struct kqueue *kq = (struct kqueue *)fp->f_data;
    508 	struct kevent *kevp;
    509 	struct timeval atv, rtv, ttv;
    510 	struct knote *kn, marker;
    511 	int s, count, timeout, nkev = 0, error = 0;
    512 
    513 	count = maxevents;
    514 	if (count == 0)
    515 		goto done;
    516 
    517 	if (tsp != NULL) {
    518 		TIMESPEC_TO_TIMEVAL(&atv, tsp);
    519 		if (itimerfix(&atv)) {
    520 			error = EINVAL;
    521 			goto done;
    522 		}
    523 		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
    524 			timeout = -1;
    525 		else
    526 			timeout = atv.tv_sec > 24 * 60 * 60 ?
    527 			    24 * 60 * 60 * hz : tvtohz(&atv);
    528 		getmicrouptime(&rtv);
    529 		timevaladd(&atv, &rtv);
    530 	} else {
    531 		atv.tv_sec = 0;
    532 		atv.tv_usec = 0;
    533 		timeout = 0;
    534 	}
    535 	goto start;
    536 
    537 retry:
    538 	if (atv.tv_sec || atv.tv_usec) {
    539 		getmicrouptime(&rtv);
    540 		if (timevalcmp(&rtv, &atv, >=))
    541 			goto done;
    542 		ttv = atv;
    543 		timevalsub(&ttv, &rtv);
    544 		timeout = ttv.tv_sec > 24 * 60 * 60 ?
    545 			24 * 60 * 60 * hz : tvtohz(&ttv);
    546 	}
    547 
    548 start:
    549 	kevp = kq->kq_kev;
    550 	s = splhigh();
    551 	if (kq->kq_count == 0) {
    552 		if (timeout < 0) {
    553 			error = EWOULDBLOCK;
    554 		} else {
    555 			kq->kq_state |= KQ_SLEEP;
    556 			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
    557 		}
    558 		splx(s);
    559 		if (error == 0)
    560 			goto retry;
    561 		/* don't restart after signals... */
    562 		if (error == ERESTART)
    563 			error = EINTR;
    564 		else if (error == EWOULDBLOCK)
    565 			error = 0;
    566 		goto done;
    567 	}
    568 
    569 	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
    570 	while (count) {
    571 		kn = TAILQ_FIRST(&kq->kq_head);
    572 		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
    573 		if (kn == &marker) {
    574 			splx(s);
    575 			if (count == maxevents)
    576 				goto retry;
    577 			goto done;
    578 		}
    579 		if (kn->kn_status & KN_DISABLED) {
    580 			kn->kn_status &= ~KN_QUEUED;
    581 			kq->kq_count--;
    582 			continue;
    583 		}
    584 		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
    585 		    kn->kn_fop->f_event(kn, 0) == 0) {
    586 			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
    587 			kq->kq_count--;
    588 			continue;
    589 		}
    590 		*kevp = kn->kn_kevent;
    591 		kevp++;
    592 		nkev++;
    593 		if (kn->kn_flags & EV_ONESHOT) {
    594 			kn->kn_status &= ~KN_QUEUED;
    595 			kq->kq_count--;
    596 			splx(s);
    597 			kn->kn_fop->f_detach(kn);
    598 			knote_drop(kn, p);
    599 			s = splhigh();
    600 		} else if (kn->kn_flags & EV_CLEAR) {
    601 			kn->kn_data = 0;
    602 			kn->kn_fflags = 0;
    603 			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
    604 			kq->kq_count--;
    605 		} else {
    606 			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
    607 		}
    608 		count--;
    609 		if (nkev == KQ_NEVENTS) {
    610 			splx(s);
    611 			error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
    612 			    sizeof(struct kevent) * nkev);
    613 			ulistp += nkev;
    614 			nkev = 0;
    615 			kevp = kq->kq_kev;
    616 			s = splhigh();
    617 			if (error)
    618 				break;
    619 		}
    620 	}
    621 	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
    622 	splx(s);
    623 done:
    624 	if (nkev != 0)
    625 		error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
    626 		    sizeof(struct kevent) * nkev);
    627         p->p_retval[0] = maxevents - count;
    628 	return (error);
    629 }
    630 
    631 /*
    632  * XXX
    633  * This could be expanded to call kqueue_scan, if desired.
    634  */
    635 /*ARGSUSED*/
    636 static int
    637 kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred,
    638 	int flags, struct proc *p)
    639 {
    640 	return (ENXIO);
    641 }
    642 
    643 /*ARGSUSED*/
    644 static int
    645 kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred,
    646 	 int flags, struct proc *p)
    647 {
    648 	return (ENXIO);
    649 }
    650 
    651 /*ARGSUSED*/
    652 static int
    653 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
    654 {
    655 	return (ENOTTY);
    656 }
    657 
    658 /*ARGSUSED*/
    659 static int
    660 kqueue_poll(struct file *fp, int events, struct ucred *cred, struct proc *p)
    661 {
    662 	struct kqueue *kq = (struct kqueue *)fp->f_data;
    663 	int revents = 0;
    664 	int s = splnet();
    665 
    666         if (events & (POLLIN | POLLRDNORM)) {
    667                 if (kq->kq_count) {
    668                         revents |= events & (POLLIN | POLLRDNORM);
    669 		} else {
    670                         selrecord(p, &kq->kq_sel);
    671 			kq->kq_state |= KQ_SEL;
    672 		}
    673 	}
    674 	splx(s);
    675 	return (revents);
    676 }
    677 
    678 /*ARGSUSED*/
    679 static int
    680 kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
    681 {
    682 	struct kqueue *kq = (struct kqueue *)fp->f_data;
    683 
    684 	bzero((void *)st, sizeof(*st));
    685 	st->st_size = kq->kq_count;
    686 	st->st_blksize = sizeof(struct kevent);
    687 	st->st_mode = S_IFIFO;
    688 	return (0);
    689 }
    690 
    691 /*ARGSUSED*/
    692 static int
    693 kqueue_close(struct file *fp, struct proc *p)
    694 {
    695 	struct kqueue *kq = (struct kqueue *)fp->f_data;
    696 	struct filedesc *fdp = p->p_fd;
    697 	struct knote **knp, *kn, *kn0;
    698 	int i;
    699 
    700 	for (i = 0; i < fdp->fd_knlistsize; i++) {
    701 		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
    702 		kn = *knp;
    703 		while (kn != NULL) {
    704 			kn0 = SLIST_NEXT(kn, kn_link);
    705 			if (kq == kn->kn_kq) {
    706 				kn->kn_fop->f_detach(kn);
    707 				fdrop(kn->kn_fp, p);
    708 				knote_free(kn);
    709 				*knp = kn0;
    710 			} else {
    711 				knp = &SLIST_NEXT(kn, kn_link);
    712 			}
    713 			kn = kn0;
    714 		}
    715 	}
    716 	if (fdp->fd_knhashmask != 0) {
    717 		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
    718 			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
    719 			kn = *knp;
    720 			while (kn != NULL) {
    721 				kn0 = SLIST_NEXT(kn, kn_link);
    722 				if (kq == kn->kn_kq) {
    723 					kn->kn_fop->f_detach(kn);
    724 		/* XXX non-fd release of kn->kn_ptr */
    725 					knote_free(kn);
    726 					*knp = kn0;
    727 				} else {
    728 					knp = &SLIST_NEXT(kn, kn_link);
    729 				}
    730 				kn = kn0;
    731 			}
    732 		}
    733 	}
    734 	free(kq, M_TEMP);
    735 	fp->f_data = NULL;
    736 
    737 	return (0);
    738 }
    739 
    740 static void
    741 kqueue_wakeup(struct kqueue *kq)
    742 {
    743 
    744 	if (kq->kq_state & KQ_SLEEP) {
    745 		kq->kq_state &= ~KQ_SLEEP;
    746 		wakeup(kq);
    747 	}
    748 	if (kq->kq_state & KQ_SEL) {
    749 		kq->kq_state &= ~KQ_SEL;
    750 		selwakeup(&kq->kq_sel);
    751 	}
    752 	KNOTE(&kq->kq_sel.si_note, 0);
    753 }
    754 
    755 /*
    756  * walk down a list of knotes, activating them if their event has triggered.
    757  */
    758 void
    759 knote(struct klist *list, long hint)
    760 {
    761 	struct knote *kn;
    762 
    763 	SLIST_FOREACH(kn, list, kn_selnext)
    764 		if (kn->kn_fop->f_event(kn, hint))
    765 			KNOTE_ACTIVATE(kn);
    766 }
    767 
    768 /*
    769  * remove all knotes from a specified klist
    770  */
    771 void
    772 knote_remove(struct proc *p, struct klist *list)
    773 {
    774 	struct knote *kn;
    775 
    776 	while ((kn = SLIST_FIRST(list)) != NULL) {
    777 		kn->kn_fop->f_detach(kn);
    778 		knote_drop(kn, p);
    779 	}
    780 }
    781 
    782 /*
    783  * remove all knotes referencing a specified fd
    784  */
    785 void
    786 knote_fdclose(struct proc *p, int fd)
    787 {
    788 	struct filedesc *fdp = p->p_fd;
    789 	struct klist *list = &fdp->fd_knlist[fd];
    790 
    791 	knote_remove(p, list);
    792 }
    793 
    794 static void
    795 knote_attach(struct knote *kn, struct filedesc *fdp)
    796 {
    797 	struct klist *list;
    798 	int size;
    799 
    800 	if (! kn->kn_fop->f_isfd) {
    801 		if (fdp->fd_knhashmask == 0)
    802 			fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
    803 			    &fdp->fd_knhashmask);
    804 		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
    805 		goto done;
    806 	}
    807 
    808 	if (fdp->fd_knlistsize <= kn->kn_id) {
    809 		size = fdp->fd_knlistsize;
    810 		while (size <= kn->kn_id)
    811 			size += KQEXTENT;
    812 		MALLOC(list, struct klist *,
    813 		    size * sizeof(struct klist *), M_TEMP, M_WAITOK);
    814 		bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
    815 		    fdp->fd_knlistsize * sizeof(struct klist *));
    816 		bzero((caddr_t)list +
    817 		    fdp->fd_knlistsize * sizeof(struct klist *),
    818 		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
    819 		if (fdp->fd_knlist != NULL)
    820 			FREE(fdp->fd_knlist, M_TEMP);
    821 		fdp->fd_knlistsize = size;
    822 		fdp->fd_knlist = list;
    823 	}
    824 	list = &fdp->fd_knlist[kn->kn_id];
    825 done:
    826 	SLIST_INSERT_HEAD(list, kn, kn_link);
    827 	kn->kn_status = 0;
    828 }
    829 
    830 /*
    831  * should be called at spl == 0, since we don't want to hold spl
    832  * while calling fdrop and free.
    833  */
    834 static void
    835 knote_drop(struct knote *kn, struct proc *p)
    836 {
    837         struct filedesc *fdp = p->p_fd;
    838 	struct klist *list;
    839 
    840 	if (kn->kn_fop->f_isfd)
    841 		list = &fdp->fd_knlist[kn->kn_id];
    842 	else
    843 		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
    844 
    845 	SLIST_REMOVE(list, kn, knote, kn_link);
    846 	if (kn->kn_status & KN_QUEUED)
    847 		knote_dequeue(kn);
    848 	if (kn->kn_fop->f_isfd)
    849 		fdrop(kn->kn_fp, p);
    850 	knote_free(kn);
    851 }
    852 
    853 
    854 static void
    855 knote_enqueue(struct knote *kn)
    856 {
    857 	struct kqueue *kq = kn->kn_kq;
    858 	int s = splhigh();
    859 
    860 	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
    861 
    862 	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
    863 	kn->kn_status |= KN_QUEUED;
    864 	kq->kq_count++;
    865 	splx(s);
    866 	kqueue_wakeup(kq);
    867 }
    868 
    869 static void
    870 knote_dequeue(struct knote *kn)
    871 {
    872 	struct kqueue *kq = kn->kn_kq;
    873 	int s = splhigh();
    874 
    875 	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
    876 
    877 	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
    878 	kn->kn_status &= ~KN_QUEUED;
    879 	kq->kq_count--;
    880 	splx(s);
    881 }
    882 
    883 static void
    884 knote_init(void)
    885 {
    886 	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
    887 }
    888 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)
    889 
    890 static struct knote *
    891 knote_alloc(void)
    892 {
    893 	return ((struct knote *)zalloc(knote_zone));
    894 }
    895 
    896 static void
    897 knote_free(struct knote *kn)
    898 {
    899 	zfree(knote_zone, kn);
    900 }
    901