/*	$NetBSD: kern_event.c,v 1.1.1.1.2.1 2001/07/10 13:42:29 lukem Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/syscallargs.h>

static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct proc *p, register_t *retval);
static void	kqueue_wakeup(struct kqueue *kq);

static int	kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct proc *p);
static int	kqueue_fcntl(struct file *fp, u_int com, caddr_t data,
		    struct proc *p);
static int	kqueue_poll(struct file *fp, int events, struct proc *p);
static int	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int	kqueue_stat(struct file *fp, struct stat *sp, struct proc *p);
static int	kqueue_close(struct file *fp, struct proc *p);

static struct fileops kqueueops = {
	kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll,
	kqueue_stat, kqueue_close, kqueue_kqfilter
};

static void	knote_attach(struct knote *kn, struct filedesc *fdp);
static void	knote_drop(struct knote *kn, struct proc *p);
static void	knote_enqueue(struct knote *kn);
static void	knote_dequeue(struct knote *kn);
static void	knote_init(void);
static struct knote *knote_alloc(void);
static void	knote_free(struct knote *kn);

static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);

static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };

struct pool	knote_pool;

#define	KNOTE_ACTIVATE(kn)						\
do {									\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while (0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define	KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 * These should be listed in the numeric order of the EVFILT_* defines.
 * If filtops is NULL, the filter isn't implemented in NetBSD.
 * End of list is when name is NULL.
 */
struct kfilter {
	char		*name;		/* name of filter */
	uint32_t	filter;		/* id of filter */
	struct filterops *filtops;	/* operations for filter */
};

/* System defined filters */
static struct kfilter sys_kfilters[] = {
	{ "EVFILT_READ",	EVFILT_READ,	&file_filtops },
	{ "EVFILT_WRITE",	EVFILT_WRITE,	&file_filtops },
	{ "EVFILT_AIO",		EVFILT_AIO,	NULL },
	{ "EVFILT_VNODE",	EVFILT_VNODE,	&file_filtops },
	{ "EVFILT_PROC",	EVFILT_PROC,	&proc_filtops },
	{ "EVFILT_SIGNAL",	EVFILT_SIGNAL,	&sig_filtops },
	{ NULL,			0,		NULL },	/* end of list */
};

/* User defined kfilters */
static struct kfilter	*user_kfilters;		/* array */
static int		user_kfilterc;		/* current offset */
static int		user_kfiltermaxc;	/* max size so far */

static struct kfilter *kfilter_byname(const char *);
static struct kfilter *kfilter_byfilter(uint32_t);

/*
 * Find kfilter entry by name, or NULL if not found.
 */
static struct kfilter *
kfilter_byname(const char *name)
{
	struct kfilter *kfilter;
	int i;

	kfilter = sys_kfilters;		/* first look in system kfilters */
	while (kfilter != NULL) {
		for (i = 0; kfilter[i].name != NULL; i++) {
			/* search for matching name */
			if (kfilter[i].name[0] != '\0' &&
			    (strcmp(name, kfilter[i].name) == 0))
				return (&kfilter[i]);
		}
		/* swap to user kfilters */
		if (kfilter == sys_kfilters)
			kfilter = user_kfilters;
		else
			kfilter = NULL;
	}
	return (NULL);
}

/*
 * Find kfilter entry by filter id, or NULL if not found.
 * Assumes entries are indexed in filter id order, for speed.
 */
static struct kfilter *
kfilter_byfilter(uint32_t filter)
{
	struct kfilter *kfilter;

	if (filter < EVFILT_SYSCOUNT)	/* it's a system filter */
		kfilter = &sys_kfilters[filter];
	else if (user_kfilters != NULL &&
	    filter < EVFILT_SYSCOUNT + user_kfilterc)
		/* it's a user filter */
		kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT];
	else
		return (NULL);		/* out of range */
	KASSERT(kfilter->filter == filter);	/* sanity check! */
	return (kfilter);
}

/*
 * Register a new kfilter.  Stores the entry in user_kfilters.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 * If retfilter != NULL, the new filterid is returned in it.
 */
int
kfilter_register(const char *name, struct filterops *filtops, int *retfilter)
{
	struct kfilter *kfilter;
	int len;

	if (name == NULL || name[0] == '\0' || filtops == NULL)
		return (EINVAL);	/* invalid args */
	kfilter = kfilter_byname(name);
	if (kfilter != NULL)		/* already exists */
		return (EEXIST);
	if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT)
		return (EINVAL);	/* too many */

	/* need to grow user_kfilters */
	if (user_kfilterc + 1 > user_kfiltermaxc) {
		/*
		 * grow in KFILTER_EXTENT chunks.  use malloc(9), because
		 * we want to traverse user_kfilters as an array.
		 */
		user_kfiltermaxc += KFILTER_EXTENT;
		kfilter = malloc(user_kfiltermaxc * sizeof(struct kfilter),
		    M_KEVENT, M_WAITOK);
		/* copy existing user_kfilters */
		if (user_kfilters != NULL)
			memcpy((caddr_t)kfilter, (caddr_t)user_kfilters,
			    user_kfilterc * sizeof(struct kfilter));
		/* zero new sections */
		memset((caddr_t)kfilter +
		    user_kfilterc * sizeof(struct kfilter), 0,
		    (user_kfiltermaxc - user_kfilterc) *
		    sizeof(struct kfilter));
		/* switch to new kfilter */
		if (user_kfilters != NULL)
			FREE(user_kfilters, M_KEVENT);
		user_kfilters = kfilter;
	}
	len = strlen(name) + 1;		/* copy name */
	user_kfilters[user_kfilterc].name = (char *)
	    malloc(len, M_KEVENT, M_WAITOK);
	memcpy(user_kfilters[user_kfilterc].name, name, len);
	user_kfilters[user_kfilterc].filter = user_kfilterc + EVFILT_SYSCOUNT;
	len = sizeof(struct filterops);	/* copy filtops */
	user_kfilters[user_kfilterc].filtops = (struct filterops *)
	    malloc(len, M_KEVENT, M_WAITOK);
	memcpy(user_kfilters[user_kfilterc].filtops, filtops, len);
	if (retfilter != NULL)
		*retfilter = user_kfilters[user_kfilterc].filter;
	user_kfilterc++;		/* finally, increment count */
	return (0);
}
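
/*
 * Example (sketch, not part of this file): a kernel component with its
 * own filterops could register a filter at attach time.  The names below
 * (mydev_filtops, filt_mydev*) are purely illustrative:
 *
 *	static struct filterops mydev_filtops =
 *		{ 1, filt_mydevattach, filt_mydevdetach, filt_mydev };
 *	int filter;
 *
 *	error = kfilter_register("EVFILT_MYDEV", &mydev_filtops, &filter);
 *	if (error == 0)
 *		printf("mydev: registered as filter %d\n", filter);
 */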

/*
 * Unregister a kfilter previously registered with kfilter_register.
 * This retains the filter id, but clears the name and frees filtops (filter
 * operations), so that the filter id isn't reused during this boot.
 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise.
 */
int
kfilter_unregister(const char *name)
{
	struct kfilter *kfilter;

	if (name == NULL || name[0] == '\0')
		return (EINVAL);	/* invalid name */
	kfilter = kfilter_byname(name);
	if (kfilter == NULL)		/* not found */
		return (ENOENT);
	if (kfilter->filter < EVFILT_SYSCOUNT)
		return (EINVAL);	/* can't detach system filters */

	if (kfilter->name[0] != '\0') {
		free(kfilter->name, M_KEVENT);
		kfilter->name = "";	/* mark as `not implemented' */
	}
	if (kfilter->filtops != NULL) {
		free(kfilter->filtops, M_KEVENT);
		kfilter->filtops = NULL;	/* mark as `not implemented' */
	}
	return (0);
}


/*
 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file
 * descriptors.  Calls the struct fileops kqfilter method for the given
 * file descriptor.
 */
static int
filt_fileattach(struct knote *kn)
{
	struct file *fp;

	fp = kn->kn_fp;
	return ((*fp->f_ops->fo_kqfilter)(fp, kn));
}

/*
 * Filter detach method for EVFILT_READ on kqueue descriptor.
 */
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq;

	kq = (struct kqueue *)kn->kn_fp->f_data;
	SLIST_REMOVE(&kq->kq_sel.si_klist, kn, knote, kn_selnext);
}

/*
 * Filter event method for EVFILT_READ on kqueue descriptor.
 */
/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq;

	kq = (struct kqueue *)kn->kn_fp->f_data;
	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

/*
 * Filter attach method for EVFILT_PROC.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;

	p = pfind(kn->kn_id);
	if (p == NULL)
		return (ESRCH);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;	/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXXLUKEM */
	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	return (0);
}

/*
 * Filter detach method for EVFILT_PROC.
 *
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process no longer exists.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p;

	p = kn->kn_ptr.p_proc;
	if (kn->kn_status & KN_DETACHED)
		return;

	/* XXXLUKEM */
	/* XXX locking?  this might modify another process. */
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

/*
 * Filter event method for EVFILT_PROC.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}
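
/*
 * Example (sketch): from userland, the above lets a monitor watch a
 * child process for exit and automatically follow forks.  Assumes a
 * valid kq from kqueue(2); child_pid is illustrative:
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, child_pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
 *	    NOTE_EXIT | NOTE_FORK | NOTE_TRACK, 0, NULL);
 *	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
 *		err(1, "kevent");
 *
 * NOTE_TRACK failures are reported by setting NOTE_TRACKERR in fflags,
 * as above.
 */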

/*
 * kqueue(2) system call.
 */
int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	fdp = p->p_fd;
	error = falloc(p, &fp, &fd);	/* setup a new file descriptor */
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	MALLOC(kq, struct kqueue *, sizeof(struct kqueue), M_KEVENT, M_WAITOK);
	memset((char *)kq, 0, sizeof(struct kqueue));
	TAILQ_INIT(&kq->kq_head);
	fp->f_data = (caddr_t)kq;	/* store the kqueue with the fp */
	*retval = fd;
	if (fdp->fd_knlistsize < 0)
		fdp->fd_knlistsize = 0;	/* this process has a kq */
	kq->kq_fdp = fdp;
	FILE_SET_MATURE(fp);
	FILE_UNUSE(fp, p);		/* falloc() does FILE_USE() */
	return (error);
}
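
/*
 * Example (sketch): the matching userland call is simply:
 *
 *	int kq;
 *
 *	if ((kq = kqueue()) == -1)
 *		err(1, "kqueue");
 */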

/*
 * kevent(2) system call.
 */
int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct sys_kevent_args /* {
		syscallarg(int) fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int) nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int) nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct filedesc *fdp;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	int i, n, nerrors, error;

	fdp = p->p_fd;		/* check that we're dealing with a kq */
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_type != DTYPE_KQUEUE))
		return (EBADF);

	FILE_USE(fp);

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
		SCARG(uap, timeout) = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	/* traverse list of events to register */
	while (SCARG(uap, nchanges) > 0) {
		/* copyin a maximum of KQ_NEVENTS at each pass */
		n = MIN(SCARG(uap, nchanges), KQ_NEVENTS);
		error = copyin(SCARG(uap, changelist), kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			/* register each knote */
			error = kqueue_register(kq, kevp, p);
			if (error) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					error = copyout((caddr_t)kevp,
					    (caddr_t)SCARG(uap, eventlist),
					    sizeof(*kevp));
					if (error)
						goto done;
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;	/* update the results */
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	/* actually scan through the events */
	error = kqueue_scan(fp, SCARG(uap, nevents), SCARG(uap, eventlist),
	    SCARG(uap, timeout), p, retval);
 done:
	FILE_UNUSE(fp, p);
	return (error);
}
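
/*
 * Example (sketch): a minimal userland caller registering EVFILT_READ
 * on a descriptor fd and blocking for a single event (fd and kq are
 * illustrative):
 *
 *	struct kevent change, event;
 *	int n;
 *
 *	EV_SET(&change, fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, NULL);
 *	n = kevent(kq, &change, 1, &event, 1, NULL);
 *	if (n > 0)
 *		printf("%ld bytes readable on fd %lu\n",
 *		    (long)event.data, (u_long)event.ident);
 */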

/*
 * Register a given kevent kev onto the kqueue.
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp;
	struct kfilter *kfilter;
	struct file *fp;
	struct knote *kn;
	int s, error;

	fdp = kq->kq_fdp;
	fp = NULL;
	kn = NULL;
	error = 0;
	kfilter = kfilter_byfilter(kev->filter);
	if (kfilter == NULL || kfilter->filtops == NULL)
		return (EINVAL);	/* filter not found or not implemented */

	/* search if knote already exists */
	if (kfilter->filtops->f_isfd) {	/* monitoring a file descriptor */
		if ((u_int)kev->ident >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[kev->ident]) == NULL)
			return (EBADF);	/* validate descriptor */
		FILE_USE(fp);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		/*
		 * not monitoring a file descriptor, so
		 * lookup knotes in internal hash table
		 */
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;		/* matching knote not found */
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {		/* add knote */

		if (kn == NULL) {		/* create new knote */
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = kfilter->filtops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = kfilter->filtops->f_attach(kn)) != 0) {
				knote_drop(kn, p);
				goto done;
			}
		} else {			/* modify existing knote */
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which has already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		s = splhigh();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		splx(s);

	} else if (kev->flags & EV_DELETE) {	/* delete knote */
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	/* disable knote */
	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		s = splhigh();
		kn->kn_status |= KN_DISABLED;
		splx(s);
	}

	/* enable knote */
	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		s = splhigh();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		splx(s);
	}

 done:
	if (fp != NULL)
		FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Scan through the list of events on fp (for a maximum of maxevents),
 * returning the results into ulistp.  The timeout is determined by tsp;
 * if NULL, wait indefinitely; if zero valued, perform a poll; otherwise
 * wait as appropriate.
 */
static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
    const struct timespec *tsp, struct proc *p, register_t *retval)
{
	struct kqueue *kq;
	struct kevent *kevp;
	struct timeval atv;
	struct knote *kn, marker;
	int s, count, timeout, nkev, error;

	kq = (struct kqueue *)fp->f_data;
	count = maxevents;
	nkev = error = 0;
	if (count == 0)
		goto done;

	if (tsp != NULL) {			/* timeout supplied */
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);	/* calc. time to wait until */
		splx(s);
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;		/* perform a poll */
		else
			timeout = hzto(&atv);	/* calculate hz till timeout */
	} else {
		atv.tv_sec = 0;			/* no timeout, wait forever */
		atv.tv_usec = 0;
		timeout = 0;
	}
	goto start;

 retry:
	if (atv.tv_sec || atv.tv_usec) {	/* timeout requested */
		s = splclock();
		if (timercmp(&time, &atv, >=)) {
			splx(s);
			goto done;		/* timeout reached */
		}
		splx(s);
		timeout = hzto(&atv);		/* recalc. timeout remaining */
	}

 start:
	kevp = kq->kq_kev;
	s = splhigh();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout);
		}
		splx(s);
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	/* mark end of knote list */
	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);

	while (count) {				/* while user wants data ... */
		kn = TAILQ_FIRST(&kq->kq_head);	/* get next knote */
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {		/* if it's our marker, stop */
			splx(s);
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			/* don't want disabled events */
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			/*
			 * non-ONESHOT event that hasn't
			 * triggered again, so de-queue.
			 */
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			/* delete ONESHOT events after retrieval */
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			splx(s);
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, p);
			s = splhigh();
		} else if (kn->kn_flags & EV_CLEAR) {
			/* clear state after retrieval */
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			/* add event back on list */
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			/* do copyouts in KQ_NEVENTS chunks */
			splx(s);
			error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			s = splhigh();
			if (error)
				break;
		}
	}
	/* remove marker */
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	splx(s);
 done:
	if (nkev != 0)		/* copyout remaining events */
		error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
		    sizeof(struct kevent) * nkev);
	*retval = maxevents - count;
	return (error);
}
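
/*
 * Example (sketch): given the timeout handling above, a userland caller
 * can perform a non-blocking poll by passing a zeroed timespec; the
 * internal EWOULDBLOCK is converted into a zero-event return:
 *
 *	struct timespec ts = { 0, 0 };
 *	int n;
 *
 *	n = kevent(kq, NULL, 0, events, nevents, &ts);
 *	(n == 0 means nothing was pending)
 */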

/*
 * struct fileops read method for a kqueue descriptor.
 * Not implemented.
 * XXX: This could be expanded to call kqueue_scan, if desired.
 */
/*ARGSUSED*/
static int
kqueue_read(struct file *fp, off_t *offset, struct uio *uio,
    struct ucred *cred, int flags)
{

	return (ENXIO);
}

/*
 * struct fileops write method for a kqueue descriptor.
 * Not implemented.
 */
/*ARGSUSED*/
static int
kqueue_write(struct file *fp, off_t *offset, struct uio *uio,
    struct ucred *cred, int flags)
{

	return (ENXIO);
}

/*
 * struct fileops ioctl method for a kqueue descriptor.
 *
 * Two ioctls are currently supported.  They both use struct kfilter_mapping:
 *	KFILTER_BYFILTER	find name for filter; the result is returned
 *				in name, which is of size len.
 *	KFILTER_BYNAME		find filter for name.  len is ignored.
 */
/*ARGSUSED*/
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	struct kfilter_mapping *km;
	struct kfilter *kfilter;
	char *name;
	int error;

	km = (struct kfilter_mapping *)data;
	error = 0;

	switch (com) {
	case KFILTER_BYFILTER:		/* convert filter -> name */
		kfilter = kfilter_byfilter(km->filter);
		if (kfilter != NULL)
			error = copyoutstr(kfilter->name, km->name, km->len,
			    NULL);
		else
			error = ENOENT;
		break;

	case KFILTER_BYNAME:		/* convert name -> filter */
		MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK);
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			free(name, M_KEVENT);
			break;
		}
		kfilter = kfilter_byname(name);
		if (kfilter != NULL)
			km->filter = kfilter->filter;
		else
			error = ENOENT;
		free(name, M_KEVENT);
		break;

#if 1	/* XXXLUKEM - test register & unregister */
	case KFILTER_REGISTER:
	case KFILTER_UNREGISTER:
		MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK);
		error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL);
		if (error) {
			free(name, M_KEVENT);
			break;
		}
		if (com == KFILTER_REGISTER) {
			kfilter = kfilter_byfilter(km->filter);
			if (kfilter != NULL) {
				error = kfilter_register(name,
				    kfilter->filtops, &km->filter);
			} else
				error = ENOENT;
		} else
			error = kfilter_unregister(name);
		free(name, M_KEVENT);
		break;
#endif

	default:
		error = ENOTTY;
		break;
	}
	return (error);
}
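
/*
 * Example (sketch): looking up a filter id by name from userland, via
 * ioctl(2) on a kqueue descriptor kq (illustrative only):
 *
 *	struct kfilter_mapping km;
 *	char name[] = "EVFILT_READ";
 *
 *	km.name = name;
 *	km.len = 0;			(len is ignored for KFILTER_BYNAME)
 *	if (ioctl(kq, KFILTER_BYNAME, &km) == 0)
 *		printf("filter id %u\n", km.filter);
 */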

/*
 * struct fileops fcntl method for a kqueue descriptor.
 * Not implemented.
 */
/*ARGSUSED*/
static int
kqueue_fcntl(struct file *fp, u_int com, caddr_t data, struct proc *p)
{

	return (ENOTTY);
}

/*
 * struct fileops poll method for a kqueue descriptor.
 * Determine if kqueue has events pending.
 */
/*ARGSUSED*/
static int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
	struct kqueue *kq;
	int revents, s;

	kq = (struct kqueue *)fp->f_data;
	revents = 0;
	s = splnet();		/* XXXLUKEM: is this correct? */
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			/* XXXLUKEM: splsched() for next? */
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	splx(s);
	return (revents);
}

/*
 * struct fileops stat method for a kqueue descriptor.
 * Returns dummy info, with st_size being number of events pending.
 */
/*ARGSUSED*/
static int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq;

	kq = (struct kqueue *)fp->f_data;
	memset((void *)st, 0, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * struct fileops close method for a kqueue descriptor.
 * Cleans up kqueue.
 */
/*ARGSUSED*/
static int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq;
	struct filedesc *fdp;
	struct knote **knp, *kn, *kn0;
	int i;

	kq = (struct kqueue *)fp->f_data;
	fdp = p->p_fd;
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				FILE_UNUSE(kn->kn_fp, p);
				knote_free(kn);
				*knp = kn0;
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
					/* XXX non-fd release of kn->kn_ptr */
					knote_free(kn);
					*knp = kn0;
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	free(kq, M_KEVENT);
	fp->f_data = NULL;

	return (0);
}

/*
 * Wake up a kqueue.
 */
static void
kqueue_wakeup(struct kqueue *kq)
{

	if (kq->kq_state & KQ_SLEEP) {		/* if currently sleeping ... */
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);			/* ... wakeup */
	}
	if (kq->kq_state & KQ_SEL) {		/* if currently polling ... */
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);		/* ... selwakeup */
	}
	KNOTE(&kq->kq_sel.si_klist, 0);
}

/*
 * struct fileops kqfilter method for a kqueue descriptor.
 * Event triggered when monitored kqueue changes.
 */
/*ARGSUSED*/
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq;

	kq = (struct kqueue *)kn->kn_fp->f_data;
	if (kn->kn_filter != EVFILT_READ)
		return (1);
	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_klist, kn, kn_selnext);
	return (0);
}


/*
 * Walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}
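
/*
 * Example (sketch): a driver that keeps a struct klist in its softc
 * would call KNOTE() from its event path to run the attached filters;
 * mydev_softc and sc_rklist are illustrative:
 *
 *	struct mydev_softc *sc;
 *	...
 *	sc->sc_bytes_ready += n;
 *	KNOTE(&sc->sc_rklist, 0);
 */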

/*
 * Remove all knotes from a specified klist
 */
void
knote_remove(struct proc *p, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, p);
	}
}

/*
 * Remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp;
	struct klist *list;

	fdp = p->p_fd;
	list = &fdp->fd_knlist[fd];
	knote_remove(p, list);
}

/*
 * Attach a new knote to a file descriptor
 */
static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list;
	int size;

	if (!kn->kn_fop->f_isfd) {
		/*
		 * if knote is not on an fd, store
		 * on internal hash table.
		 */
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(KN_HASHSIZE, HASH_LIST,
			    M_KEVENT, M_WAITOK, &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	/*
	 * otherwise, knote is on an fd.
	 * knotes are stored in fd_knlist
	 * indexed by kn->kn_id.
	 */
	if (fdp->fd_knlistsize <= kn->kn_id) {
		/* expand list if too small */
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQ_EXTENT;	/* grow in KQ_EXTENT chunks */
		list = malloc(size * sizeof(struct klist *), M_KEVENT,
		    M_WAITOK);
		/* copy existing knlist */
		memcpy((caddr_t)list, (caddr_t)fdp->fd_knlist,
		    fdp->fd_knlistsize * sizeof(struct klist *));
		/* zero new sections */
		memset((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *), 0,
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		if (fdp->fd_knlist != NULL)	/* switch to new knlist */
			FREE(fdp->fd_knlist, M_KEVENT);
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
	list = &fdp->fd_knlist[kn->kn_id];	/* get list head for this fd */
 done:
	SLIST_INSERT_HEAD(list, kn, kn_link);	/* add new knote */
	kn->kn_status = 0;
}

/*
 * Drop knote.
 * Should be called at spl == 0, since we don't want to hold spl
 * while calling FILE_UNUSE and free.
 */
static void
knote_drop(struct knote *kn, struct proc *p)
{
	struct filedesc *fdp;
	struct klist *list;

	fdp = p->p_fd;
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		FILE_UNUSE(kn->kn_fp, p);
	knote_free(kn);
}


/*
 * Queue new event for knote.
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq;
	int s;

	kq = kn->kn_kq;
	s = splhigh();
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	splx(s);
	kqueue_wakeup(kq);
}

/*
 * Dequeue event for knote.
 */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq;
	int s;

	kq = kn->kn_kq;
	s = splhigh();
	KASSERT(kn->kn_status & KN_QUEUED);

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	splx(s);
}

/*
 * Initialise pool for knotes.
 */
static void
knote_init(void)
{
	/* XXXLUKEM: how to initialise this? */

	pool_init(&knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_KEVENT);
}

/*
 * Create a new knote.
 */
static struct knote *
knote_alloc(void)
{
	static int knote_pool_initialised;

	if (!knote_pool_initialised) {
		/* initialise pool if necessary */
		/* XXXLUKEM: is there a better way? */
		knote_init();
		knote_pool_initialised++;
	}

	return (pool_get(&knote_pool, PR_WAITOK));
}

/*
 * Free a knote.
 */
static void
knote_free(struct knote *kn)
{

	pool_put(&knote_pool, kn);
}