sys_generic.c revision 1.75 1 /* $NetBSD: sys_generic.c,v 1.75 2003/06/28 14:21:56 darrenr Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.75 2003/06/28 14:21:56 darrenr Exp $");
45
46 #include "opt_ktrace.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/filedesc.h>
51 #include <sys/ioctl.h>
52 #include <sys/file.h>
53 #include <sys/proc.h>
54 #include <sys/socketvar.h>
55 #include <sys/signalvar.h>
56 #include <sys/uio.h>
57 #include <sys/kernel.h>
58 #include <sys/stat.h>
59 #include <sys/malloc.h>
60 #include <sys/poll.h>
61 #ifdef KTRACE
62 #include <sys/ktrace.h>
63 #endif
64
65 #include <sys/mount.h>
66 #include <sys/sa.h>
67 #include <sys/syscallargs.h>
68
69 int selscan __P((struct lwp *, fd_mask *, fd_mask *, int, register_t *));
70 int pollscan __P((struct lwp *, struct pollfd *, int, register_t *));
71
72 /*
73 * Read system call.
74 */
75 /* ARGSUSED */
76 int
77 sys_read(struct lwp *l, void *v, register_t *retval)
78 {
79 struct sys_read_args /* {
80 syscallarg(int) fd;
81 syscallarg(void *) buf;
82 syscallarg(size_t) nbyte;
83 } */ *uap = v;
84 int fd;
85 struct file *fp;
86 struct proc *p;
87 struct filedesc *fdp;
88
89 fd = SCARG(uap, fd);
90 p = l->l_proc;
91 fdp = p->p_fd;
92
93 if ((fp = fd_getfile(fdp, fd)) == NULL)
94 return (EBADF);
95
96 if ((fp->f_flag & FREAD) == 0) {
97 simple_unlock(&fp->f_slock);
98 return (EBADF);
99 }
100
101 FILE_USE(fp);
102
103 /* dofileread() will unuse the descriptor for us */
104 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
105 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
106 }
107
108 int
109 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
110 off_t *offset, int flags, register_t *retval)
111 {
112 struct iovec aiov;
113 struct uio auio;
114 struct proc *p;
115 size_t cnt;
116 int error;
117 #ifdef KTRACE
118 struct iovec ktriov;
119 #endif
120 p = l->l_proc;
121 error = 0;
122
123 aiov.iov_base = (caddr_t)buf;
124 aiov.iov_len = nbyte;
125 auio.uio_iov = &aiov;
126 auio.uio_iovcnt = 1;
127 auio.uio_resid = nbyte;
128 auio.uio_rw = UIO_READ;
129 auio.uio_segflg = UIO_USERSPACE;
130 auio.uio_lwp = l;
131
132 /*
133 * Reads return ssize_t because -1 is returned on error. Therefore
134 * we must restrict the length to SSIZE_MAX to avoid garbage return
135 * values.
136 */
137 if (auio.uio_resid > SSIZE_MAX) {
138 error = EINVAL;
139 goto out;
140 }
141
142 #ifdef KTRACE
143 /*
144 * if tracing, save a copy of iovec
145 */
146 if (KTRPOINT(p, KTR_GENIO))
147 ktriov = aiov;
148 #endif
149 cnt = auio.uio_resid;
150 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
151 if (error)
152 if (auio.uio_resid != cnt && (error == ERESTART ||
153 error == EINTR || error == EWOULDBLOCK))
154 error = 0;
155 cnt -= auio.uio_resid;
156 #ifdef KTRACE
157 if (KTRPOINT(p, KTR_GENIO) && error == 0)
158 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
159 #endif
160 *retval = cnt;
161 out:
162 FILE_UNUSE(fp, l);
163 return (error);
164 }
165
166 /*
167 * Scatter read system call.
168 */
169 int
170 sys_readv(struct lwp *l, void *v, register_t *retval)
171 {
172 struct sys_readv_args /* {
173 syscallarg(int) fd;
174 syscallarg(const struct iovec *) iovp;
175 syscallarg(int) iovcnt;
176 } */ *uap = v;
177 struct filedesc *fdp;
178 struct file *fp;
179 struct proc *p;
180 int fd;
181
182 fd = SCARG(uap, fd);
183 p = l->l_proc;
184 fdp = p->p_fd;
185
186 if ((fp = fd_getfile(fdp, fd)) == NULL)
187 return (EBADF);
188
189 if ((fp->f_flag & FREAD) == 0) {
190 simple_unlock(&fp->f_slock);
191 return (EBADF);
192 }
193
194 FILE_USE(fp);
195
196 /* dofilereadv() will unuse the descriptor for us */
197 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
198 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
199 }
200
201 int
202 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
203 int iovcnt, off_t *offset, int flags, register_t *retval)
204 {
205 struct proc *p;
206 struct uio auio;
207 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
208 int i, error;
209 size_t cnt;
210 u_int iovlen;
211 #ifdef KTRACE
212 struct iovec *ktriov;
213 #endif
214
215 p = l->l_proc;
216 error = 0;
217 #ifdef KTRACE
218 ktriov = NULL;
219 #endif
220 /* note: can't use iovlen until iovcnt is validated */
221 iovlen = iovcnt * sizeof(struct iovec);
222 if ((u_int)iovcnt > UIO_SMALLIOV) {
223 if ((u_int)iovcnt > IOV_MAX) {
224 error = EINVAL;
225 goto out;
226 }
227 iov = malloc(iovlen, M_IOV, M_WAITOK);
228 needfree = iov;
229 } else if ((u_int)iovcnt > 0) {
230 iov = aiov;
231 needfree = NULL;
232 } else {
233 error = EINVAL;
234 goto out;
235 }
236
237 auio.uio_iov = iov;
238 auio.uio_iovcnt = iovcnt;
239 auio.uio_rw = UIO_READ;
240 auio.uio_segflg = UIO_USERSPACE;
241 auio.uio_lwp = l;
242 error = copyin(iovp, iov, iovlen);
243 if (error)
244 goto done;
245 auio.uio_resid = 0;
246 for (i = 0; i < iovcnt; i++) {
247 auio.uio_resid += iov->iov_len;
248 /*
249 * Reads return ssize_t because -1 is returned on error.
250 * Therefore we must restrict the length to SSIZE_MAX to
251 * avoid garbage return values.
252 */
253 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
254 error = EINVAL;
255 goto done;
256 }
257 iov++;
258 }
259 #ifdef KTRACE
260 /*
261 * if tracing, save a copy of iovec
262 */
263 if (KTRPOINT(p, KTR_GENIO)) {
264 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
265 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
266 }
267 #endif
268 cnt = auio.uio_resid;
269 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
270 if (error)
271 if (auio.uio_resid != cnt && (error == ERESTART ||
272 error == EINTR || error == EWOULDBLOCK))
273 error = 0;
274 cnt -= auio.uio_resid;
275 #ifdef KTRACE
276 if (ktriov != NULL) {
277 if (error == 0)
278 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
279 free(ktriov, M_TEMP);
280 }
281 #endif
282 *retval = cnt;
283 done:
284 if (needfree)
285 free(needfree, M_IOV);
286 out:
287 FILE_UNUSE(fp, l);
288 return (error);
289 }
290
291 /*
292 * Write system call
293 */
294 int
295 sys_write(struct lwp *l, void *v, register_t *retval)
296 {
297 struct sys_write_args /* {
298 syscallarg(int) fd;
299 syscallarg(const void *) buf;
300 syscallarg(size_t) nbyte;
301 } */ *uap = v;
302 int fd;
303 struct file *fp;
304 struct proc *p;
305 struct filedesc *fdp;
306
307 fd = SCARG(uap, fd);
308 p = l->l_proc;
309 fdp = p->p_fd;
310
311 if ((fp = fd_getfile(fdp, fd)) == NULL)
312 return (EBADF);
313
314 if ((fp->f_flag & FWRITE) == 0) {
315 simple_unlock(&fp->f_slock);
316 return (EBADF);
317 }
318
319 FILE_USE(fp);
320
321 /* dofilewrite() will unuse the descriptor for us */
322 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
323 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
324 }
325
326 int
327 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
328 size_t nbyte, off_t *offset, int flags, register_t *retval)
329 {
330 struct iovec aiov;
331 struct uio auio;
332 struct proc *p;
333 size_t cnt;
334 int error;
335 #ifdef KTRACE
336 struct iovec ktriov;
337 #endif
338
339 p = l->l_proc;
340 error = 0;
341 aiov.iov_base = (caddr_t)buf; /* XXX kills const */
342 aiov.iov_len = nbyte;
343 auio.uio_iov = &aiov;
344 auio.uio_iovcnt = 1;
345 auio.uio_resid = nbyte;
346 auio.uio_rw = UIO_WRITE;
347 auio.uio_segflg = UIO_USERSPACE;
348 auio.uio_lwp = l;
349
350 /*
351 * Writes return ssize_t because -1 is returned on error. Therefore
352 * we must restrict the length to SSIZE_MAX to avoid garbage return
353 * values.
354 */
355 if (auio.uio_resid > SSIZE_MAX) {
356 error = EINVAL;
357 goto out;
358 }
359
360 #ifdef KTRACE
361 /*
362 * if tracing, save a copy of iovec
363 */
364 if (KTRPOINT(p, KTR_GENIO))
365 ktriov = aiov;
366 #endif
367 cnt = auio.uio_resid;
368 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
369 if (error) {
370 if (auio.uio_resid != cnt && (error == ERESTART ||
371 error == EINTR || error == EWOULDBLOCK))
372 error = 0;
373 if (error == EPIPE)
374 psignal(p, SIGPIPE);
375 }
376 cnt -= auio.uio_resid;
377 #ifdef KTRACE
378 if (KTRPOINT(p, KTR_GENIO) && error == 0)
379 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
380 #endif
381 *retval = cnt;
382 out:
383 FILE_UNUSE(fp, l);
384 return (error);
385 }
386
387 /*
388 * Gather write system call
389 */
390 int
391 sys_writev(struct lwp *l, void *v, register_t *retval)
392 {
393 struct sys_writev_args /* {
394 syscallarg(int) fd;
395 syscallarg(const struct iovec *) iovp;
396 syscallarg(int) iovcnt;
397 } */ *uap = v;
398 int fd;
399 struct file *fp;
400 struct proc *p;
401 struct filedesc *fdp;
402
403 fd = SCARG(uap, fd);
404 p = l->l_proc;
405 fdp = p->p_fd;
406
407 if ((fp = fd_getfile(fdp, fd)) == NULL)
408 return (EBADF);
409
410 if ((fp->f_flag & FWRITE) == 0) {
411 simple_unlock(&fp->f_slock);
412 return (EBADF);
413 }
414
415 FILE_USE(fp);
416
417 /* dofilewritev() will unuse the descriptor for us */
418 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
419 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
420 }
421
422 int
423 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
424 int iovcnt, off_t *offset, int flags, register_t *retval)
425 {
426 struct proc *p;
427 struct uio auio;
428 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
429 int i, error;
430 size_t cnt;
431 u_int iovlen;
432 #ifdef KTRACE
433 struct iovec *ktriov;
434 #endif
435
436 p = l->l_proc;
437 error = 0;
438 #ifdef KTRACE
439 ktriov = NULL;
440 #endif
441 /* note: can't use iovlen until iovcnt is validated */
442 iovlen = iovcnt * sizeof(struct iovec);
443 if ((u_int)iovcnt > UIO_SMALLIOV) {
444 if ((u_int)iovcnt > IOV_MAX) {
445 error = EINVAL;
446 goto out;
447 }
448 iov = malloc(iovlen, M_IOV, M_WAITOK);
449 needfree = iov;
450 } else if ((u_int)iovcnt > 0) {
451 iov = aiov;
452 needfree = NULL;
453 } else {
454 error = EINVAL;
455 goto out;
456 }
457
458 auio.uio_iov = iov;
459 auio.uio_iovcnt = iovcnt;
460 auio.uio_rw = UIO_WRITE;
461 auio.uio_segflg = UIO_USERSPACE;
462 auio.uio_lwp = l;
463 error = copyin(iovp, iov, iovlen);
464 if (error)
465 goto done;
466 auio.uio_resid = 0;
467 for (i = 0; i < iovcnt; i++) {
468 auio.uio_resid += iov->iov_len;
469 /*
470 * Writes return ssize_t because -1 is returned on error.
471 * Therefore we must restrict the length to SSIZE_MAX to
472 * avoid garbage return values.
473 */
474 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
475 error = EINVAL;
476 goto done;
477 }
478 iov++;
479 }
480 #ifdef KTRACE
481 /*
482 * if tracing, save a copy of iovec
483 */
484 if (KTRPOINT(p, KTR_GENIO)) {
485 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
486 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
487 }
488 #endif
489 cnt = auio.uio_resid;
490 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
491 if (error) {
492 if (auio.uio_resid != cnt && (error == ERESTART ||
493 error == EINTR || error == EWOULDBLOCK))
494 error = 0;
495 if (error == EPIPE)
496 psignal(p, SIGPIPE);
497 }
498 cnt -= auio.uio_resid;
499 #ifdef KTRACE
500 if (KTRPOINT(p, KTR_GENIO))
501 if (error == 0) {
502 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
503 free(ktriov, M_TEMP);
504 }
505 #endif
506 *retval = cnt;
507 done:
508 if (needfree)
509 free(needfree, M_IOV);
510 out:
511 FILE_UNUSE(fp, l);
512 return (error);
513 }
514
515 /*
516 * Ioctl system call
517 */
518 /* ARGSUSED */
519 int
520 sys_ioctl(struct lwp *l, void *v, register_t *retval)
521 {
522 struct sys_ioctl_args /* {
523 syscallarg(int) fd;
524 syscallarg(u_long) com;
525 syscallarg(caddr_t) data;
526 } */ *uap = v;
527 struct file *fp;
528 struct proc *p;
529 struct filedesc *fdp;
530 u_long com;
531 int error;
532 u_int size;
533 caddr_t data, memp;
534 int tmp;
535 #define STK_PARAMS 128
536 u_long stkbuf[STK_PARAMS/sizeof(u_long)];
537
538 error = 0;
539 p = l->l_proc;
540 fdp = p->p_fd;
541
542 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
543 return (EBADF);
544
545 FILE_USE(fp);
546
547 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
548 error = EBADF;
549 com = 0;
550 goto out;
551 }
552
553 switch (com = SCARG(uap, com)) {
554 case FIONCLEX:
555 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
556 goto out;
557
558 case FIOCLEX:
559 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
560 goto out;
561 }
562
563 /*
564 * Interpret high order word to find amount of data to be
565 * copied to/from the user's address space.
566 */
567 size = IOCPARM_LEN(com);
568 if (size > IOCPARM_MAX) {
569 error = ENOTTY;
570 goto out;
571 }
572 memp = NULL;
573 if (size > sizeof(stkbuf)) {
574 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
575 data = memp;
576 } else
577 data = (caddr_t)stkbuf;
578 if (com&IOC_IN) {
579 if (size) {
580 error = copyin(SCARG(uap, data), data, size);
581 if (error) {
582 if (memp)
583 free(memp, M_IOCTLOPS);
584 goto out;
585 }
586 #ifdef KTRACE
587 if (KTRPOINT(p, KTR_GENIO)) {
588 struct iovec iov;
589 iov.iov_base = SCARG(uap, data);
590 iov.iov_len = size;
591 ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
592 size, 0);
593 }
594 #endif
595 } else
596 *(caddr_t *)data = SCARG(uap, data);
597 } else if ((com&IOC_OUT) && size)
598 /*
599 * Zero the buffer so the user always
600 * gets back something deterministic.
601 */
602 memset(data, 0, size);
603 else if (com&IOC_VOID)
604 *(caddr_t *)data = SCARG(uap, data);
605
606 switch (com) {
607
608 case FIONBIO:
609 if ((tmp = *(int *)data) != 0)
610 fp->f_flag |= FNONBLOCK;
611 else
612 fp->f_flag &= ~FNONBLOCK;
613 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, l);
614 break;
615
616 case FIOASYNC:
617 if ((tmp = *(int *)data) != 0)
618 fp->f_flag |= FASYNC;
619 else
620 fp->f_flag &= ~FASYNC;
621 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, l);
622 break;
623
624 case FIOSETOWN:
625 tmp = *(int *)data;
626 if (fp->f_type == DTYPE_SOCKET) {
627 ((struct socket *)fp->f_data)->so_pgid = tmp;
628 error = 0;
629 break;
630 }
631 if (tmp <= 0) {
632 tmp = -tmp;
633 } else {
634 struct proc *p1 = pfind(tmp);
635 if (p1 == 0) {
636 error = ESRCH;
637 break;
638 }
639 tmp = p1->p_pgrp->pg_id;
640 }
641 error = (*fp->f_ops->fo_ioctl)
642 (fp, TIOCSPGRP, (caddr_t)&tmp, l);
643 break;
644
645 case FIOGETOWN:
646 if (fp->f_type == DTYPE_SOCKET) {
647 error = 0;
648 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
649 break;
650 }
651 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, l);
652 if (error == 0)
653 *(int *)data = -*(int *)data;
654 break;
655
656 default:
657 error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
658 /*
659 * Copy any data to user, size was
660 * already set and checked above.
661 */
662 if (error == 0 && (com&IOC_OUT) && size) {
663 error = copyout(data, SCARG(uap, data), size);
664 #ifdef KTRACE
665 if (KTRPOINT(p, KTR_GENIO)) {
666 struct iovec iov;
667 iov.iov_base = SCARG(uap, data);
668 iov.iov_len = size;
669 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
670 size, error);
671 }
672 #endif
673 }
674 break;
675 }
676 if (memp)
677 free(memp, M_IOCTLOPS);
678 out:
679 FILE_UNUSE(fp, l);
680 switch (error) {
681 case -1:
682 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
683 "pid=%d comm=%s\n",
684 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
685 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
686 p->p_pid, p->p_comm);
687 /* FALLTHROUGH */
688 case EPASSTHROUGH:
689 error = ENOTTY;
690 /* FALLTHROUGH */
691 default:
692 return (error);
693 }
694 }
695
/*
 * select()/poll() infrastructure: selwait is the common sleep channel,
 * nselcoll counts collisions (two LWPs selecting on the same selinfo).
 */
int	selwait, nselcoll;
697
698 /*
699 * Select system call.
700 */
701 int
702 sys_select(struct lwp *l, void *v, register_t *retval)
703 {
704 struct sys_select_args /* {
705 syscallarg(int) nd;
706 syscallarg(fd_set *) in;
707 syscallarg(fd_set *) ou;
708 syscallarg(fd_set *) ex;
709 syscallarg(struct timeval *) tv;
710 } */ *uap = v;
711 struct proc *p;
712 caddr_t bits;
713 char smallbits[howmany(FD_SETSIZE, NFDBITS) *
714 sizeof(fd_mask) * 6];
715 struct timeval atv;
716 int s, ncoll, error, timo;
717 size_t ni;
718
719 error = 0;
720 p = l->l_proc;
721 if (SCARG(uap, nd) < 0)
722 return (EINVAL);
723 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
724 /* forgiving; slightly wrong */
725 SCARG(uap, nd) = p->p_fd->fd_nfiles;
726 }
727 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
728 if (ni * 6 > sizeof(smallbits))
729 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
730 else
731 bits = smallbits;
732
733 #define getbits(name, x) \
734 if (SCARG(uap, name)) { \
735 error = copyin(SCARG(uap, name), bits + ni * x, ni); \
736 if (error) \
737 goto done; \
738 } else \
739 memset(bits + ni * x, 0, ni);
740 getbits(in, 0);
741 getbits(ou, 1);
742 getbits(ex, 2);
743 #undef getbits
744
745 timo = 0;
746 if (SCARG(uap, tv)) {
747 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
748 sizeof(atv));
749 if (error)
750 goto done;
751 if (itimerfix(&atv)) {
752 error = EINVAL;
753 goto done;
754 }
755 s = splclock();
756 timeradd(&atv, &time, &atv);
757 splx(s);
758 }
759
760 retry:
761 ncoll = nselcoll;
762 l->l_flag |= L_SELECT;
763 error = selscan(l, (fd_mask *)(bits + ni * 0),
764 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
765 if (error || *retval)
766 goto done;
767 if (SCARG(uap, tv)) {
768 /*
769 * We have to recalculate the timeout on every retry.
770 */
771 timo = hzto(&atv);
772 if (timo <= 0)
773 goto done;
774 }
775 s = splsched();
776 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
777 splx(s);
778 goto retry;
779 }
780 l->l_flag &= ~L_SELECT;
781 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
782 splx(s);
783 if (error == 0)
784 goto retry;
785 done:
786 l->l_flag &= ~L_SELECT;
787 /* select is not restarted after signals... */
788 if (error == ERESTART)
789 error = EINTR;
790 if (error == EWOULDBLOCK)
791 error = 0;
792 if (error == 0) {
793
794 #define putbits(name, x) \
795 if (SCARG(uap, name)) { \
796 error = copyout(bits + ni * x, SCARG(uap, name), ni); \
797 if (error) \
798 goto out; \
799 }
800 putbits(in, 3);
801 putbits(ou, 4);
802 putbits(ex, 5);
803 #undef putbits
804 }
805 out:
806 if (ni * 6 > sizeof(smallbits))
807 free(bits, M_TEMP);
808 return (error);
809 }
810
811 int
812 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
813 register_t *retval)
814 {
815 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
816 POLLWRNORM | POLLHUP | POLLERR,
817 POLLRDBAND };
818 struct proc *p = l->l_proc;
819 struct filedesc *fdp;
820 int msk, i, j, fd, n;
821 fd_mask ibits, obits;
822 struct file *fp;
823
824 fdp = p->p_fd;
825 n = 0;
826 for (msk = 0; msk < 3; msk++) {
827 for (i = 0; i < nfd; i += NFDBITS) {
828 ibits = *ibitp++;
829 obits = 0;
830 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
831 ibits &= ~(1 << j);
832 if ((fp = fd_getfile(fdp, fd)) == NULL)
833 return (EBADF);
834 FILE_USE(fp);
835 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
836 obits |= (1 << j);
837 n++;
838 }
839 FILE_UNUSE(fp, l);
840 }
841 *obitp++ = obits;
842 }
843 }
844 *retval = n;
845 return (0);
846 }
847
848 /*
849 * Poll system call.
850 */
851 int
852 sys_poll(struct lwp *l, void *v, register_t *retval)
853 {
854 struct sys_poll_args /* {
855 syscallarg(struct pollfd *) fds;
856 syscallarg(u_int) nfds;
857 syscallarg(int) timeout;
858 } */ *uap = v;
859 struct proc *p;
860 caddr_t bits;
861 char smallbits[32 * sizeof(struct pollfd)];
862 struct timeval atv;
863 int s, ncoll, error, timo;
864 size_t ni;
865
866 error = 0;
867 p = l->l_proc;
868 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
869 /* forgiving; slightly wrong */
870 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
871 }
872 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
873 if (ni > sizeof(smallbits))
874 bits = malloc(ni, M_TEMP, M_WAITOK);
875 else
876 bits = smallbits;
877
878 error = copyin(SCARG(uap, fds), bits, ni);
879 if (error)
880 goto done;
881
882 timo = 0;
883 if (SCARG(uap, timeout) != INFTIM) {
884 atv.tv_sec = SCARG(uap, timeout) / 1000;
885 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
886 if (itimerfix(&atv)) {
887 error = EINVAL;
888 goto done;
889 }
890 s = splclock();
891 timeradd(&atv, &time, &atv);
892 splx(s);
893 }
894
895 retry:
896 ncoll = nselcoll;
897 l->l_flag |= L_SELECT;
898 error = pollscan(l, (struct pollfd *)bits, SCARG(uap, nfds), retval);
899 if (error || *retval)
900 goto done;
901 if (SCARG(uap, timeout) != INFTIM) {
902 /*
903 * We have to recalculate the timeout on every retry.
904 */
905 timo = hzto(&atv);
906 if (timo <= 0)
907 goto done;
908 }
909 s = splsched();
910 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
911 splx(s);
912 goto retry;
913 }
914 l->l_flag &= ~L_SELECT;
915 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
916 splx(s);
917 if (error == 0)
918 goto retry;
919 done:
920 l->l_flag &= ~L_SELECT;
921 /* poll is not restarted after signals... */
922 if (error == ERESTART)
923 error = EINTR;
924 if (error == EWOULDBLOCK)
925 error = 0;
926 if (error == 0) {
927 error = copyout(bits, SCARG(uap, fds), ni);
928 if (error)
929 goto out;
930 }
931 out:
932 if (ni > sizeof(smallbits))
933 free(bits, M_TEMP);
934 return (error);
935 }
936
937 int
938 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
939 {
940 struct proc *p = l->l_proc;
941 struct filedesc *fdp;
942 int i, n;
943 struct file *fp;
944
945 fdp = p->p_fd;
946 n = 0;
947 for (i = 0; i < nfd; i++, fds++) {
948 if (fds->fd >= fdp->fd_nfiles) {
949 fds->revents = POLLNVAL;
950 n++;
951 } else if (fds->fd < 0) {
952 fds->revents = 0;
953 } else {
954 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
955 fds->revents = POLLNVAL;
956 n++;
957 } else {
958 FILE_USE(fp);
959 fds->revents = (*fp->f_ops->fo_poll)(fp,
960 fds->events | POLLERR | POLLHUP, l);
961 if (fds->revents != 0)
962 n++;
963 FILE_UNUSE(fp, l);
964 }
965 }
966 }
967 *retval = n;
968 return (0);
969 }
970
971 /*ARGSUSED*/
972 int
973 seltrue(dev_t dev, int events, struct lwp *l)
974 {
975
976 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
977 }
978
979 /*
980 * Record a select request.
981 */
982 void
983 selrecord(struct lwp *selector, struct selinfo *sip)
984 {
985 struct lwp *l;
986 struct proc *p;
987 pid_t mypid;
988
989 mypid = selector->l_proc->p_pid;
990 if (sip->sel_pid == mypid)
991 return;
992 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
993 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
994 if (l->l_wchan == (caddr_t)&selwait) {
995 sip->sel_collision = 1;
996 return;
997 }
998 }
999 }
1000
1001 sip->sel_pid = mypid;
1002 }
1003
1004 /*
1005 * Do a wakeup when a selectable event occurs.
1006 */
1007 void
1008 selwakeup(sip)
1009 struct selinfo *sip;
1010 {
1011 struct lwp *l;
1012 struct proc *p;
1013 int s;
1014
1015 if (sip->sel_pid == 0)
1016 return;
1017 if (sip->sel_collision) {
1018 sip->sel_pid = 0;
1019 nselcoll++;
1020 sip->sel_collision = 0;
1021 wakeup((caddr_t)&selwait);
1022 return;
1023 }
1024 p = pfind(sip->sel_pid);
1025 sip->sel_pid = 0;
1026 if (p != NULL) {
1027 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1028 SCHED_LOCK(s);
1029 if (l->l_wchan == (caddr_t)&selwait) {
1030 if (l->l_stat == LSSLEEP)
1031 setrunnable(l);
1032 else
1033 unsleep(l);
1034 } else if (l->l_flag & L_SELECT)
1035 l->l_flag &= ~L_SELECT;
1036 SCHED_UNLOCK(s);
1037 }
1038 }
1039 }
1040