sys_generic.c revision 1.54.2.7 1 /* $NetBSD: sys_generic.c,v 1.54.2.7 2002/05/29 21:33:14 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.54.2.7 2002/05/29 21:33:14 nathanw Exp $");
45
46 #include "opt_ktrace.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/filedesc.h>
51 #include <sys/ioctl.h>
52 #include <sys/file.h>
53 #include <sys/lwp.h>
54 #include <sys/proc.h>
55 #include <sys/socketvar.h>
56 #include <sys/signalvar.h>
57 #include <sys/uio.h>
58 #include <sys/kernel.h>
59 #include <sys/stat.h>
60 #include <sys/malloc.h>
61 #include <sys/poll.h>
62 #ifdef KTRACE
63 #include <sys/ktrace.h>
64 #endif
65
66 #include <sys/mount.h>
67 #include <sys/sa.h>
68 #include <sys/syscallargs.h>
69
70 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
71 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
72
73 /*
74 * Read system call.
75 */
76 /* ARGSUSED */
77 int
78 sys_read(struct lwp *l, void *v, register_t *retval)
79 {
80 struct sys_read_args /* {
81 syscallarg(int) fd;
82 syscallarg(void *) buf;
83 syscallarg(size_t) nbyte;
84 } */ *uap = v;
85 int fd;
86 struct file *fp;
87 struct proc *p;
88 struct filedesc *fdp;
89
90 fd = SCARG(uap, fd);
91 p = l->l_proc;
92 fdp = p->p_fd;
93
94 if ((fp = fd_getfile(fdp, fd)) == NULL)
95 return (EBADF);
96
97 if ((fp->f_flag & FREAD) == 0)
98 return (EBADF);
99
100 FILE_USE(fp);
101
102 /* dofileread() will unuse the descriptor for us */
103 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
104 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
105 }
106
107 int
108 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
109 off_t *offset, int flags, register_t *retval)
110 {
111 struct uio auio;
112 struct iovec aiov;
113 long cnt, error;
114 #ifdef KTRACE
115 struct iovec ktriov;
116 #endif
117 error = 0;
118
119 aiov.iov_base = (caddr_t)buf;
120 aiov.iov_len = nbyte;
121 auio.uio_iov = &aiov;
122 auio.uio_iovcnt = 1;
123 auio.uio_resid = nbyte;
124 auio.uio_rw = UIO_READ;
125 auio.uio_segflg = UIO_USERSPACE;
126 auio.uio_procp = p;
127
128 /*
129 * Reads return ssize_t because -1 is returned on error. Therefore
130 * we must restrict the length to SSIZE_MAX to avoid garbage return
131 * values.
132 */
133 if (auio.uio_resid > SSIZE_MAX) {
134 error = EINVAL;
135 goto out;
136 }
137
138 #ifdef KTRACE
139 /*
140 * if tracing, save a copy of iovec
141 */
142 if (KTRPOINT(p, KTR_GENIO))
143 ktriov = aiov;
144 #endif
145 cnt = auio.uio_resid;
146 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
147 if (error)
148 if (auio.uio_resid != cnt && (error == ERESTART ||
149 error == EINTR || error == EWOULDBLOCK))
150 error = 0;
151 cnt -= auio.uio_resid;
152 #ifdef KTRACE
153 if (KTRPOINT(p, KTR_GENIO) && error == 0)
154 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
155 #endif
156 *retval = cnt;
157 out:
158 FILE_UNUSE(fp, p);
159 return (error);
160 }
161
162 /*
163 * Scatter read system call.
164 */
165 int
166 sys_readv(struct lwp *l, void *v, register_t *retval)
167 {
168 struct sys_readv_args /* {
169 syscallarg(int) fd;
170 syscallarg(const struct iovec *) iovp;
171 syscallarg(int) iovcnt;
172 } */ *uap = v;
173 int fd;
174 struct file *fp;
175 struct proc *p;
176 struct filedesc *fdp;
177
178 fd = SCARG(uap, fd);
179 p = l->l_proc;
180 fdp = p->p_fd;
181
182 if ((fp = fd_getfile(fdp, fd)) == NULL)
183 return (EBADF);
184
185 if ((fp->f_flag & FREAD) == 0)
186 return (EBADF);
187
188 FILE_USE(fp);
189
190 /* dofilereadv() will unuse the descriptor for us */
191 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
192 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
193 }
194
/*
 * Common code for readv(2): gather-read from fp into the iovcnt user
 * iovecs described by iovp.
 *
 * fd is used only for ktrace logging.  The use reference held on fp is
 * always dropped before returning, on both success and error paths.
 */
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		/* Too many entries for the on-stack array; heap-allocate. */
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* Zero (or negative, caught by the u_int casts) is invalid. */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Total the lengths, rejecting any sum that exceeds SSIZE_MAX. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	/* Log and release the saved iovec copy regardless of error. */
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
281
282 /*
283 * Write system call
284 */
285 int
286 sys_write(struct lwp *l, void *v, register_t *retval)
287 {
288 struct sys_write_args /* {
289 syscallarg(int) fd;
290 syscallarg(const void *) buf;
291 syscallarg(size_t) nbyte;
292 } */ *uap = v;
293 int fd;
294 struct file *fp;
295 struct proc *p;
296 struct filedesc *fdp;
297
298 fd = SCARG(uap, fd);
299 p = l->l_proc;
300 fdp = p->p_fd;
301
302 if ((fp = fd_getfile(fdp, fd)) == NULL)
303 return (EBADF);
304
305 if ((fp->f_flag & FWRITE) == 0)
306 return (EBADF);
307
308 FILE_USE(fp);
309
310 /* dofilewrite() will unuse the descriptor for us */
311 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
312 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
313 }
314
/*
 * Common code for write(2): transfer nbyte bytes from buf through fp.
 *
 * fd is used only for ktrace logging; flags (e.g. FOF_UPDATE_OFFSET)
 * are passed straight through to the file op.  The use reference held
 * on fp is always dropped before returning.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	/* Describe the single user buffer with a one-element uio. */
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* A broken pipe also raises SIGPIPE in the writer. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
372
373 /*
374 * Gather write system call
375 */
376 int
377 sys_writev(struct lwp *l, void *v, register_t *retval)
378 {
379 struct sys_writev_args /* {
380 syscallarg(int) fd;
381 syscallarg(const struct iovec *) iovp;
382 syscallarg(int) iovcnt;
383 } */ *uap = v;
384 int fd;
385 struct file *fp;
386 struct proc *p;
387 struct filedesc *fdp;
388
389 fd = SCARG(uap, fd);
390 p = l->l_proc;
391 fdp = p->p_fd;
392
393 if ((fp = fd_getfile(fdp, fd)) == NULL)
394 return (EBADF);
395
396 if ((fp->f_flag & FWRITE) == 0)
397 return (EBADF);
398
399 FILE_USE(fp);
400
401 /* dofilewritev() will unuse the descriptor for us */
402 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
403 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
404 }
405
406 int
407 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
408 int iovcnt, off_t *offset, int flags, register_t *retval)
409 {
410 struct uio auio;
411 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
412 long i, cnt, error;
413 u_int iovlen;
414 #ifdef KTRACE
415 struct iovec *ktriov;
416 #endif
417
418 error = 0;
419 #ifdef KTRACE
420 ktriov = NULL;
421 #endif
422 /* note: can't use iovlen until iovcnt is validated */
423 iovlen = iovcnt * sizeof(struct iovec);
424 if ((u_int)iovcnt > UIO_SMALLIOV) {
425 if ((u_int)iovcnt > IOV_MAX) {
426 error = EINVAL;
427 goto out;
428 }
429 iov = malloc(iovlen, M_IOV, M_WAITOK);
430 needfree = iov;
431 } else if ((u_int)iovcnt > 0) {
432 iov = aiov;
433 needfree = NULL;
434 } else {
435 error = EINVAL;
436 goto out;
437 }
438
439 auio.uio_iov = iov;
440 auio.uio_iovcnt = iovcnt;
441 auio.uio_rw = UIO_WRITE;
442 auio.uio_segflg = UIO_USERSPACE;
443 auio.uio_procp = p;
444 error = copyin(iovp, iov, iovlen);
445 if (error)
446 goto done;
447 auio.uio_resid = 0;
448 for (i = 0; i < iovcnt; i++) {
449 auio.uio_resid += iov->iov_len;
450 /*
451 * Writes return ssize_t because -1 is returned on error.
452 * Therefore we must restrict the length to SSIZE_MAX to
453 * avoid garbage return values.
454 */
455 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
456 error = EINVAL;
457 goto done;
458 }
459 iov++;
460 }
461 #ifdef KTRACE
462 /*
463 * if tracing, save a copy of iovec
464 */
465 if (KTRPOINT(p, KTR_GENIO)) {
466 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
467 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
468 }
469 #endif
470 cnt = auio.uio_resid;
471 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
472 if (error) {
473 if (auio.uio_resid != cnt && (error == ERESTART ||
474 error == EINTR || error == EWOULDBLOCK))
475 error = 0;
476 if (error == EPIPE)
477 psignal(p, SIGPIPE);
478 }
479 cnt -= auio.uio_resid;
480 #ifdef KTRACE
481 if (KTRPOINT(p, KTR_GENIO))
482 if (error == 0) {
483 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
484 free(ktriov, M_TEMP);
485 }
486 #endif
487 *retval = cnt;
488 done:
489 if (needfree)
490 free(needfree, M_IOV);
491 out:
492 FILE_UNUSE(fp, p);
493 return (error);
494 }
495
/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
	/* Arguments up to STK_PARAMS bytes are staged on the stack. */
#define STK_PARAMS 128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	/* The descriptor must be open for reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/* FIOCLEX/FIONCLEX only touch descriptor flags; no object call. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Argument too large for the stack buffer; heap-allocate. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* No length: the data argument is the value itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Toggle non-blocking I/O on both flag and object. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Toggle async (SIGIO) notification on both flag and object. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets store the owner pgid directly. */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			/* Non-positive: already a process-group id. */
			tmp = -tmp;
		} else {
			/* Positive: a pid; translate it to its pgrp id. */
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	switch (error) {
	case -1:
		/*
		 * -1 from a driver ioctl routine is a historical mistake;
		 * log the offender and convert to ENOTTY below.
		 */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		/* Nothing recognized the command. */
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
656
/* Shared select state: sleep channel and collision counter. */
int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Room for three input plus three output fd_sets. */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* Bytes per fd_set actually needed for nd descriptors. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in the three input sets; absent sets are treated as empty. */
#define getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
			(fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/* A collision or lost L_SELECT means state changed; rescan. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy the three result sets back out to the user. */
#define putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
770
771 int
772 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
773 register_t *retval)
774 {
775 struct filedesc *fdp;
776 int msk, i, j, fd, n;
777 fd_mask ibits, obits;
778 struct file *fp;
779 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
780 POLLWRNORM | POLLHUP | POLLERR,
781 POLLRDBAND };
782
783 fdp = p->p_fd;
784 n = 0;
785 for (msk = 0; msk < 3; msk++) {
786 for (i = 0; i < nfd; i += NFDBITS) {
787 ibits = *ibitp++;
788 obits = 0;
789 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
790 ibits &= ~(1 << j);
791 if ((fp = fd_getfile(fdp, fd)) == NULL)
792 return (EBADF);
793 FILE_USE(fp);
794 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
795 obits |= (1 << j);
796 n++;
797 }
798 FILE_UNUSE(fp, p);
799 }
800 *obitp++ = obits;
801 }
802 }
803 *retval = n;
804 return (0);
805 }
806
/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* In-place copy of the pollfd array; small requests avoid malloc. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* Convert the millisecond timeout to an absolute deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/* A collision or lost L_SELECT means state changed; rescan. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Write the updated revents back to the user's array. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
894
895 int
896 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
897 {
898 struct filedesc *fdp;
899 int i, n;
900 struct file *fp;
901
902 fdp = p->p_fd;
903 n = 0;
904 for (i = 0; i < nfd; i++, fds++) {
905 if (fds->fd >= fdp->fd_nfiles) {
906 fds->revents = POLLNVAL;
907 n++;
908 } else if (fds->fd < 0) {
909 fds->revents = 0;
910 } else {
911 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
912 fds->revents = POLLNVAL;
913 n++;
914 } else {
915 FILE_USE(fp);
916 fds->revents = (*fp->f_ops->fo_poll)(fp,
917 fds->events | POLLERR | POLLHUP, p);
918 if (fds->revents != 0)
919 n++;
920 FILE_UNUSE(fp, p);
921 }
922 }
923 }
924 *retval = n;
925 return (0);
926 }
927
928 /*ARGSUSED*/
929 int
930 seltrue(dev_t dev, int events, struct proc *p)
931 {
932
933 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
934 }
935
936 /*
937 * Record a select request.
938 */
939 void
940 selrecord(struct proc *selector, struct selinfo *sip)
941 {
942 struct lwp *l;
943 struct proc *p;
944 pid_t mypid;
945 int collision;
946
947 mypid = selector->p_pid;
948 if (sip->si_pid == mypid)
949 return;
950
951 collision = 0;
952 if (sip->si_pid && (p = pfind(sip->si_pid))) {
953 for (l = LIST_FIRST(&p->p_lwps); l != NULL;
954 l = LIST_NEXT(l, l_sibling)) {
955 if (l->l_wchan == (caddr_t)&selwait) {
956 collision = 1;
957 sip->si_flags |= SI_COLL;
958 }
959 }
960 }
961
962 if (collision == 0) {
963 sip->si_flags &= ~SI_COLL;
964 sip->si_pid = mypid;
965 }
966 }
967
968 /*
969 * Do a wakeup when a selectable event occurs.
970 */
971 void
972 selwakeup(sip)
973 struct selinfo *sip;
974 {
975 struct lwp *l;
976 struct proc *p;
977 int s;
978
979 if (sip->si_pid == 0)
980 return;
981 if (sip->si_flags & SI_COLL) {
982 nselcoll++;
983 sip->si_flags &= ~SI_COLL;
984 wakeup((caddr_t)&selwait);
985 }
986 p = pfind(sip->si_pid);
987 sip->si_pid = 0;
988 if (p != NULL) {
989 for (l = LIST_FIRST(&p->p_lwps); l != NULL;
990 l = LIST_NEXT(l, l_sibling)) {
991 SCHED_LOCK(s);
992 if (l->l_wchan == (caddr_t)&selwait) {
993 if (l->l_stat == LSSLEEP)
994 setrunnable(l);
995 else
996 unsleep(l);
997 } else if (l->l_flag & L_SELECT)
998 l->l_flag &= ~L_SELECT;
999 SCHED_UNLOCK(s);
1000 }
1001 }
1002 }
1003