/* $NetBSD: sys_generic.c,v 1.54.2.10 2002/08/27 23:47:32 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.54.2.10 2002/08/27 23:47:32 nathanw Exp $");
45
46 #include "opt_ktrace.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/filedesc.h>
51 #include <sys/ioctl.h>
52 #include <sys/file.h>
53 #include <sys/proc.h>
54 #include <sys/socketvar.h>
55 #include <sys/signalvar.h>
56 #include <sys/uio.h>
57 #include <sys/kernel.h>
58 #include <sys/stat.h>
59 #include <sys/malloc.h>
60 #include <sys/poll.h>
61 #ifdef KTRACE
62 #include <sys/ktrace.h>
63 #endif
64
65 #include <sys/mount.h>
66 #include <sys/sa.h>
67 #include <sys/syscallargs.h>
68
69 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
70 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
71
/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	/* Look up the descriptor; NULL means fd is not open. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* The file must have been opened for reading. */
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
105
/*
 * Common code for read(2)-style calls: perform a single-iovec read on
 * an already referenced file.
 *
 * The caller must have applied FILE_USE() to fp; this routine always
 * releases that reference (FILE_UNUSE) before returning.  On success,
 * *retval holds the number of bytes transferred.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer masks an interruption or restart. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	/* cnt becomes the number of bytes actually transferred. */
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FILE_UNUSE(fp, p);
	return (error);
}
161
/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	/* Look up the descriptor; NULL means fd is not open. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* The file must have been opened for reading. */
	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
194
/*
 * Common code for readv(2)-style calls: perform a multi-iovec read on
 * an already referenced file.
 *
 * The caller must have applied FILE_USE() to fp; this routine always
 * releases that reference before returning.  On success, *retval holds
 * the number of bytes transferred.
 */
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/* Too many entries for the on-stack array; check the cap. */
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* Small request: use the stack array, nothing to free. */
		iov = aiov;
		needfree = NULL;
	} else {
		/* Zero (or negative) iovcnt is invalid. */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer masks an interruption or restart. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	/* Log on success; free the saved copy whenever it was made. */
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
done:
	if (needfree)
		free(needfree, M_IOV);
out:
	FILE_UNUSE(fp, p);
	return (error);
}
282
/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	/* Look up the descriptor; NULL means fd is not open. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* The file must have been opened for writing. */
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
315
/*
 * Common code for write(2)-style calls: perform a single-iovec write
 * on an already referenced file.
 *
 * The caller must have applied FILE_USE() to fp; this routine always
 * releases that reference (FILE_UNUSE) before returning.  On success,
 * *retval holds the number of bytes transferred.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer masks an interruption or restart. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writing to a broken pipe also raises SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	/* cnt becomes the number of bytes actually transferred. */
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
out:
	FILE_UNUSE(fp, p);
	return (error);
}
374
/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	/* Look up the descriptor; NULL means fd is not open. */
	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	/* The file must have been opened for writing. */
	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
407
408 int
409 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
410 int iovcnt, off_t *offset, int flags, register_t *retval)
411 {
412 struct uio auio;
413 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
414 int i, error;
415 size_t cnt;
416 u_int iovlen;
417 #ifdef KTRACE
418 struct iovec *ktriov;
419 #endif
420
421 error = 0;
422 #ifdef KTRACE
423 ktriov = NULL;
424 #endif
425 /* note: can't use iovlen until iovcnt is validated */
426 iovlen = iovcnt * sizeof(struct iovec);
427 if ((u_int)iovcnt > UIO_SMALLIOV) {
428 if ((u_int)iovcnt > IOV_MAX) {
429 error = EINVAL;
430 goto out;
431 }
432 iov = malloc(iovlen, M_IOV, M_WAITOK);
433 needfree = iov;
434 } else if ((u_int)iovcnt > 0) {
435 iov = aiov;
436 needfree = NULL;
437 } else {
438 error = EINVAL;
439 goto out;
440 }
441
442 auio.uio_iov = iov;
443 auio.uio_iovcnt = iovcnt;
444 auio.uio_rw = UIO_WRITE;
445 auio.uio_segflg = UIO_USERSPACE;
446 auio.uio_procp = p;
447 error = copyin(iovp, iov, iovlen);
448 if (error)
449 goto done;
450 auio.uio_resid = 0;
451 for (i = 0; i < iovcnt; i++) {
452 auio.uio_resid += iov->iov_len;
453 /*
454 * Writes return ssize_t because -1 is returned on error.
455 * Therefore we must restrict the length to SSIZE_MAX to
456 * avoid garbage return values.
457 */
458 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
459 error = EINVAL;
460 goto done;
461 }
462 iov++;
463 }
464 #ifdef KTRACE
465 /*
466 * if tracing, save a copy of iovec
467 */
468 if (KTRPOINT(p, KTR_GENIO)) {
469 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
470 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
471 }
472 #endif
473 cnt = auio.uio_resid;
474 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
475 if (error) {
476 if (auio.uio_resid != cnt && (error == ERESTART ||
477 error == EINTR || error == EWOULDBLOCK))
478 error = 0;
479 if (error == EPIPE)
480 psignal(p, SIGPIPE);
481 }
482 cnt -= auio.uio_resid;
483 #ifdef KTRACE
484 if (KTRPOINT(p, KTR_GENIO))
485 if (error == 0) {
486 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
487 free(ktriov, M_TEMP);
488 }
489 #endif
490 *retval = cnt;
491 done:
492 if (needfree)
493 free(needfree, M_IOV);
494 out:
495 FILE_UNUSE(fp, p);
496 return (error);
497 }
498
/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
/* Ioctl arguments up to this many bytes are kept on the stack. */
#define STK_PARAMS 128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	/* The descriptor must be open for reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/*
	 * FIONCLEX/FIOCLEX operate on the descriptor table itself and
	 * never reach the underlying object.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Argument too big for the stack buffer; heap-allocate. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* Zero-size IOC_IN passes the pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag in sync, then tell the object. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets store the pgid directly. */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		/* Negative arg means a process group; positive, a pid. */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
out:
	FILE_UNUSE(fp, p);
	switch (error) {
	case -1:
		/* A driver returned the historic -1; warn and map it. */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		/* No layer claimed the command: not a tty/ioctl target. */
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
659
660 int selwait, nselcoll;
661
/*
 * Select system call.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Room for three input plus three result descriptor sets. */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* Bytes per descriptor set, rounded up to whole fd_masks. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

/* Copy in one descriptor set, or clear it if not supplied. */
#define getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* no timeout: sleep indefinitely */
retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	/* Input sets live at offsets 0-2, result sets at 3-5. */
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/* A cleared flag or collision means a wakeup raced us; rescan. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

/* Copy a result set back out if the user supplied that set. */
#define putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
773
/*
 * Poll every descriptor named in the three select(2) input sets.
 * ibitp points at the read/write/except input sets (each covering nfd
 * descriptors), obitp at the corresponding result sets.  The number of
 * ready descriptors is stored in *retval; EBADF is returned if a set
 * names a descriptor that is not open.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	/* Poll conditions for the read, write, and except sets. */
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/*
			 * Visit each set bit in this word.
			 * NOTE(review): "1 << j" assumes fd_mask is no
			 * wider than int (as ffs() also does) -- confirm.
			 */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
809
/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Stack space for a modest number of pollfd entries. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* Millisecond timeout -> absolute timeval deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* INFTIM: sleep indefinitely */
retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/* A cleared flag or collision means a wakeup raced us; rescan. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Return the revents fields to the user. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
897
898 int
899 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
900 {
901 struct filedesc *fdp;
902 int i, n;
903 struct file *fp;
904
905 fdp = p->p_fd;
906 n = 0;
907 for (i = 0; i < nfd; i++, fds++) {
908 if (fds->fd >= fdp->fd_nfiles) {
909 fds->revents = POLLNVAL;
910 n++;
911 } else if (fds->fd < 0) {
912 fds->revents = 0;
913 } else {
914 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
915 fds->revents = POLLNVAL;
916 n++;
917 } else {
918 FILE_USE(fp);
919 fds->revents = (*fp->f_ops->fo_poll)(fp,
920 fds->events | POLLERR | POLLHUP, p);
921 if (fds->revents != 0)
922 n++;
923 FILE_UNUSE(fp, p);
924 }
925 }
926 }
927 *retval = n;
928 return (0);
929 }
930
931 /*ARGSUSED*/
932 int
933 seltrue(dev_t dev, int events, struct proc *p)
934 {
935
936 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
937 }
938
939 /*
940 * Record a select request.
941 */
942 void
943 selrecord(struct proc *selector, struct selinfo *sip)
944 {
945 struct lwp *l;
946 struct proc *p;
947 pid_t mypid;
948 int collision;
949
950 mypid = selector->p_pid;
951 if (sip->si_pid == mypid)
952 return;
953
954 collision = 0;
955 if (sip->si_pid && (p = pfind(sip->si_pid))) {
956 for (l = LIST_FIRST(&p->p_lwps); l != NULL;
957 l = LIST_NEXT(l, l_sibling)) {
958 if (l->l_wchan == (caddr_t)&selwait) {
959 collision = 1;
960 sip->si_flags |= SI_COLL;
961 }
962 }
963 }
964
965 if (collision == 0) {
966 sip->si_flags &= ~SI_COLL;
967 sip->si_pid = mypid;
968 }
969 }
970
971 /*
972 * Do a wakeup when a selectable event occurs.
973 */
974 void
975 selwakeup(sip)
976 struct selinfo *sip;
977 {
978 struct lwp *l;
979 struct proc *p;
980 int s;
981
982 if (sip->si_pid == 0)
983 return;
984 if (sip->si_flags & SI_COLL) {
985 nselcoll++;
986 sip->si_flags &= ~SI_COLL;
987 wakeup((caddr_t)&selwait);
988 }
989 p = pfind(sip->si_pid);
990 sip->si_pid = 0;
991 if (p != NULL) {
992 for (l = LIST_FIRST(&p->p_lwps); l != NULL;
993 l = LIST_NEXT(l, l_sibling)) {
994 SCHED_LOCK(s);
995 if (l->l_wchan == (caddr_t)&selwait) {
996 if (l->l_stat == LSSLEEP)
997 setrunnable(l);
998 else
999 unsleep(l);
1000 } else if (l->l_flag & L_SELECT)
1001 l->l_flag &= ~L_SELECT;
1002 SCHED_UNLOCK(s);
1003 }
1004 }
1005 }
1006