/*	$NetBSD: sys_generic.c,v 1.61 2002/03/17 19:41:07 atatat Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.61 2002/03/17 19:41:07 atatat Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

int	selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
int	pollscan __P((struct proc *, struct pollfd *, int, register_t *));

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

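/*
 * Common code for read(2)-style calls: wrap the user buffer in a
 * single-element uio and hand it to the file's fo_read routine.
 * The caller has already checked FREAD and done FILE_USE(); the
 * matching FILE_UNUSE() is done here on every return path.
 */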
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

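/*
 * Common code for readv(2)-style calls: copy in the iovec array
 * (into an on-stack array for up to UIO_SMALLIOV entries, a temporary
 * allocation otherwise), check the total length against SSIZE_MAX,
 * and pass the resulting uio to fo_read.  As with dofileread(), the
 * descriptor is unused here on every return path.
 */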
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

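/*
 * Common code for write(2)-style calls; the mirror image of
 * dofileread().  In addition, a failed write raises SIGPIPE when the
 * underlying object reports EPIPE.
 */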
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

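/*
 * Common code for writev(2)-style calls; the mirror image of
 * dofilereadv(), with the same SIGPIPE handling as dofilewrite().
 */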
int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

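	/*
	 * FIONCLEX and FIOCLEX only manipulate the per-descriptor
	 * close-on-exec flag; they never reach the file's own ioctl
	 * routine, so handle them before decoding the size bits.
	 */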
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
		    (fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

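/*
 * All selecting and polling processes sleep on the single wait channel
 * selwait; nselcoll counts select "collisions" (see selrecord() and
 * selwakeup()), letting a selecting process detect that a wakeup may
 * have raced with its scan and that it must rescan before sleeping.
 */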
int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
		    sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
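	/*
	 * Scan the descriptors; if nothing is ready yet, sleep on selwait
	 * and rescan after the wakeup.  The P_SELECT flag and the nselcoll
	 * snapshot taken below let selwakeup() signal (by clearing the flag
	 * or bumping the counter) that a wakeup may have raced with the
	 * scan, in which case we retry immediately instead of sleeping.
	 */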
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define	putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

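/*
 * Scan the three input fd_set bit vectors, translating each set into
 * the corresponding poll events and asking each file's fo_poll routine
 * whether it is ready.  Ready descriptors are recorded in the output
 * bit vectors and counted into *retval.
 */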
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

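/*
 * Scan an array of pollfd structures, calling each file's fo_poll
 * routine with the requested events (plus POLLERR and POLLHUP, which
 * cannot be masked out) and storing the result in revents.  Unknown
 * descriptors are reported as POLLNVAL; negative ones are skipped.
 */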
int
pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
{
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, p);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

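/*
 * Generic poll routine for objects that are always ready: report the
 * requested read/write events as immediately true.
 */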
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.  A struct selinfo remembers at most one
 * selecting process; if a different process is already recorded and
 * still sleeping on selwait, set SI_COLL so that selwakeup() wakes
 * all selectors instead of just the recorded one.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_pid == mypid)
		return;
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else {
		sip->si_flags &= ~SI_COLL;
		sip->si_pid = mypid;
	}
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_pid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		SCHED_UNLOCK(s);
	}
}