/*	$NetBSD: sys_generic.c,v 1.53 2001/02/26 22:28:23 lukem Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(struct proc *p, void *v, register_t *retval)
74 {
75 struct sys_read_args /* {
76 syscallarg(int) fd;
77 syscallarg(void *) buf;
78 syscallarg(size_t) nbyte;
79 } */ *uap = v;
80 int fd;
81 struct file *fp;
82 struct filedesc *fdp;
83
84 fd = SCARG(uap, fd);
85 fdp = p->p_fd;
86 if ((u_int)fd >= fdp->fd_nfiles ||
87 (fp = fdp->fd_ofiles[fd]) == NULL ||
88 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
89 (fp->f_flag & FREAD) == 0)
90 return (EBADF);
91
92 FILE_USE(fp);
93
94 /* dofileread() will unuse the descriptor for us */
95 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
96 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
97 }
98
99 int
100 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
101 off_t *offset, int flags, register_t *retval)
102 {
103 struct uio auio;
104 struct iovec aiov;
105 long cnt, error;
106 #ifdef KTRACE
107 struct iovec ktriov;
108 #endif
109 error = 0;
110
111 aiov.iov_base = (caddr_t)buf;
112 aiov.iov_len = nbyte;
113 auio.uio_iov = &aiov;
114 auio.uio_iovcnt = 1;
115 auio.uio_resid = nbyte;
116 auio.uio_rw = UIO_READ;
117 auio.uio_segflg = UIO_USERSPACE;
118 auio.uio_procp = p;
119
120 /*
121 * Reads return ssize_t because -1 is returned on error. Therefore
122 * we must restrict the length to SSIZE_MAX to avoid garbage return
123 * values.
124 */
125 if (auio.uio_resid > SSIZE_MAX) {
126 error = EINVAL;
127 goto out;
128 }
129
130 #ifdef KTRACE
131 /*
132 * if tracing, save a copy of iovec
133 */
134 if (KTRPOINT(p, KTR_GENIO))
135 ktriov = aiov;
136 #endif
137 cnt = auio.uio_resid;
138 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
139 if (error)
140 if (auio.uio_resid != cnt && (error == ERESTART ||
141 error == EINTR || error == EWOULDBLOCK))
142 error = 0;
143 cnt -= auio.uio_resid;
144 #ifdef KTRACE
145 if (KTRPOINT(p, KTR_GENIO) && error == 0)
146 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
147 #endif
148 *retval = cnt;
149 out:
150 FILE_UNUSE(fp, p);
151 return (error);
152 }
153
154 /*
155 * Scatter read system call.
156 */
157 int
158 sys_readv(struct proc *p, void *v, register_t *retval)
159 {
160 struct sys_readv_args /* {
161 syscallarg(int) fd;
162 syscallarg(const struct iovec *) iovp;
163 syscallarg(int) iovcnt;
164 } */ *uap = v;
165 int fd;
166 struct file *fp;
167 struct filedesc *fdp;
168
169 fd = SCARG(uap, fd);
170 fdp = p->p_fd;
171 if ((u_int)fd >= fdp->fd_nfiles ||
172 (fp = fdp->fd_ofiles[fd]) == NULL ||
173 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
174 (fp->f_flag & FREAD) == 0)
175 return (EBADF);
176
177 FILE_USE(fp);
178
179 /* dofilereadv() will unuse the descriptor for us */
180 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
181 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
182 }
183
184 int
185 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
186 int iovcnt, off_t *offset, int flags, register_t *retval)
187 {
188 struct uio auio;
189 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
190 long i, cnt, error;
191 u_int iovlen;
192 #ifdef KTRACE
193 struct iovec *ktriov;
194 #endif
195
196 error = 0;
197 #ifdef KTRACE
198 ktriov = NULL;
199 #endif
200 /* note: can't use iovlen until iovcnt is validated */
201 iovlen = iovcnt * sizeof(struct iovec);
202 if ((u_int)iovcnt > UIO_SMALLIOV) {
203 if ((u_int)iovcnt > IOV_MAX) {
204 error = EINVAL;
205 goto out;
206 }
207 iov = malloc(iovlen, M_IOV, M_WAITOK);
208 needfree = iov;
209 } else if ((u_int)iovcnt > 0) {
210 iov = aiov;
211 needfree = NULL;
212 } else {
213 error = EINVAL;
214 goto out;
215 }
216
217 auio.uio_iov = iov;
218 auio.uio_iovcnt = iovcnt;
219 auio.uio_rw = UIO_READ;
220 auio.uio_segflg = UIO_USERSPACE;
221 auio.uio_procp = p;
222 error = copyin(iovp, iov, iovlen);
223 if (error)
224 goto done;
225 auio.uio_resid = 0;
226 for (i = 0; i < iovcnt; i++) {
227 auio.uio_resid += iov->iov_len;
228 /*
229 * Reads return ssize_t because -1 is returned on error.
230 * Therefore we must restrict the length to SSIZE_MAX to
231 * avoid garbage return values.
232 */
233 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
234 error = EINVAL;
235 goto done;
236 }
237 iov++;
238 }
239 #ifdef KTRACE
240 /*
241 * if tracing, save a copy of iovec
242 */
243 if (KTRPOINT(p, KTR_GENIO)) {
244 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
245 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
246 }
247 #endif
248 cnt = auio.uio_resid;
249 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
250 if (error)
251 if (auio.uio_resid != cnt && (error == ERESTART ||
252 error == EINTR || error == EWOULDBLOCK))
253 error = 0;
254 cnt -= auio.uio_resid;
255 #ifdef KTRACE
256 if (KTRPOINT(p, KTR_GENIO))
257 if (error == 0) {
258 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
259 free(ktriov, M_TEMP);
260 }
261 #endif
262 *retval = cnt;
263 done:
264 if (needfree)
265 free(needfree, M_IOV);
266 out:
267 FILE_UNUSE(fp, p);
268 return (error);
269 }
270
271 /*
272 * Write system call
273 */
274 int
275 sys_write(struct proc *p, void *v, register_t *retval)
276 {
277 struct sys_write_args /* {
278 syscallarg(int) fd;
279 syscallarg(const void *) buf;
280 syscallarg(size_t) nbyte;
281 } */ *uap = v;
282 int fd;
283 struct file *fp;
284 struct filedesc *fdp;
285
286 fd = SCARG(uap, fd);
287 fdp = p->p_fd;
288 if ((u_int)fd >= fdp->fd_nfiles ||
289 (fp = fdp->fd_ofiles[fd]) == NULL ||
290 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
291 (fp->f_flag & FWRITE) == 0)
292 return (EBADF);
293
294 FILE_USE(fp);
295
296 /* dofilewrite() will unuse the descriptor for us */
297 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
298 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
299 }
300
301 int
302 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
303 size_t nbyte, off_t *offset, int flags, register_t *retval)
304 {
305 struct uio auio;
306 struct iovec aiov;
307 long cnt, error;
308 #ifdef KTRACE
309 struct iovec ktriov;
310 #endif
311
312 error = 0;
313 aiov.iov_base = (caddr_t)buf; /* XXX kills const */
314 aiov.iov_len = nbyte;
315 auio.uio_iov = &aiov;
316 auio.uio_iovcnt = 1;
317 auio.uio_resid = nbyte;
318 auio.uio_rw = UIO_WRITE;
319 auio.uio_segflg = UIO_USERSPACE;
320 auio.uio_procp = p;
321
322 /*
323 * Writes return ssize_t because -1 is returned on error. Therefore
324 * we must restrict the length to SSIZE_MAX to avoid garbage return
325 * values.
326 */
327 if (auio.uio_resid > SSIZE_MAX) {
328 error = EINVAL;
329 goto out;
330 }
331
332 #ifdef KTRACE
333 /*
334 * if tracing, save a copy of iovec
335 */
336 if (KTRPOINT(p, KTR_GENIO))
337 ktriov = aiov;
338 #endif
339 cnt = auio.uio_resid;
340 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
341 if (error) {
342 if (auio.uio_resid != cnt && (error == ERESTART ||
343 error == EINTR || error == EWOULDBLOCK))
344 error = 0;
345 if (error == EPIPE)
346 psignal(p, SIGPIPE);
347 }
348 cnt -= auio.uio_resid;
349 #ifdef KTRACE
350 if (KTRPOINT(p, KTR_GENIO) && error == 0)
351 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
352 #endif
353 *retval = cnt;
354 out:
355 FILE_UNUSE(fp, p);
356 return (error);
357 }
358
359 /*
360 * Gather write system call
361 */
362 int
363 sys_writev(struct proc *p, void *v, register_t *retval)
364 {
365 struct sys_writev_args /* {
366 syscallarg(int) fd;
367 syscallarg(const struct iovec *) iovp;
368 syscallarg(int) iovcnt;
369 } */ *uap = v;
370 int fd;
371 struct file *fp;
372 struct filedesc *fdp;
373
374 fd = SCARG(uap, fd);
375 fdp = p->p_fd;
376 if ((u_int)fd >= fdp->fd_nfiles ||
377 (fp = fdp->fd_ofiles[fd]) == NULL ||
378 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
379 (fp->f_flag & FWRITE) == 0)
380 return (EBADF);
381
382 FILE_USE(fp);
383
384 /* dofilewritev() will unuse the descriptor for us */
385 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
386 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
387 }
388
389 int
390 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
391 int iovcnt, off_t *offset, int flags, register_t *retval)
392 {
393 struct uio auio;
394 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
395 long i, cnt, error;
396 u_int iovlen;
397 #ifdef KTRACE
398 struct iovec *ktriov;
399 #endif
400
401 error = 0;
402 #ifdef KTRACE
403 ktriov = NULL;
404 #endif
405 /* note: can't use iovlen until iovcnt is validated */
406 iovlen = iovcnt * sizeof(struct iovec);
407 if ((u_int)iovcnt > UIO_SMALLIOV) {
408 if ((u_int)iovcnt > IOV_MAX)
409 return (EINVAL);
410 iov = malloc(iovlen, M_IOV, M_WAITOK);
411 needfree = iov;
412 } else if ((u_int)iovcnt > 0) {
413 iov = aiov;
414 needfree = NULL;
415 } else {
416 error = EINVAL;
417 goto out;
418 }
419
420 auio.uio_iov = iov;
421 auio.uio_iovcnt = iovcnt;
422 auio.uio_rw = UIO_WRITE;
423 auio.uio_segflg = UIO_USERSPACE;
424 auio.uio_procp = p;
425 error = copyin(iovp, iov, iovlen);
426 if (error)
427 goto done;
428 auio.uio_resid = 0;
429 for (i = 0; i < iovcnt; i++) {
430 auio.uio_resid += iov->iov_len;
431 /*
432 * Writes return ssize_t because -1 is returned on error.
433 * Therefore we must restrict the length to SSIZE_MAX to
434 * avoid garbage return values.
435 */
436 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
437 error = EINVAL;
438 goto done;
439 }
440 iov++;
441 }
442 #ifdef KTRACE
443 /*
444 * if tracing, save a copy of iovec
445 */
446 if (KTRPOINT(p, KTR_GENIO)) {
447 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
448 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
449 }
450 #endif
451 cnt = auio.uio_resid;
452 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
453 if (error) {
454 if (auio.uio_resid != cnt && (error == ERESTART ||
455 error == EINTR || error == EWOULDBLOCK))
456 error = 0;
457 if (error == EPIPE)
458 psignal(p, SIGPIPE);
459 }
460 cnt -= auio.uio_resid;
461 #ifdef KTRACE
462 if (KTRPOINT(p, KTR_GENIO))
463 if (error == 0) {
464 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
465 free(ktriov, M_TEMP);
466 }
467 #endif
468 *retval = cnt;
469 done:
470 if (needfree)
471 free(needfree, M_IOV);
472 out:
473 FILE_UNUSE(fp, p);
474 return (error);
475 }
476
477 /*
478 * Ioctl system call
479 */
480 /* ARGSUSED */
481 int
482 sys_ioctl(struct proc *p, void *v, register_t *retval)
483 {
484 struct sys_ioctl_args /* {
485 syscallarg(int) fd;
486 syscallarg(u_long) com;
487 syscallarg(caddr_t) data;
488 } */ *uap = v;
489 struct file *fp;
490 struct filedesc *fdp;
491 u_long com;
492 int error;
493 u_int size;
494 caddr_t data, memp;
495 int tmp;
496 #define STK_PARAMS 128
497 u_long stkbuf[STK_PARAMS/sizeof(u_long)];
498
499 error = 0;
500 fdp = p->p_fd;
501 if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
502 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
503 (fp->f_iflags & FIF_WANTCLOSE) != 0)
504 return (EBADF);
505
506 FILE_USE(fp);
507
508 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
509 error = EBADF;
510 goto out;
511 }
512
513 switch (com = SCARG(uap, com)) {
514 case FIONCLEX:
515 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
516 goto out;
517
518 case FIOCLEX:
519 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
520 goto out;
521 }
522
523 /*
524 * Interpret high order word to find amount of data to be
525 * copied to/from the user's address space.
526 */
527 size = IOCPARM_LEN(com);
528 if (size > IOCPARM_MAX) {
529 error = ENOTTY;
530 goto out;
531 }
532 memp = NULL;
533 if (size > sizeof(stkbuf)) {
534 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
535 data = memp;
536 } else
537 data = (caddr_t)stkbuf;
538 if (com&IOC_IN) {
539 if (size) {
540 error = copyin(SCARG(uap, data), data, size);
541 if (error) {
542 if (memp)
543 free(memp, M_IOCTLOPS);
544 goto out;
545 }
546 } else
547 *(caddr_t *)data = SCARG(uap, data);
548 } else if ((com&IOC_OUT) && size)
549 /*
550 * Zero the buffer so the user always
551 * gets back something deterministic.
552 */
553 memset(data, 0, size);
554 else if (com&IOC_VOID)
555 *(caddr_t *)data = SCARG(uap, data);
556
557 switch (com) {
558
559 case FIONBIO:
560 if ((tmp = *(int *)data) != 0)
561 fp->f_flag |= FNONBLOCK;
562 else
563 fp->f_flag &= ~FNONBLOCK;
564 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
565 break;
566
567 case FIOASYNC:
568 if ((tmp = *(int *)data) != 0)
569 fp->f_flag |= FASYNC;
570 else
571 fp->f_flag &= ~FASYNC;
572 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
573 break;
574
575 case FIOSETOWN:
576 tmp = *(int *)data;
577 if (fp->f_type == DTYPE_SOCKET) {
578 ((struct socket *)fp->f_data)->so_pgid = tmp;
579 error = 0;
580 break;
581 }
582 if (tmp <= 0) {
583 tmp = -tmp;
584 } else {
585 struct proc *p1 = pfind(tmp);
586 if (p1 == 0) {
587 error = ESRCH;
588 break;
589 }
590 tmp = p1->p_pgrp->pg_id;
591 }
592 error = (*fp->f_ops->fo_ioctl)
593 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
594 break;
595
596 case FIOGETOWN:
597 if (fp->f_type == DTYPE_SOCKET) {
598 error = 0;
599 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
600 break;
601 }
602 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
603 *(int *)data = -*(int *)data;
604 break;
605
606 default:
607 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
608 /*
609 * Copy any data to user, size was
610 * already set and checked above.
611 */
612 if (error == 0 && (com&IOC_OUT) && size)
613 error = copyout(data, SCARG(uap, data), size);
614 break;
615 }
616 if (memp)
617 free(memp, M_IOCTLOPS);
618 out:
619 FILE_UNUSE(fp, p);
620 return (error);
621 }
622
623 int selwait, nselcoll;
624
625 /*
626 * Select system call.
627 */
628 int
629 sys_select(struct proc *p, void *v, register_t *retval)
630 {
631 struct sys_select_args /* {
632 syscallarg(int) nd;
633 syscallarg(fd_set *) in;
634 syscallarg(fd_set *) ou;
635 syscallarg(fd_set *) ex;
636 syscallarg(struct timeval *) tv;
637 } */ *uap = v;
638 caddr_t bits;
639 char smallbits[howmany(FD_SETSIZE, NFDBITS) *
640 sizeof(fd_mask) * 6];
641 struct timeval atv;
642 int s, ncoll, error, timo;
643 size_t ni;
644
645 error = 0;
646 if (SCARG(uap, nd) < 0)
647 return (EINVAL);
648 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
649 /* forgiving; slightly wrong */
650 SCARG(uap, nd) = p->p_fd->fd_nfiles;
651 }
652 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
653 if (ni * 6 > sizeof(smallbits))
654 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
655 else
656 bits = smallbits;
657
658 #define getbits(name, x) \
659 if (SCARG(uap, name)) { \
660 error = copyin(SCARG(uap, name), bits + ni * x, ni); \
661 if (error) \
662 goto done; \
663 } else \
664 memset(bits + ni * x, 0, ni);
665 getbits(in, 0);
666 getbits(ou, 1);
667 getbits(ex, 2);
668 #undef getbits
669
670 if (SCARG(uap, tv)) {
671 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
672 sizeof(atv));
673 if (error)
674 goto done;
675 if (itimerfix(&atv)) {
676 error = EINVAL;
677 goto done;
678 }
679 s = splclock();
680 timeradd(&atv, &time, &atv);
681 splx(s);
682 } else
683 timo = 0;
684 retry:
685 ncoll = nselcoll;
686 p->p_flag |= P_SELECT;
687 error = selscan(p, (fd_mask *)(bits + ni * 0),
688 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
689 if (error || *retval)
690 goto done;
691 if (SCARG(uap, tv)) {
692 /*
693 * We have to recalculate the timeout on every retry.
694 */
695 timo = hzto(&atv);
696 if (timo <= 0)
697 goto done;
698 }
699 s = splsched();
700 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
701 splx(s);
702 goto retry;
703 }
704 p->p_flag &= ~P_SELECT;
705 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
706 splx(s);
707 if (error == 0)
708 goto retry;
709 done:
710 p->p_flag &= ~P_SELECT;
711 /* select is not restarted after signals... */
712 if (error == ERESTART)
713 error = EINTR;
714 if (error == EWOULDBLOCK)
715 error = 0;
716 if (error == 0) {
717
718 #define putbits(name, x) \
719 if (SCARG(uap, name)) { \
720 error = copyout(bits + ni * x, SCARG(uap, name), ni); \
721 if (error) \
722 goto out; \
723 }
724 putbits(in, 3);
725 putbits(ou, 4);
726 putbits(ex, 5);
727 #undef putbits
728 }
729 out:
730 if (ni * 6 > sizeof(smallbits))
731 free(bits, M_TEMP);
732 return (error);
733 }
734
735 int
736 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
737 register_t *retval)
738 {
739 struct filedesc *fdp;
740 int msk, i, j, fd, n;
741 fd_mask ibits, obits;
742 struct file *fp;
743 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
744 POLLWRNORM | POLLHUP | POLLERR,
745 POLLRDBAND };
746
747 fdp = p->p_fd;
748 n = 0;
749 for (msk = 0; msk < 3; msk++) {
750 for (i = 0; i < nfd; i += NFDBITS) {
751 ibits = *ibitp++;
752 obits = 0;
753 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
754 ibits &= ~(1 << j);
755 fp = fdp->fd_ofiles[fd];
756 if (fp == NULL ||
757 (fp->f_iflags & FIF_WANTCLOSE) != 0)
758 return (EBADF);
759 FILE_USE(fp);
760 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
761 obits |= (1 << j);
762 n++;
763 }
764 FILE_UNUSE(fp, p);
765 }
766 *obitp++ = obits;
767 }
768 }
769 *retval = n;
770 return (0);
771 }
772
773 /*
774 * Poll system call.
775 */
776 int
777 sys_poll(struct proc *p, void *v, register_t *retval)
778 {
779 struct sys_poll_args /* {
780 syscallarg(struct pollfd *) fds;
781 syscallarg(u_int) nfds;
782 syscallarg(int) timeout;
783 } */ *uap = v;
784 caddr_t bits;
785 char smallbits[32 * sizeof(struct pollfd)];
786 struct timeval atv;
787 int s, ncoll, error, timo;
788 size_t ni;
789
790 error = 0;
791 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
792 /* forgiving; slightly wrong */
793 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
794 }
795 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
796 if (ni > sizeof(smallbits))
797 bits = malloc(ni, M_TEMP, M_WAITOK);
798 else
799 bits = smallbits;
800
801 error = copyin(SCARG(uap, fds), bits, ni);
802 if (error)
803 goto done;
804
805 if (SCARG(uap, timeout) != INFTIM) {
806 atv.tv_sec = SCARG(uap, timeout) / 1000;
807 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
808 if (itimerfix(&atv)) {
809 error = EINVAL;
810 goto done;
811 }
812 s = splclock();
813 timeradd(&atv, &time, &atv);
814 splx(s);
815 } else
816 timo = 0;
817 retry:
818 ncoll = nselcoll;
819 p->p_flag |= P_SELECT;
820 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
821 if (error || *retval)
822 goto done;
823 if (SCARG(uap, timeout) != INFTIM) {
824 /*
825 * We have to recalculate the timeout on every retry.
826 */
827 timo = hzto(&atv);
828 if (timo <= 0)
829 goto done;
830 }
831 s = splsched();
832 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
833 splx(s);
834 goto retry;
835 }
836 p->p_flag &= ~P_SELECT;
837 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
838 splx(s);
839 if (error == 0)
840 goto retry;
841 done:
842 p->p_flag &= ~P_SELECT;
843 /* poll is not restarted after signals... */
844 if (error == ERESTART)
845 error = EINTR;
846 if (error == EWOULDBLOCK)
847 error = 0;
848 if (error == 0) {
849 error = copyout(bits, SCARG(uap, fds), ni);
850 if (error)
851 goto out;
852 }
853 out:
854 if (ni > sizeof(smallbits))
855 free(bits, M_TEMP);
856 return (error);
857 }
858
859 int
860 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
861 {
862 struct filedesc *fdp;
863 int i, n;
864 struct file *fp;
865
866 fdp = p->p_fd;
867 for (i = 0; i < nfd; i++, fds++) {
868 if ((u_int)fds->fd >= fdp->fd_nfiles) {
869 fds->revents = POLLNVAL;
870 n++;
871 } else {
872 fp = fdp->fd_ofiles[fds->fd];
873 if (fp == NULL ||
874 (fp->f_iflags & FIF_WANTCLOSE) != 0) {
875 fds->revents = POLLNVAL;
876 n++;
877 } else {
878 FILE_USE(fp);
879 fds->revents = (*fp->f_ops->fo_poll)(fp,
880 fds->events | POLLERR | POLLHUP, p);
881 if (fds->revents != 0)
882 n++;
883 FILE_UNUSE(fp, p);
884 }
885 }
886 }
887 *retval = n;
888 return (0);
889 }
890
891 /*ARGSUSED*/
892 int
893 seltrue(dev_t dev, int events, struct proc *p)
894 {
895
896 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
897 }
898
899 /*
900 * Record a select request.
901 */
902 void
903 selrecord(struct proc *selector, struct selinfo *sip)
904 {
905 struct proc *p;
906 pid_t mypid;
907
908 mypid = selector->p_pid;
909 if (sip->si_pid == mypid)
910 return;
911 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
912 p->p_wchan == (caddr_t)&selwait)
913 sip->si_flags |= SI_COLL;
914 else
915 sip->si_pid = mypid;
916 }
917
918 /*
919 * Do a wakeup when a selectable event occurs.
920 */
921 void
922 selwakeup(sip)
923 struct selinfo *sip;
924 {
925 struct proc *p;
926 int s;
927
928 if (sip->si_pid == 0)
929 return;
930 if (sip->si_flags & SI_COLL) {
931 nselcoll++;
932 sip->si_flags &= ~SI_COLL;
933 wakeup((caddr_t)&selwait);
934 }
935 p = pfind(sip->si_pid);
936 sip->si_pid = 0;
937 if (p != NULL) {
938 SCHED_LOCK(s);
939 if (p->p_wchan == (caddr_t)&selwait) {
940 if (p->p_stat == SSLEEP)
941 setrunnable(p);
942 else
943 unsleep(p);
944 } else if (p->p_flag & P_SELECT)
945 p->p_flag &= ~P_SELECT;
946 SCHED_UNLOCK(s);
947 }
948 }
949