/*	$NetBSD: sys_generic.c,v 1.55 2001/05/24 06:52:43 lukem Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(struct proc *p, void *v, register_t *retval)
74 {
75 struct sys_read_args /* {
76 syscallarg(int) fd;
77 syscallarg(void *) buf;
78 syscallarg(size_t) nbyte;
79 } */ *uap = v;
80 int fd;
81 struct file *fp;
82 struct filedesc *fdp;
83
84 fd = SCARG(uap, fd);
85 fdp = p->p_fd;
86 if ((u_int)fd >= fdp->fd_nfiles ||
87 (fp = fdp->fd_ofiles[fd]) == NULL ||
88 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
89 (fp->f_flag & FREAD) == 0)
90 return (EBADF);
91
92 FILE_USE(fp);
93
94 /* dofileread() will unuse the descriptor for us */
95 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
96 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
97 }
98
/*
 * Common code for read(2)-style entry points: read `nbyte` bytes into
 * the user buffer `buf` through fp's fo_read op, starting at *offset.
 * `flags` (e.g. FOF_UPDATE_OFFSET) is passed through to the file op.
 * On success *retval holds the number of bytes transferred.  Always
 * releases the caller's FILE_USE() reference on fp before returning.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read advances the uio,
	 * so the original must be captured before the call)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/*
	 * A transfer cut short by a signal or a would-block condition
	 * still succeeds if some bytes were moved; report the partial
	 * count instead of the error.
	 */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
153
154 /*
155 * Scatter read system call.
156 */
157 int
158 sys_readv(struct proc *p, void *v, register_t *retval)
159 {
160 struct sys_readv_args /* {
161 syscallarg(int) fd;
162 syscallarg(const struct iovec *) iovp;
163 syscallarg(int) iovcnt;
164 } */ *uap = v;
165 int fd;
166 struct file *fp;
167 struct filedesc *fdp;
168
169 fd = SCARG(uap, fd);
170 fdp = p->p_fd;
171 if ((u_int)fd >= fdp->fd_nfiles ||
172 (fp = fdp->fd_ofiles[fd]) == NULL ||
173 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
174 (fp->f_flag & FREAD) == 0)
175 return (EBADF);
176
177 FILE_USE(fp);
178
179 /* dofilereadv() will unuse the descriptor for us */
180 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
181 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
182 }
183
184 int
185 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
186 int iovcnt, off_t *offset, int flags, register_t *retval)
187 {
188 struct uio auio;
189 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
190 long i, cnt, error;
191 u_int iovlen;
192 #ifdef KTRACE
193 struct iovec *ktriov;
194 #endif
195
196 error = 0;
197 #ifdef KTRACE
198 ktriov = NULL;
199 #endif
200 /* note: can't use iovlen until iovcnt is validated */
201 iovlen = iovcnt * sizeof(struct iovec);
202 if ((u_int)iovcnt > UIO_SMALLIOV) {
203 if ((u_int)iovcnt > IOV_MAX) {
204 error = EINVAL;
205 goto out;
206 }
207 iov = malloc(iovlen, M_IOV, M_WAITOK);
208 needfree = iov;
209 } else if ((u_int)iovcnt > 0) {
210 iov = aiov;
211 needfree = NULL;
212 } else {
213 error = EINVAL;
214 goto out;
215 }
216
217 auio.uio_iov = iov;
218 auio.uio_iovcnt = iovcnt;
219 auio.uio_rw = UIO_READ;
220 auio.uio_segflg = UIO_USERSPACE;
221 auio.uio_procp = p;
222 error = copyin(iovp, iov, iovlen);
223 if (error)
224 goto done;
225 auio.uio_resid = 0;
226 for (i = 0; i < iovcnt; i++) {
227 auio.uio_resid += iov->iov_len;
228 /*
229 * Reads return ssize_t because -1 is returned on error.
230 * Therefore we must restrict the length to SSIZE_MAX to
231 * avoid garbage return values.
232 */
233 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
234 error = EINVAL;
235 goto done;
236 }
237 iov++;
238 }
239 #ifdef KTRACE
240 /*
241 * if tracing, save a copy of iovec
242 */
243 if (KTRPOINT(p, KTR_GENIO)) {
244 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
245 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
246 }
247 #endif
248 cnt = auio.uio_resid;
249 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
250 if (error)
251 if (auio.uio_resid != cnt && (error == ERESTART ||
252 error == EINTR || error == EWOULDBLOCK))
253 error = 0;
254 cnt -= auio.uio_resid;
255 #ifdef KTRACE
256 if (KTRPOINT(p, KTR_GENIO))
257 if (error == 0) {
258 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
259 free(ktriov, M_TEMP);
260 }
261 #endif
262 *retval = cnt;
263 done:
264 if (needfree)
265 free(needfree, M_IOV);
266 out:
267 FILE_UNUSE(fp, p);
268 return (error);
269 }
270
271 /*
272 * Write system call
273 */
274 int
275 sys_write(struct proc *p, void *v, register_t *retval)
276 {
277 struct sys_write_args /* {
278 syscallarg(int) fd;
279 syscallarg(const void *) buf;
280 syscallarg(size_t) nbyte;
281 } */ *uap = v;
282 int fd;
283 struct file *fp;
284 struct filedesc *fdp;
285
286 fd = SCARG(uap, fd);
287 fdp = p->p_fd;
288 if ((u_int)fd >= fdp->fd_nfiles ||
289 (fp = fdp->fd_ofiles[fd]) == NULL ||
290 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
291 (fp->f_flag & FWRITE) == 0)
292 return (EBADF);
293
294 FILE_USE(fp);
295
296 /* dofilewrite() will unuse the descriptor for us */
297 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
298 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
299 }
300
/*
 * Common code for write(2)-style entry points: write `nbyte` bytes
 * from the user buffer `buf` through fp's fo_write op, starting at
 * *offset.  `flags` (e.g. FOF_UPDATE_OFFSET) is passed through to the
 * file op.  On success *retval holds the number of bytes transferred.
 * Always releases the caller's FILE_USE() reference on fp.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write advances the uio,
	 * so the original must be captured before the call)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/*
		 * A partial transfer cut short by a signal or a
		 * would-block condition still counts as success.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writes to a broken pipe also raise SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
358
359 /*
360 * Gather write system call
361 */
362 int
363 sys_writev(struct proc *p, void *v, register_t *retval)
364 {
365 struct sys_writev_args /* {
366 syscallarg(int) fd;
367 syscallarg(const struct iovec *) iovp;
368 syscallarg(int) iovcnt;
369 } */ *uap = v;
370 int fd;
371 struct file *fp;
372 struct filedesc *fdp;
373
374 fd = SCARG(uap, fd);
375 fdp = p->p_fd;
376 if ((u_int)fd >= fdp->fd_nfiles ||
377 (fp = fdp->fd_ofiles[fd]) == NULL ||
378 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
379 (fp->f_flag & FWRITE) == 0)
380 return (EBADF);
381
382 FILE_USE(fp);
383
384 /* dofilewritev() will unuse the descriptor for us */
385 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
386 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
387 }
388
389 int
390 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
391 int iovcnt, off_t *offset, int flags, register_t *retval)
392 {
393 struct uio auio;
394 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
395 long i, cnt, error;
396 u_int iovlen;
397 #ifdef KTRACE
398 struct iovec *ktriov;
399 #endif
400
401 error = 0;
402 #ifdef KTRACE
403 ktriov = NULL;
404 #endif
405 /* note: can't use iovlen until iovcnt is validated */
406 iovlen = iovcnt * sizeof(struct iovec);
407 if ((u_int)iovcnt > UIO_SMALLIOV) {
408 if ((u_int)iovcnt > IOV_MAX)
409 return (EINVAL);
410 iov = malloc(iovlen, M_IOV, M_WAITOK);
411 needfree = iov;
412 } else if ((u_int)iovcnt > 0) {
413 iov = aiov;
414 needfree = NULL;
415 } else {
416 error = EINVAL;
417 goto out;
418 }
419
420 auio.uio_iov = iov;
421 auio.uio_iovcnt = iovcnt;
422 auio.uio_rw = UIO_WRITE;
423 auio.uio_segflg = UIO_USERSPACE;
424 auio.uio_procp = p;
425 error = copyin(iovp, iov, iovlen);
426 if (error)
427 goto done;
428 auio.uio_resid = 0;
429 for (i = 0; i < iovcnt; i++) {
430 auio.uio_resid += iov->iov_len;
431 /*
432 * Writes return ssize_t because -1 is returned on error.
433 * Therefore we must restrict the length to SSIZE_MAX to
434 * avoid garbage return values.
435 */
436 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
437 error = EINVAL;
438 goto done;
439 }
440 iov++;
441 }
442 #ifdef KTRACE
443 /*
444 * if tracing, save a copy of iovec
445 */
446 if (KTRPOINT(p, KTR_GENIO)) {
447 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
448 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
449 }
450 #endif
451 cnt = auio.uio_resid;
452 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
453 if (error) {
454 if (auio.uio_resid != cnt && (error == ERESTART ||
455 error == EINTR || error == EWOULDBLOCK))
456 error = 0;
457 if (error == EPIPE)
458 psignal(p, SIGPIPE);
459 }
460 cnt -= auio.uio_resid;
461 #ifdef KTRACE
462 if (KTRPOINT(p, KTR_GENIO))
463 if (error == 0) {
464 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
465 free(ktriov, M_TEMP);
466 }
467 #endif
468 *retval = cnt;
469 done:
470 if (needfree)
471 free(needfree, M_IOV);
472 out:
473 FILE_UNUSE(fp, p);
474 return (error);
475 }
476
477 /*
478 * Ioctl system call
479 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
	/* Argument blocks up to this size are staged on the stack. */
#define STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	fdp = p->p_fd;
	/* The descriptor must be in range, open, and not being closed. */
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/*
	 * FIOCLEX/FIONCLEX only toggle the per-descriptor close-on-exec
	 * flag; handle them here without calling the file ops at all.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* Zero-size IOC_IN: hand the raw pointer through. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Mirror the request into f_flag, then notify the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Same pattern as FIONBIO for the async-I/O flag. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets store the signal target directly. */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			/* Non-positive values name a process group. */
			tmp = -tmp;
		} else {
			/* Positive values name a process; use its pgrp. */
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			/* TIOCGPGRP yields a pgrp id; negate for FIOGETOWN. */
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
623
624 int selwait, nselcoll;
625
626 /*
627 * Select system call.
628 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	caddr_t bits;
	/*
	 * Scratch space for six bit vectors, each ni bytes: the three
	 * input sets (in/ou/ex) followed by the three result sets.
	 */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied set; an absent set reads as all-zero. */
#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	/*
	 * Snapshot the global collision counter; if selwakeup() bumps
	 * it while we scan, a wakeup may have raced us and we rescan
	 * rather than sleep.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		/* Lost a race with selwakeup(); scan again. */
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy the three result sets back out to user space. */
#define	putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
735
/*
 * Scan the three select() descriptor sets.  ibitp points at the three
 * input bit vectors (read/write/except, each nfd bits rounded up to
 * fd_mask words); obitp receives the three result vectors.  The count
 * of ready descriptors is stored in *retval.  Returns EBADF if a set
 * names a closed (or closing) descriptor.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	/* Poll events to probe for the read/write/except sets in turn. */
	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Walk the set bits of this word; ffs() is 1-based. */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
				    (fp->f_iflags & FIF_WANTCLOSE) != 0)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
773
774 /*
775 * Poll system call.
776 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	caddr_t bits;
	/* Stack buffer for the common case of few descriptors. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* Millisecond timeout -> absolute timeval deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	/* Collision-counter snapshot; see sys_select() for the protocol. */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		/* A wakeup raced our scan; go around again. */
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Write the revents results back to the user's array. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
859
860 int
861 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
862 {
863 struct filedesc *fdp;
864 int i, n;
865 struct file *fp;
866
867 fdp = p->p_fd;
868 n = 0;
869 for (i = 0; i < nfd; i++, fds++) {
870 if ((u_int)fds->fd >= fdp->fd_nfiles) {
871 fds->revents = POLLNVAL;
872 n++;
873 } else {
874 fp = fdp->fd_ofiles[fds->fd];
875 if (fp == NULL ||
876 (fp->f_iflags & FIF_WANTCLOSE) != 0) {
877 fds->revents = POLLNVAL;
878 n++;
879 } else {
880 FILE_USE(fp);
881 fds->revents = (*fp->f_ops->fo_poll)(fp,
882 fds->events | POLLERR | POLLHUP, p);
883 if (fds->revents != 0)
884 n++;
885 FILE_UNUSE(fp, p);
886 }
887 }
888 }
889 *retval = n;
890 return (0);
891 }
892
893 /*ARGSUSED*/
894 int
895 seltrue(dev_t dev, int events, struct proc *p)
896 {
897
898 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
899 }
900
901 /*
902 * Record a select request.
903 */
/*
 * Remember that `selector` is selecting on the object described by
 * sip, so selwakeup() can find it later.  Only a single pid fits in
 * the selinfo; if another live selector is already recorded, flag a
 * collision so selwakeup() broadcasts instead.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	/* Already recorded for this process; nothing to do. */
	if (sip->si_pid == mypid)
		return;
	/*
	 * If the recorded pid still exists and is sleeping in select,
	 * we cannot overwrite it -- mark the collision instead.
	 */
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else
		sip->si_pid = mypid;
}
919
920 /*
921 * Do a wakeup when a selectable event occurs.
922 */
923 void
924 selwakeup(sip)
925 struct selinfo *sip;
926 {
927 struct proc *p;
928 int s;
929
930 if (sip->si_pid == 0)
931 return;
932 if (sip->si_flags & SI_COLL) {
933 nselcoll++;
934 sip->si_flags &= ~SI_COLL;
935 wakeup((caddr_t)&selwait);
936 }
937 p = pfind(sip->si_pid);
938 sip->si_pid = 0;
939 if (p != NULL) {
940 SCHED_LOCK(s);
941 if (p->p_wchan == (caddr_t)&selwait) {
942 if (p->p_stat == SSLEEP)
943 setrunnable(p);
944 else
945 unsleep(p);
946 } else if (p->p_flag & P_SELECT)
947 p->p_flag &= ~P_SELECT;
948 SCHED_UNLOCK(s);
949 }
950 }
951