/*	$NetBSD: sys_generic.c,v 1.54.2.1 2001/03/05 22:49:44 nathanw Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/lwp.h>
51 #include <sys/proc.h>
52 #include <sys/socketvar.h>
53 #include <sys/signalvar.h>
54 #include <sys/uio.h>
55 #include <sys/kernel.h>
56 #include <sys/stat.h>
57 #include <sys/malloc.h>
58 #include <sys/poll.h>
59 #ifdef KTRACE
60 #include <sys/ktrace.h>
61 #endif
62
63 #include <sys/mount.h>
64 #include <sys/syscallargs.h>
65
66 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
67 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
68
69 /*
70 * Read system call.
71 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int)		fd;
		syscallarg(void *)	buf;
		syscallarg(size_t)	nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;
	/*
	 * Descriptor must be in range, open, not in the middle of being
	 * closed, and open for reading.
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

	/* Hold a use reference so the file can't be closed under us. */
	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
101
/*
 * Common code for read(2)-style calls: transfer up to nbyte bytes from
 * fp into the user buffer buf, starting at *offset.  Consumes the
 * FILE_USE() reference on fp taken by the caller.  On success *retval
 * holds the number of bytes actually read.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes auio)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/*
		 * If the transfer was interrupted after moving some data,
		 * report the partial transfer rather than the error.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
156
157 /*
158 * Scatter read system call.
159 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;
	/*
	 * Descriptor must be in range, open, not being closed, and
	 * open for reading.
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
	    (fp->f_flag & FREAD) == 0)
		return (EBADF);

	/* Hold a use reference so the file can't be closed under us. */
	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
188
189 int
190 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
191 int iovcnt, off_t *offset, int flags, register_t *retval)
192 {
193 struct uio auio;
194 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
195 long i, cnt, error;
196 u_int iovlen;
197 #ifdef KTRACE
198 struct iovec *ktriov;
199 #endif
200
201 error = 0;
202 #ifdef KTRACE
203 ktriov = NULL;
204 #endif
205 /* note: can't use iovlen until iovcnt is validated */
206 iovlen = iovcnt * sizeof(struct iovec);
207 if ((u_int)iovcnt > UIO_SMALLIOV) {
208 if ((u_int)iovcnt > IOV_MAX) {
209 error = EINVAL;
210 goto out;
211 }
212 iov = malloc(iovlen, M_IOV, M_WAITOK);
213 needfree = iov;
214 } else if ((u_int)iovcnt > 0) {
215 iov = aiov;
216 needfree = NULL;
217 } else {
218 error = EINVAL;
219 goto out;
220 }
221
222 auio.uio_iov = iov;
223 auio.uio_iovcnt = iovcnt;
224 auio.uio_rw = UIO_READ;
225 auio.uio_segflg = UIO_USERSPACE;
226 auio.uio_procp = p;
227 error = copyin(iovp, iov, iovlen);
228 if (error)
229 goto done;
230 auio.uio_resid = 0;
231 for (i = 0; i < iovcnt; i++) {
232 auio.uio_resid += iov->iov_len;
233 /*
234 * Reads return ssize_t because -1 is returned on error.
235 * Therefore we must restrict the length to SSIZE_MAX to
236 * avoid garbage return values.
237 */
238 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
239 error = EINVAL;
240 goto done;
241 }
242 iov++;
243 }
244 #ifdef KTRACE
245 /*
246 * if tracing, save a copy of iovec
247 */
248 if (KTRPOINT(p, KTR_GENIO)) {
249 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
250 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
251 }
252 #endif
253 cnt = auio.uio_resid;
254 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
255 if (error)
256 if (auio.uio_resid != cnt && (error == ERESTART ||
257 error == EINTR || error == EWOULDBLOCK))
258 error = 0;
259 cnt -= auio.uio_resid;
260 #ifdef KTRACE
261 if (KTRPOINT(p, KTR_GENIO))
262 if (error == 0) {
263 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
264 free(ktriov, M_TEMP);
265 }
266 #endif
267 *retval = cnt;
268 done:
269 if (needfree)
270 free(needfree, M_IOV);
271 out:
272 FILE_UNUSE(fp, p);
273 return (error);
274 }
275
276 /*
277 * Write system call
278 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int)		fd;
		syscallarg(const void *)	buf;
		syscallarg(size_t)	nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;
	/*
	 * Descriptor must be in range, open, not being closed, and
	 * open for writing.
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

	/* Hold a use reference so the file can't be closed under us. */
	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
307
/*
 * Common code for write(2)-style calls: transfer up to nbyte bytes from
 * the user buffer buf to fp, starting at *offset.  Consumes the
 * FILE_USE() reference on fp taken by the caller.  Posts SIGPIPE on
 * EPIPE, matching historic write semantics.  On success *retval holds
 * the number of bytes actually written.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write consumes auio)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/*
		 * If the transfer was interrupted after moving some data,
		 * report the partial transfer rather than the error.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
365
366 /*
367 * Gather write system call
368 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;
	/*
	 * Descriptor must be in range, open, not being closed, and
	 * open for writing.
	 */
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
	    (fp->f_flag & FWRITE) == 0)
		return (EBADF);

	/* Hold a use reference so the file can't be closed under us. */
	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
397
398 int
399 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
400 int iovcnt, off_t *offset, int flags, register_t *retval)
401 {
402 struct uio auio;
403 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
404 long i, cnt, error;
405 u_int iovlen;
406 #ifdef KTRACE
407 struct iovec *ktriov;
408 #endif
409
410 error = 0;
411 #ifdef KTRACE
412 ktriov = NULL;
413 #endif
414 /* note: can't use iovlen until iovcnt is validated */
415 iovlen = iovcnt * sizeof(struct iovec);
416 if ((u_int)iovcnt > UIO_SMALLIOV) {
417 if ((u_int)iovcnt > IOV_MAX)
418 return (EINVAL);
419 iov = malloc(iovlen, M_IOV, M_WAITOK);
420 needfree = iov;
421 } else if ((u_int)iovcnt > 0) {
422 iov = aiov;
423 needfree = NULL;
424 } else {
425 error = EINVAL;
426 goto out;
427 }
428
429 auio.uio_iov = iov;
430 auio.uio_iovcnt = iovcnt;
431 auio.uio_rw = UIO_WRITE;
432 auio.uio_segflg = UIO_USERSPACE;
433 auio.uio_procp = p;
434 error = copyin(iovp, iov, iovlen);
435 if (error)
436 goto done;
437 auio.uio_resid = 0;
438 for (i = 0; i < iovcnt; i++) {
439 auio.uio_resid += iov->iov_len;
440 /*
441 * Writes return ssize_t because -1 is returned on error.
442 * Therefore we must restrict the length to SSIZE_MAX to
443 * avoid garbage return values.
444 */
445 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
446 error = EINVAL;
447 goto done;
448 }
449 iov++;
450 }
451 #ifdef KTRACE
452 /*
453 * if tracing, save a copy of iovec
454 */
455 if (KTRPOINT(p, KTR_GENIO)) {
456 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
457 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
458 }
459 #endif
460 cnt = auio.uio_resid;
461 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
462 if (error) {
463 if (auio.uio_resid != cnt && (error == ERESTART ||
464 error == EINTR || error == EWOULDBLOCK))
465 error = 0;
466 if (error == EPIPE)
467 psignal(p, SIGPIPE);
468 }
469 cnt -= auio.uio_resid;
470 #ifdef KTRACE
471 if (KTRPOINT(p, KTR_GENIO))
472 if (error == 0) {
473 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
474 free(ktriov, M_TEMP);
475 }
476 #endif
477 *retval = cnt;
478 done:
479 if (needfree)
480 free(needfree, M_IOV);
481 out:
482 FILE_UNUSE(fp, p);
483 return (error);
484 }
485
486 /*
487 * Ioctl system call
488 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(caddr_t)	data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;
	/* Descriptor must be in range, open, and not being closed. */
	if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
	    (fp->f_iflags & FIF_WANTCLOSE) != 0)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/*
	 * Close-on-exec commands operate on the descriptor table entry,
	 * not the file itself; handle them without calling fo_ioctl.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Argument too big for the stack buffer; use the heap. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* Zero-size IOC_IN: pass the user pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag's non-blocking bit in sync with the object. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Keep f_flag's async-I/O bit in sync with the object. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets store the pgid directly. */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			/* Non-positive argument names a process group. */
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		/* TIOCGPGRP yields a pgrp id; FIOGETOWN negates it. */
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
633
634 int selwait, nselcoll;
635
636 /*
637 * Select system call.
638 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Room for three input fd_sets (slots 0-2) and three output
	 * fd_sets (slots 3-5), each ni bytes long. */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in the three input sets; a NULL pointer means "empty". */
#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout into an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* no timeout: sleep until woken */
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
			(fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)	/* deadline already passed */
			goto done;
	}
	s = splsched();
	/*
	 * If a wakeup or a select collision happened while we scanned,
	 * state may have changed: rescan instead of sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy out the three result sets (slots 3-5). */
#define	putbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyout(bits + ni * x, SCARG(uap, name), ni); \
		if (error) \
			goto out; \
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
747
/*
 * Scan the descriptor sets for ready descriptors.  ibitp points at
 * three consecutive input sets (read, write, except), obitp at the
 * three corresponding output sets.  Stores the count of ready
 * descriptors in *retval; returns EBADF if a set bit names a closed
 * or closing descriptor.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc	*fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	/* Poll events to check for each of the read/write/except sets. */
	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Visit each set bit in this word of the mask. */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				/*
				 * NOTE(review): `1 << j` is an int shift;
				 * for j == 31 this relies on wrap behavior
				 * that is formally undefined — confirm
				 * fd_mask width vs. int on all ports.
				 */
				ibits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
				    (fp->f_iflags & FIF_WANTCLOSE) != 0)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
785
786 /*
787 * Poll system call.
788 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* timeout is in milliseconds; split into sec/usec. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout into an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* INFTIM: sleep until woken */
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)	/* deadline already passed */
			goto done;
	}
	s = splsched();
	/*
	 * If a wakeup or a select collision happened while we scanned,
	 * state may have changed: rescan instead of sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Copy the pollfd array (with revents filled in) back. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
873
874 int
875 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
876 {
877 struct filedesc *fdp;
878 int i, n;
879 struct file *fp;
880
881 fdp = p->p_fd;
882 n = 0;
883 for (i = 0; i < nfd; i++, fds++) {
884 if ((u_int)fds->fd >= fdp->fd_nfiles) {
885 fds->revents = POLLNVAL;
886 n++;
887 } else {
888 fp = fdp->fd_ofiles[fds->fd];
889 if (fp == NULL ||
890 (fp->f_iflags & FIF_WANTCLOSE) != 0) {
891 fds->revents = POLLNVAL;
892 n++;
893 } else {
894 FILE_USE(fp);
895 fds->revents = (*fp->f_ops->fo_poll)(fp,
896 fds->events | POLLERR | POLLHUP, p);
897 if (fds->revents != 0)
898 n++;
899 FILE_UNUSE(fp, p);
900 }
901 }
902 }
903 *retval = n;
904 return (0);
905 }
906
907 /*ARGSUSED*/
908 int
909 seltrue(dev_t dev, int events, struct proc *p)
910 {
911
912 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
913 }
914
/*
 * Record a select request.
 *
 * Remember the selecting process's pid in the selinfo.  If a different
 * live process is already recorded and one of its LWPs is currently
 * sleeping in select, set SI_COLL so selwakeup() knows to broadcast.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_pid == mypid)
		/* Already recorded here; nothing to do. */
		return;
	if (sip->si_pid && (p = pfind(sip->si_pid))) {
		for (l = LIST_FIRST(&p->p_lwps); l != NULL;
		     l = LIST_NEXT(l, l_sibling)) {
			if (l->l_wchan == (caddr_t)&selwait)
				sip->si_flags |= SI_COLL;
		}
	} else
		sip->si_pid = mypid;
}
937
938 /*
939 * Do a wakeup when a selectable event occurs.
940 */
941 void
942 selwakeup(sip)
943 struct selinfo *sip;
944 {
945 struct lwp *l;
946 struct proc *p;
947 int s;
948
949 if (sip->si_pid == 0)
950 return;
951 if (sip->si_flags & SI_COLL) {
952 nselcoll++;
953 sip->si_flags &= ~SI_COLL;
954 wakeup((caddr_t)&selwait);
955 }
956 p = pfind(sip->si_pid);
957 sip->si_pid = 0;
958 if (p != NULL) {
959 for (l = LIST_FIRST(&p->p_lwps); l != NULL;
960 l = LIST_NEXT(l, l_sibling)) {
961 SCHED_LOCK(s);
962 if (l->l_wchan == (caddr_t)&selwait) {
963 if (l->l_stat == LSSLEEP)
964 setrunnable(l);
965 else
966 unsleep(l);
967 } else if (l->l_flag & L_SELECT)
968 l->l_flag &= ~L_SELECT;
969 SCHED_UNLOCK(s);
970 }
971 }
972 }
973