/* $NetBSD: sys_generic.c,v 1.72 2003/03/26 17:50:16 jdolecek Exp $ */

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.72 2003/03/26 17:50:16 jdolecek Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
int pollscan __P((struct proc *, struct pollfd *, int, register_t *));

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
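	/*
	 * If the transfer was interrupted after some data had already
	 * been moved, report the partial count as success rather than
	 * the error.
	 */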
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
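	/*
	 * Small vectors fit in the on-stack aiov[]; anything larger (up
	 * to IOV_MAX entries) gets temporary storage that is freed at
	 * "done".  A zero or out-of-range count is rejected.
	 */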
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
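	/*
	 * As with reads, an interrupted transfer that already moved some
	 * data is reported as a partial success.  A write to a broken
	 * pipe or socket additionally raises SIGPIPE before EPIPE is
	 * returned.
	 */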
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	/* Free the traced iovec copy even on error, as dofilereadv() does. */
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
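	/*
	 * Set up the argument buffer according to the direction encoded
	 * in the command: IOC_IN arguments with a size are copied in
	 * from user space, sizeless IOC_IN and IOC_VOID arguments pass
	 * the user pointer value itself, and IOC_OUT buffers start out
	 * zeroed and are copied back to the user after the driver call.
	 */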
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
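		/*
		 * For non-sockets, a positive argument names a process
		 * whose process group is used; a non-positive argument
		 * is the negated ID of a process group.  Either way the
		 * result is handed to the driver as TIOCSPGRP.
		 */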
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
		    (fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
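	/*
	 * Drivers return EPASSTHROUGH for commands they do not recognize;
	 * that is mapped to ENOTTY here.  A raw -1 is the obsolete way of
	 * saying the same thing, so it is logged and treated likewise.
	 */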
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

/*
 * selwait is the wait channel that all LWPs sleeping in select() or
 * poll() use; nselcoll counts collisions, i.e. cases where more than
 * one process selected on the same descriptor (see selrecord() and
 * selwakeup()).
 */
int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

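	/*
	 * The bits buffer holds six descriptor sets of ni bytes each:
	 * slots 0-2 are the input in/ou/ex sets copied in from the user,
	 * slots 3-5 the corresponding output sets that selscan() fills
	 * in and that are copied back out on success.
	 */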
#define getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	timo = 0;
	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
		    sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

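	/*
	 * Scan the descriptors; if nothing is ready yet, sleep on selwait.
	 * L_SELECT and the nselcoll generation count detect events (or
	 * collisions) that occur between the scan and the sleep, in which
	 * case the scan is retried instead of risking a missed wakeup.
	 */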
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
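	/*
	 * Poll events corresponding to the three select() sets: the read
	 * and write sets also report hangup and error conditions, while
	 * the exceptional set maps to out-of-band (urgent) data.
	 */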
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	timo = 0;
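	/*
	 * Convert the millisecond timeout into an absolute timeval so it
	 * can be recalculated on each retry; INFTIM means wait forever.
	 */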
	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

int
pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
{
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
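	/*
	 * For each entry, a descriptor beyond the table or one that
	 * cannot be looked up gets POLLNVAL; a negative fd is ignored
	 * (revents = 0); otherwise the file's poll routine is asked
	 * about the requested events plus POLLERR and POLLHUP, which
	 * are always reported.
	 */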
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, p);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;
	int collision;

	mypid = selector->p_pid;
	if (sip->sel_pid == mypid)
		return;
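	/*
	 * Another process is already recorded here.  If any of its LWPs
	 * is sleeping in select/poll, mark the selinfo as collided so
	 * that selwakeup() wakes every select sleeper; otherwise simply
	 * take the slot over.
	 */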
	collision = 0;
	if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l->l_wchan == (caddr_t)&selwait) {
				collision = 1;
				sip->sel_flags |= SI_COLL;
			}
		}
	}

	if (collision == 0)
		sip->sel_pid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	int s;

	if (sip->sel_pid == 0)
		return;
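	/*
	 * On a collision we no longer know exactly who is waiting, so
	 * count it and wake up every LWP sleeping on selwait; each one
	 * will rescan its descriptors.
	 */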
	if (sip->sel_flags & SI_COLL) {
		sip->sel_pid = 0;
		nselcoll++;
		sip->sel_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
		return;
	}
	p = pfind(sip->sel_pid);
	sip->sel_pid = 0;
	if (p != NULL) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			SCHED_LOCK(s);
			if (l->l_wchan == (caddr_t)&selwait) {
				if (l->l_stat == LSSLEEP)
					setrunnable(l);
				else
					unsleep(l);
			} else if (l->l_flag & L_SELECT)
				l->l_flag &= ~L_SELECT;
			SCHED_UNLOCK(s);
		}
	}
}