sys_generic.c revision 1.77 1 /* $NetBSD: sys_generic.c,v 1.77 2003/08/07 16:31:54 agc Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.77 2003/08/07 16:31:54 agc Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(struct lwp *l, void *v, register_t *retval)
74 {
75 struct sys_read_args /* {
76 syscallarg(int) fd;
77 syscallarg(void *) buf;
78 syscallarg(size_t) nbyte;
79 } */ *uap = v;
80 int fd;
81 struct file *fp;
82 struct proc *p;
83 struct filedesc *fdp;
84
85 fd = SCARG(uap, fd);
86 p = l->l_proc;
87 fdp = p->p_fd;
88
89 if ((fp = fd_getfile(fdp, fd)) == NULL)
90 return (EBADF);
91
92 if ((fp->f_flag & FREAD) == 0) {
93 simple_unlock(&fp->f_slock);
94 return (EBADF);
95 }
96
97 FILE_USE(fp);
98
99 /* dofileread() will unuse the descriptor for us */
100 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
101 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
102 }
103
/*
 * Common code for read(2)/pread(2): perform a single-iovec read from an
 * already referenced file into a user-space buffer.
 *
 * The caller has FILE_USE()d "fp"; this routine FILE_UNUSE()s it on
 * every return path.  On success *retval holds the byte count moved.
 */
104 int
105 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
106 off_t *offset, int flags, register_t *retval)
107 {
108 struct uio auio;
109 struct iovec aiov;
110 size_t cnt;
111 int error;
112 #ifdef KTRACE
113 struct iovec ktriov;
114 #endif
115 error = 0;
116
/* Describe the user buffer with a one-element uio. */
117 aiov.iov_base = (caddr_t)buf;
118 aiov.iov_len = nbyte;
119 auio.uio_iov = &aiov;
120 auio.uio_iovcnt = 1;
121 auio.uio_resid = nbyte;
122 auio.uio_rw = UIO_READ;
123 auio.uio_segflg = UIO_USERSPACE;
124 auio.uio_procp = p;
125
126 /*
127 * Reads return ssize_t because -1 is returned on error. Therefore
128 * we must restrict the length to SSIZE_MAX to avoid garbage return
129 * values.
130 */
131 if (auio.uio_resid > SSIZE_MAX) {
132 error = EINVAL;
133 goto out;
134 }
135
136 #ifdef KTRACE
137 /*
138 * if tracing, save a copy of iovec
139 */
140 if (KTRPOINT(p, KTR_GENIO))
141 ktriov = aiov;
142 #endif
/* Remember the starting residual so the transfer size can be computed. */
143 cnt = auio.uio_resid;
144 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
/*
 * A transfer interrupted by a signal (or that would block) after
 * moving some data is reported as success with a short count.
 */
145 if (error)
146 if (auio.uio_resid != cnt && (error == ERESTART ||
147 error == EINTR || error == EWOULDBLOCK))
148 error = 0;
149 cnt -= auio.uio_resid;
150 #ifdef KTRACE
151 if (KTRPOINT(p, KTR_GENIO) && error == 0)
152 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
153 #endif
154 *retval = cnt;
155 out:
156 FILE_UNUSE(fp, p);
157 return (error);
158 }
159
160 /*
161 * Scatter read system call.
162 */
163 int
164 sys_readv(struct lwp *l, void *v, register_t *retval)
165 {
166 struct sys_readv_args /* {
167 syscallarg(int) fd;
168 syscallarg(const struct iovec *) iovp;
169 syscallarg(int) iovcnt;
170 } */ *uap = v;
171 int fd;
172 struct file *fp;
173 struct proc *p;
174 struct filedesc *fdp;
175
176 fd = SCARG(uap, fd);
177 p = l->l_proc;
178 fdp = p->p_fd;
179
180 if ((fp = fd_getfile(fdp, fd)) == NULL)
181 return (EBADF);
182
183 if ((fp->f_flag & FREAD) == 0) {
184 simple_unlock(&fp->f_slock);
185 return (EBADF);
186 }
187
188 FILE_USE(fp);
189
190 /* dofilereadv() will unuse the descriptor for us */
191 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
192 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
193 }
194
/*
 * Common code for readv(2)/preadv(2): scatter-read into the buffers
 * described by a user-space iovec array.
 *
 * The caller has FILE_USE()d "fp"; this routine FILE_UNUSE()s it on
 * every return path.  On success *retval holds the byte count moved.
 */
195 int
196 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
197 int iovcnt, off_t *offset, int flags, register_t *retval)
198 {
199 struct uio auio;
200 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
201 int i, error;
202 size_t cnt;
203 u_int iovlen;
204 #ifdef KTRACE
205 struct iovec *ktriov;
206 #endif
207
208 error = 0;
209 #ifdef KTRACE
210 ktriov = NULL;
211 #endif
212 /* note: can't use iovlen until iovcnt is validated */
213 iovlen = iovcnt * sizeof(struct iovec);
214 if ((u_int)iovcnt > UIO_SMALLIOV) {
215 if ((u_int)iovcnt > IOV_MAX) {
216 error = EINVAL;
217 goto out;
218 }
219 iov = malloc(iovlen, M_IOV, M_WAITOK);
220 needfree = iov;
/* Small requests fit in the on-stack iovec array. */
221 } else if ((u_int)iovcnt > 0) {
222 iov = aiov;
223 needfree = NULL;
224 } else {
225 error = EINVAL;
226 goto out;
227 }
228
229 auio.uio_iov = iov;
230 auio.uio_iovcnt = iovcnt;
231 auio.uio_rw = UIO_READ;
232 auio.uio_segflg = UIO_USERSPACE;
233 auio.uio_procp = p;
/* Fetch the iovec array itself from user space. */
234 error = copyin(iovp, iov, iovlen);
235 if (error)
236 goto done;
/* Total the request, rejecting any overflow past SSIZE_MAX. */
237 auio.uio_resid = 0;
238 for (i = 0; i < iovcnt; i++) {
239 auio.uio_resid += iov->iov_len;
240 /*
241 * Reads return ssize_t because -1 is returned on error.
242 * Therefore we must restrict the length to SSIZE_MAX to
243 * avoid garbage return values.
244 */
245 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
246 error = EINVAL;
247 goto done;
248 }
249 iov++;
250 }
251 #ifdef KTRACE
252 /*
253 * if tracing, save a copy of iovec
254 */
255 if (KTRPOINT(p, KTR_GENIO)) {
256 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
257 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
258 }
259 #endif
260 cnt = auio.uio_resid;
261 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
/* A partial transfer interrupted by a signal still succeeds. */
262 if (error)
263 if (auio.uio_resid != cnt && (error == ERESTART ||
264 error == EINTR || error == EWOULDBLOCK))
265 error = 0;
266 cnt -= auio.uio_resid;
267 #ifdef KTRACE
/* The traced copy is freed whether or not the read succeeded. */
268 if (ktriov != NULL) {
269 if (error == 0)
270 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
271 free(ktriov, M_TEMP);
272 }
273 #endif
274 *retval = cnt;
275 done:
276 if (needfree)
277 free(needfree, M_IOV);
278 out:
279 FILE_UNUSE(fp, p);
280 return (error);
281 }
282
283 /*
284 * Write system call
285 */
286 int
287 sys_write(struct lwp *l, void *v, register_t *retval)
288 {
289 struct sys_write_args /* {
290 syscallarg(int) fd;
291 syscallarg(const void *) buf;
292 syscallarg(size_t) nbyte;
293 } */ *uap = v;
294 int fd;
295 struct file *fp;
296 struct proc *p;
297 struct filedesc *fdp;
298
299 fd = SCARG(uap, fd);
300 p = l->l_proc;
301 fdp = p->p_fd;
302
303 if ((fp = fd_getfile(fdp, fd)) == NULL)
304 return (EBADF);
305
306 if ((fp->f_flag & FWRITE) == 0) {
307 simple_unlock(&fp->f_slock);
308 return (EBADF);
309 }
310
311 FILE_USE(fp);
312
313 /* dofilewrite() will unuse the descriptor for us */
314 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
315 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
316 }
317
/*
 * Common code for write(2)/pwrite(2): perform a single-iovec write of
 * a user-space buffer through an already referenced file.
 *
 * The caller has FILE_USE()d "fp"; this routine FILE_UNUSE()s it on
 * every return path.  On success *retval holds the byte count moved.
 */
318 int
319 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
320 size_t nbyte, off_t *offset, int flags, register_t *retval)
321 {
322 struct uio auio;
323 struct iovec aiov;
324 size_t cnt;
325 int error;
326 #ifdef KTRACE
327 struct iovec ktriov;
328 #endif
329
330 error = 0;
/* Describe the user buffer with a one-element uio. */
331 aiov.iov_base = (caddr_t)buf; /* XXX kills const */
332 aiov.iov_len = nbyte;
333 auio.uio_iov = &aiov;
334 auio.uio_iovcnt = 1;
335 auio.uio_resid = nbyte;
336 auio.uio_rw = UIO_WRITE;
337 auio.uio_segflg = UIO_USERSPACE;
338 auio.uio_procp = p;
339
340 /*
341 * Writes return ssize_t because -1 is returned on error. Therefore
342 * we must restrict the length to SSIZE_MAX to avoid garbage return
343 * values.
344 */
345 if (auio.uio_resid > SSIZE_MAX) {
346 error = EINVAL;
347 goto out;
348 }
349
350 #ifdef KTRACE
351 /*
352 * if tracing, save a copy of iovec
353 */
354 if (KTRPOINT(p, KTR_GENIO))
355 ktriov = aiov;
356 #endif
357 cnt = auio.uio_resid;
358 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
359 if (error) {
/* A partial transfer interrupted by a signal still succeeds. */
360 if (auio.uio_resid != cnt && (error == ERESTART ||
361 error == EINTR || error == EWOULDBLOCK))
362 error = 0;
/* A write to a broken pipe also raises SIGPIPE on the writer. */
363 if (error == EPIPE)
364 psignal(p, SIGPIPE);
365 }
366 cnt -= auio.uio_resid;
367 #ifdef KTRACE
368 if (KTRPOINT(p, KTR_GENIO) && error == 0)
369 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
370 #endif
371 *retval = cnt;
372 out:
373 FILE_UNUSE(fp, p);
374 return (error);
375 }
376
377 /*
378 * Gather write system call
379 */
380 int
381 sys_writev(struct lwp *l, void *v, register_t *retval)
382 {
383 struct sys_writev_args /* {
384 syscallarg(int) fd;
385 syscallarg(const struct iovec *) iovp;
386 syscallarg(int) iovcnt;
387 } */ *uap = v;
388 int fd;
389 struct file *fp;
390 struct proc *p;
391 struct filedesc *fdp;
392
393 fd = SCARG(uap, fd);
394 p = l->l_proc;
395 fdp = p->p_fd;
396
397 if ((fp = fd_getfile(fdp, fd)) == NULL)
398 return (EBADF);
399
400 if ((fp->f_flag & FWRITE) == 0) {
401 simple_unlock(&fp->f_slock);
402 return (EBADF);
403 }
404
405 FILE_USE(fp);
406
407 /* dofilewritev() will unuse the descriptor for us */
408 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
409 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
410 }
411
412 int
413 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
414 int iovcnt, off_t *offset, int flags, register_t *retval)
415 {
416 struct uio auio;
417 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
418 int i, error;
419 size_t cnt;
420 u_int iovlen;
421 #ifdef KTRACE
422 struct iovec *ktriov;
423 #endif
424
425 error = 0;
426 #ifdef KTRACE
427 ktriov = NULL;
428 #endif
429 /* note: can't use iovlen until iovcnt is validated */
430 iovlen = iovcnt * sizeof(struct iovec);
431 if ((u_int)iovcnt > UIO_SMALLIOV) {
432 if ((u_int)iovcnt > IOV_MAX) {
433 error = EINVAL;
434 goto out;
435 }
436 iov = malloc(iovlen, M_IOV, M_WAITOK);
437 needfree = iov;
438 } else if ((u_int)iovcnt > 0) {
439 iov = aiov;
440 needfree = NULL;
441 } else {
442 error = EINVAL;
443 goto out;
444 }
445
446 auio.uio_iov = iov;
447 auio.uio_iovcnt = iovcnt;
448 auio.uio_rw = UIO_WRITE;
449 auio.uio_segflg = UIO_USERSPACE;
450 auio.uio_procp = p;
451 error = copyin(iovp, iov, iovlen);
452 if (error)
453 goto done;
454 auio.uio_resid = 0;
455 for (i = 0; i < iovcnt; i++) {
456 auio.uio_resid += iov->iov_len;
457 /*
458 * Writes return ssize_t because -1 is returned on error.
459 * Therefore we must restrict the length to SSIZE_MAX to
460 * avoid garbage return values.
461 */
462 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
463 error = EINVAL;
464 goto done;
465 }
466 iov++;
467 }
468 #ifdef KTRACE
469 /*
470 * if tracing, save a copy of iovec
471 */
472 if (KTRPOINT(p, KTR_GENIO)) {
473 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
474 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
475 }
476 #endif
477 cnt = auio.uio_resid;
478 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
479 if (error) {
480 if (auio.uio_resid != cnt && (error == ERESTART ||
481 error == EINTR || error == EWOULDBLOCK))
482 error = 0;
483 if (error == EPIPE)
484 psignal(p, SIGPIPE);
485 }
486 cnt -= auio.uio_resid;
487 #ifdef KTRACE
488 if (KTRPOINT(p, KTR_GENIO))
489 if (error == 0) {
490 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
491 free(ktriov, M_TEMP);
492 }
493 #endif
494 *retval = cnt;
495 done:
496 if (needfree)
497 free(needfree, M_IOV);
498 out:
499 FILE_UNUSE(fp, p);
500 return (error);
501 }
502
/*
 * Ioctl system call.
 *
 * Decodes the command word to decide how much argument data to copy
 * in from (IOC_IN) and out to (IOC_OUT) user space around the call to
 * the file's fo_ioctl routine, handling a few generic commands here.
 */
506 /* ARGSUSED */
507 int
508 sys_ioctl(struct lwp *l, void *v, register_t *retval)
509 {
510 struct sys_ioctl_args /* {
511 syscallarg(int) fd;
512 syscallarg(u_long) com;
513 syscallarg(caddr_t) data;
514 } */ *uap = v;
515 struct file *fp;
516 struct proc *p;
517 struct filedesc *fdp;
518 u_long com;
519 int error;
520 u_int size;
521 caddr_t data, memp;
522 int tmp;
/* Arguments up to STK_PARAMS bytes are staged in stkbuf; larger ones are malloc'd. */
523 #define STK_PARAMS 128
524 u_long stkbuf[STK_PARAMS/sizeof(u_long)];
525
526 error = 0;
527 p = l->l_proc;
528 fdp = p->p_fd;
529
530 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
531 return (EBADF);
532
533 FILE_USE(fp);
534
535 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
536 error = EBADF;
537 com = 0;
538 goto out;
539 }
540
/*
 * FIONCLEX/FIOCLEX operate purely on the descriptor table
 * (close-on-exec flag) and never reach the file's ioctl routine.
 */
541 switch (com = SCARG(uap, com)) {
542 case FIONCLEX:
543 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
544 goto out;
545
546 case FIOCLEX:
547 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
548 goto out;
549 }
550
551 /*
552 * Interpret high order word to find amount of data to be
553 * copied to/from the user's address space.
554 */
555 size = IOCPARM_LEN(com);
556 if (size > IOCPARM_MAX) {
557 error = ENOTTY;
558 goto out;
559 }
560 memp = NULL;
561 if (size > sizeof(stkbuf)) {
562 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
563 data = memp;
564 } else
565 data = (caddr_t)stkbuf;
566 if (com&IOC_IN) {
567 if (size) {
568 error = copyin(SCARG(uap, data), data, size);
569 if (error) {
570 if (memp)
571 free(memp, M_IOCTLOPS);
572 goto out;
573 }
574 #ifdef KTRACE
575 if (KTRPOINT(p, KTR_GENIO)) {
576 struct iovec iov;
577 iov.iov_base = SCARG(uap, data);
578 iov.iov_len = size;
579 ktrgenio(p, SCARG(uap, fd), UIO_WRITE, &iov,
580 size, 0);
581 }
582 #endif
/* Zero-size IOC_IN: the argument word itself is the data. */
583 } else
584 *(caddr_t *)data = SCARG(uap, data);
585 } else if ((com&IOC_OUT) && size)
586 /*
587 * Zero the buffer so the user always
588 * gets back something deterministic.
589 */
590 memset(data, 0, size);
591 else if (com&IOC_VOID)
592 *(caddr_t *)data = SCARG(uap, data);
593
594 switch (com) {
595
/* FIONBIO/FIOASYNC update f_flag here and also notify the file. */
596 case FIONBIO:
597 if ((tmp = *(int *)data) != 0)
598 fp->f_flag |= FNONBLOCK;
599 else
600 fp->f_flag &= ~FNONBLOCK;
601 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
602 break;
603
604 case FIOASYNC:
605 if ((tmp = *(int *)data) != 0)
606 fp->f_flag |= FASYNC;
607 else
608 fp->f_flag &= ~FASYNC;
609 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
610 break;
611
/*
 * FIOSETOWN: sockets store the pgid directly; for anything else it
 * is translated to TIOCSPGRP, negating positive (pid) values into
 * the owning process group id.
 */
612 case FIOSETOWN:
613 tmp = *(int *)data;
614 if (fp->f_type == DTYPE_SOCKET) {
615 ((struct socket *)fp->f_data)->so_pgid = tmp;
616 error = 0;
617 break;
618 }
619 if (tmp <= 0) {
620 tmp = -tmp;
621 } else {
622 struct proc *p1 = pfind(tmp);
623 if (p1 == 0) {
624 error = ESRCH;
625 break;
626 }
627 tmp = p1->p_pgrp->pg_id;
628 }
629 error = (*fp->f_ops->fo_ioctl)
630 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
631 break;
632
/* FIOGETOWN: inverse of the above, via TIOCGPGRP for non-sockets. */
633 case FIOGETOWN:
634 if (fp->f_type == DTYPE_SOCKET) {
635 error = 0;
636 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
637 break;
638 }
639 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
640 if (error == 0)
641 *(int *)data = -*(int *)data;
642 break;
643
644 default:
645 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
646 /*
647 * Copy any data to user, size was
648 * already set and checked above.
649 */
650 if (error == 0 && (com&IOC_OUT) && size) {
651 error = copyout(data, SCARG(uap, data), size);
652 #ifdef KTRACE
653 if (KTRPOINT(p, KTR_GENIO)) {
654 struct iovec iov;
655 iov.iov_base = SCARG(uap, data);
656 iov.iov_len = size;
657 ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov,
658 size, error);
659 }
660 #endif
661 }
662 break;
663 }
664 if (memp)
665 free(memp, M_IOCTLOPS);
666 out:
667 FILE_UNUSE(fp, p);
/*
 * Translate internal sentinels: a -1 return gets a diagnostic, and
 * both -1 and EPASSTHROUGH (driver "not my command") become the
 * user-visible ENOTTY.
 */
668 switch (error) {
669 case -1:
670 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
671 "pid=%d comm=%s\n",
672 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
673 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
674 p->p_pid, p->p_comm);
675 /* FALLTHROUGH */
676 case EPASSTHROUGH:
677 error = ENOTTY;
678 /* FALLTHROUGH */
679 default:
680 return (error);
681 }
682 }
683
/*
 * selwait: common wait channel all select/poll sleepers block on.
 * nselcoll: collision counter bumped by selwakeup() (see selrecord()).
 */
684 int selwait, nselcoll;
685
686 /*
687 * Select system call.
688 */
689 int
690 sys_select(struct lwp *l, void *v, register_t *retval)
691 {
692 struct sys_select_args /* {
693 syscallarg(int) nd;
694 syscallarg(fd_set *) in;
695 syscallarg(fd_set *) ou;
696 syscallarg(fd_set *) ex;
697 syscallarg(struct timeval *) tv;
698 } */ *uap = v;
699 struct proc *p;
700 caddr_t bits;
/*
 * "bits" holds six fd sets of ni bytes each: the three input sets
 * (in/ou/ex) at offsets 0-2, the three result sets at offsets 3-5.
 */
701 char smallbits[howmany(FD_SETSIZE, NFDBITS) *
702 sizeof(fd_mask) * 6];
703 struct timeval atv;
704 int s, ncoll, error, timo;
705 size_t ni;
706
707 error = 0;
708 p = l->l_proc;
709 if (SCARG(uap, nd) < 0)
710 return (EINVAL);
711 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
712 /* forgiving; slightly wrong */
713 SCARG(uap, nd) = p->p_fd->fd_nfiles;
714 }
715 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
716 if (ni * 6 > sizeof(smallbits))
717 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
718 else
719 bits = smallbits;
720
721 #define getbits(name, x) \
722 if (SCARG(uap, name)) { \
723 error = copyin(SCARG(uap, name), bits + ni * x, ni); \
724 if (error) \
725 goto done; \
726 } else \
727 memset(bits + ni * x, 0, ni);
728 getbits(in, 0);
729 getbits(ou, 1);
730 getbits(ex, 2);
731 #undef getbits
732
733 timo = 0;
734 if (SCARG(uap, tv)) {
735 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
736 sizeof(atv));
737 if (error)
738 goto done;
739 if (itimerfix(&atv)) {
740 error = EINVAL;
741 goto done;
742 }
/* Convert the relative timeout into an absolute deadline. */
743 s = splclock();
744 timeradd(&atv, &time, &atv);
745 splx(s);
746 }
747
/*
 * Scan, and if nothing is ready sleep on selwait.  The L_SELECT
 * flag and the nselcoll snapshot detect a selwakeup() that raced
 * with the scan, in which case we rescan instead of sleeping.
 */
748 retry:
749 ncoll = nselcoll;
750 l->l_flag |= L_SELECT;
751 error = selscan(p, (fd_mask *)(bits + ni * 0),
752 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
753 if (error || *retval)
754 goto done;
755 if (SCARG(uap, tv)) {
756 /*
757 * We have to recalculate the timeout on every retry.
758 */
759 timo = hzto(&atv);
760 if (timo <= 0)
761 goto done;
762 }
763 s = splsched();
764 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
765 splx(s);
766 goto retry;
767 }
768 l->l_flag &= ~L_SELECT;
769 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
770 splx(s);
771 if (error == 0)
772 goto retry;
773 done:
774 l->l_flag &= ~L_SELECT;
775 /* select is not restarted after signals... */
776 if (error == ERESTART)
777 error = EINTR;
778 if (error == EWOULDBLOCK)
779 error = 0;
780 if (error == 0) {
781
782 #define putbits(name, x) \
783 if (SCARG(uap, name)) { \
784 error = copyout(bits + ni * x, SCARG(uap, name), ni); \
785 if (error) \
786 goto out; \
787 }
788 putbits(in, 3);
789 putbits(ou, 4);
790 putbits(ex, 5);
791 #undef putbits
792 }
793 out:
794 if (ni * 6 > sizeof(smallbits))
795 free(bits, M_TEMP);
796 return (error);
797 }
798
/*
 * Poll every descriptor set in the three input bit vectors, writing
 * the ready descriptors into the three output vectors.  flag[] maps
 * select's read/write/except sets onto the corresponding poll event
 * masks.  *retval is set to the number of ready descriptors.
 */
799 int
800 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
801 register_t *retval)
802 {
803 struct filedesc *fdp;
804 int msk, i, j, fd, n;
805 fd_mask ibits, obits;
806 struct file *fp;
807 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
808 POLLWRNORM | POLLHUP | POLLERR,
809 POLLRDBAND };
810
811 fdp = p->p_fd;
812 n = 0;
813 for (msk = 0; msk < 3; msk++) {
814 for (i = 0; i < nfd; i += NFDBITS) {
815 ibits = *ibitp++;
816 obits = 0;
/*
 * Pull set bits out with ffs(); j is the bit index, fd the
 * descriptor it represents.
 * NOTE(review): "1 << j" assumes fd_mask is no wider than int
 * -- confirm against the fd_mask definition.
 */
817 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
818 ibits &= ~(1 << j);
819 if ((fp = fd_getfile(fdp, fd)) == NULL)
820 return (EBADF);
821 FILE_USE(fp);
822 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
823 obits |= (1 << j);
824 n++;
825 }
826 FILE_UNUSE(fp, p);
827 }
828 *obitp++ = obits;
829 }
830 }
831 *retval = n;
832 return (0);
833 }
834
835 /*
836 * Poll system call.
837 */
838 int
839 sys_poll(struct lwp *l, void *v, register_t *retval)
840 {
841 struct sys_poll_args /* {
842 syscallarg(struct pollfd *) fds;
843 syscallarg(u_int) nfds;
844 syscallarg(int) timeout;
845 } */ *uap = v;
846 struct proc *p;
847 caddr_t bits;
/* Up to 32 pollfds fit on the stack; larger arrays are malloc'd. */
848 char smallbits[32 * sizeof(struct pollfd)];
849 struct timeval atv;
850 int s, ncoll, error, timo;
851 size_t ni;
852
853 error = 0;
854 p = l->l_proc;
855 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
856 /* forgiving; slightly wrong */
857 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
858 }
859 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
860 if (ni > sizeof(smallbits))
861 bits = malloc(ni, M_TEMP, M_WAITOK);
862 else
863 bits = smallbits;
864
865 error = copyin(SCARG(uap, fds), bits, ni);
866 if (error)
867 goto done;
868
869 timo = 0;
870 if (SCARG(uap, timeout) != INFTIM) {
/* Convert the millisecond timeout into an absolute deadline. */
871 atv.tv_sec = SCARG(uap, timeout) / 1000;
872 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
873 if (itimerfix(&atv)) {
874 error = EINVAL;
875 goto done;
876 }
877 s = splclock();
878 timeradd(&atv, &time, &atv);
879 splx(s);
880 }
881
/*
 * Scan, and if nothing is ready sleep on selwait.  The L_SELECT
 * flag and the nselcoll snapshot detect a selwakeup() that raced
 * with the scan, in which case we rescan instead of sleeping.
 */
882 retry:
883 ncoll = nselcoll;
884 l->l_flag |= L_SELECT;
885 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
886 if (error || *retval)
887 goto done;
888 if (SCARG(uap, timeout) != INFTIM) {
889 /*
890 * We have to recalculate the timeout on every retry.
891 */
892 timo = hzto(&atv);
893 if (timo <= 0)
894 goto done;
895 }
896 s = splsched();
897 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
898 splx(s);
899 goto retry;
900 }
901 l->l_flag &= ~L_SELECT;
902 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
903 splx(s);
904 if (error == 0)
905 goto retry;
906 done:
907 l->l_flag &= ~L_SELECT;
908 /* poll is not restarted after signals... */
909 if (error == ERESTART)
910 error = EINTR;
911 if (error == EWOULDBLOCK)
912 error = 0;
913 if (error == 0) {
914 error = copyout(bits, SCARG(uap, fds), ni);
915 if (error)
916 goto out;
917 }
918 out:
919 if (ni > sizeof(smallbits))
920 free(bits, M_TEMP);
921 return (error);
922 }
923
924 int
925 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
926 {
927 struct filedesc *fdp;
928 int i, n;
929 struct file *fp;
930
931 fdp = p->p_fd;
932 n = 0;
933 for (i = 0; i < nfd; i++, fds++) {
934 if (fds->fd >= fdp->fd_nfiles) {
935 fds->revents = POLLNVAL;
936 n++;
937 } else if (fds->fd < 0) {
938 fds->revents = 0;
939 } else {
940 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
941 fds->revents = POLLNVAL;
942 n++;
943 } else {
944 FILE_USE(fp);
945 fds->revents = (*fp->f_ops->fo_poll)(fp,
946 fds->events | POLLERR | POLLHUP, p);
947 if (fds->revents != 0)
948 n++;
949 FILE_UNUSE(fp, p);
950 }
951 }
952 }
953 *retval = n;
954 return (0);
955 }
956
957 /*ARGSUSED*/
958 int
959 seltrue(dev_t dev, int events, struct proc *p)
960 {
961
962 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
963 }
964
965 /*
966 * Record a select request.
967 */
968 void
969 selrecord(struct proc *selector, struct selinfo *sip)
970 {
971 struct lwp *l;
972 struct proc *p;
973 pid_t mypid;
974
975 mypid = selector->p_pid;
976 if (sip->sel_pid == mypid)
977 return;
978 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
979 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
980 if (l->l_wchan == (caddr_t)&selwait) {
981 sip->sel_collision = 1;
982 return;
983 }
984 }
985 }
986
987 sip->sel_pid = mypid;
988 }
989
990 /*
991 * Do a wakeup when a selectable event occurs.
992 */
993 void
994 selwakeup(sip)
995 struct selinfo *sip;
996 {
997 struct lwp *l;
998 struct proc *p;
999 int s;
1000
1001 if (sip->sel_pid == 0)
1002 return;
1003 if (sip->sel_collision) {
1004 sip->sel_pid = 0;
1005 nselcoll++;
1006 sip->sel_collision = 0;
1007 wakeup((caddr_t)&selwait);
1008 return;
1009 }
1010 p = pfind(sip->sel_pid);
1011 sip->sel_pid = 0;
1012 if (p != NULL) {
1013 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1014 SCHED_LOCK(s);
1015 if (l->l_wchan == (caddr_t)&selwait) {
1016 if (l->l_stat == LSSLEEP)
1017 setrunnable(l);
1018 else
1019 unsleep(l);
1020 } else if (l->l_flag & L_SELECT)
1021 l->l_flag &= ~L_SELECT;
1022 SCHED_UNLOCK(s);
1023 }
1024 }
1025 }
1026