/* $NetBSD: sys_generic.c,v 1.76 2003/06/29 22:31:25 fvdl Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.76 2003/06/29 22:31:25 fvdl Exp $");
45
46 #include "opt_ktrace.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/filedesc.h>
51 #include <sys/ioctl.h>
52 #include <sys/file.h>
53 #include <sys/proc.h>
54 #include <sys/socketvar.h>
55 #include <sys/signalvar.h>
56 #include <sys/uio.h>
57 #include <sys/kernel.h>
58 #include <sys/stat.h>
59 #include <sys/malloc.h>
60 #include <sys/poll.h>
61 #ifdef KTRACE
62 #include <sys/ktrace.h>
63 #endif
64
65 #include <sys/mount.h>
66 #include <sys/sa.h>
67 #include <sys/syscallargs.h>
68
69 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
70 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
71
72 /*
73 * Read system call.
74 */
75 /* ARGSUSED */
76 int
77 sys_read(struct lwp *l, void *v, register_t *retval)
78 {
79 struct sys_read_args /* {
80 syscallarg(int) fd;
81 syscallarg(void *) buf;
82 syscallarg(size_t) nbyte;
83 } */ *uap = v;
84 int fd;
85 struct file *fp;
86 struct proc *p;
87 struct filedesc *fdp;
88
89 fd = SCARG(uap, fd);
90 p = l->l_proc;
91 fdp = p->p_fd;
92
93 if ((fp = fd_getfile(fdp, fd)) == NULL)
94 return (EBADF);
95
96 if ((fp->f_flag & FREAD) == 0) {
97 simple_unlock(&fp->f_slock);
98 return (EBADF);
99 }
100
101 FILE_USE(fp);
102
103 /* dofileread() will unuse the descriptor for us */
104 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
105 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
106 }
107
108 int
109 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
110 off_t *offset, int flags, register_t *retval)
111 {
112 struct uio auio;
113 struct iovec aiov;
114 size_t cnt;
115 int error;
116 #ifdef KTRACE
117 struct iovec ktriov;
118 #endif
119 error = 0;
120
121 aiov.iov_base = (caddr_t)buf;
122 aiov.iov_len = nbyte;
123 auio.uio_iov = &aiov;
124 auio.uio_iovcnt = 1;
125 auio.uio_resid = nbyte;
126 auio.uio_rw = UIO_READ;
127 auio.uio_segflg = UIO_USERSPACE;
128 auio.uio_procp = p;
129
130 /*
131 * Reads return ssize_t because -1 is returned on error. Therefore
132 * we must restrict the length to SSIZE_MAX to avoid garbage return
133 * values.
134 */
135 if (auio.uio_resid > SSIZE_MAX) {
136 error = EINVAL;
137 goto out;
138 }
139
140 #ifdef KTRACE
141 /*
142 * if tracing, save a copy of iovec
143 */
144 if (KTRPOINT(p, KTR_GENIO))
145 ktriov = aiov;
146 #endif
147 cnt = auio.uio_resid;
148 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
149 if (error)
150 if (auio.uio_resid != cnt && (error == ERESTART ||
151 error == EINTR || error == EWOULDBLOCK))
152 error = 0;
153 cnt -= auio.uio_resid;
154 #ifdef KTRACE
155 if (KTRPOINT(p, KTR_GENIO) && error == 0)
156 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
157 #endif
158 *retval = cnt;
159 out:
160 FILE_UNUSE(fp, p);
161 return (error);
162 }
163
164 /*
165 * Scatter read system call.
166 */
167 int
168 sys_readv(struct lwp *l, void *v, register_t *retval)
169 {
170 struct sys_readv_args /* {
171 syscallarg(int) fd;
172 syscallarg(const struct iovec *) iovp;
173 syscallarg(int) iovcnt;
174 } */ *uap = v;
175 int fd;
176 struct file *fp;
177 struct proc *p;
178 struct filedesc *fdp;
179
180 fd = SCARG(uap, fd);
181 p = l->l_proc;
182 fdp = p->p_fd;
183
184 if ((fp = fd_getfile(fdp, fd)) == NULL)
185 return (EBADF);
186
187 if ((fp->f_flag & FREAD) == 0) {
188 simple_unlock(&fp->f_slock);
189 return (EBADF);
190 }
191
192 FILE_USE(fp);
193
194 /* dofilereadv() will unuse the descriptor for us */
195 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
196 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
197 }
198
199 int
200 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
201 int iovcnt, off_t *offset, int flags, register_t *retval)
202 {
203 struct uio auio;
204 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
205 int i, error;
206 size_t cnt;
207 u_int iovlen;
208 #ifdef KTRACE
209 struct iovec *ktriov;
210 #endif
211
212 error = 0;
213 #ifdef KTRACE
214 ktriov = NULL;
215 #endif
216 /* note: can't use iovlen until iovcnt is validated */
217 iovlen = iovcnt * sizeof(struct iovec);
218 if ((u_int)iovcnt > UIO_SMALLIOV) {
219 if ((u_int)iovcnt > IOV_MAX) {
220 error = EINVAL;
221 goto out;
222 }
223 iov = malloc(iovlen, M_IOV, M_WAITOK);
224 needfree = iov;
225 } else if ((u_int)iovcnt > 0) {
226 iov = aiov;
227 needfree = NULL;
228 } else {
229 error = EINVAL;
230 goto out;
231 }
232
233 auio.uio_iov = iov;
234 auio.uio_iovcnt = iovcnt;
235 auio.uio_rw = UIO_READ;
236 auio.uio_segflg = UIO_USERSPACE;
237 auio.uio_procp = p;
238 error = copyin(iovp, iov, iovlen);
239 if (error)
240 goto done;
241 auio.uio_resid = 0;
242 for (i = 0; i < iovcnt; i++) {
243 auio.uio_resid += iov->iov_len;
244 /*
245 * Reads return ssize_t because -1 is returned on error.
246 * Therefore we must restrict the length to SSIZE_MAX to
247 * avoid garbage return values.
248 */
249 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
250 error = EINVAL;
251 goto done;
252 }
253 iov++;
254 }
255 #ifdef KTRACE
256 /*
257 * if tracing, save a copy of iovec
258 */
259 if (KTRPOINT(p, KTR_GENIO)) {
260 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
261 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
262 }
263 #endif
264 cnt = auio.uio_resid;
265 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
266 if (error)
267 if (auio.uio_resid != cnt && (error == ERESTART ||
268 error == EINTR || error == EWOULDBLOCK))
269 error = 0;
270 cnt -= auio.uio_resid;
271 #ifdef KTRACE
272 if (ktriov != NULL) {
273 if (error == 0)
274 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
275 free(ktriov, M_TEMP);
276 }
277 #endif
278 *retval = cnt;
279 done:
280 if (needfree)
281 free(needfree, M_IOV);
282 out:
283 FILE_UNUSE(fp, p);
284 return (error);
285 }
286
287 /*
288 * Write system call
289 */
290 int
291 sys_write(struct lwp *l, void *v, register_t *retval)
292 {
293 struct sys_write_args /* {
294 syscallarg(int) fd;
295 syscallarg(const void *) buf;
296 syscallarg(size_t) nbyte;
297 } */ *uap = v;
298 int fd;
299 struct file *fp;
300 struct proc *p;
301 struct filedesc *fdp;
302
303 fd = SCARG(uap, fd);
304 p = l->l_proc;
305 fdp = p->p_fd;
306
307 if ((fp = fd_getfile(fdp, fd)) == NULL)
308 return (EBADF);
309
310 if ((fp->f_flag & FWRITE) == 0) {
311 simple_unlock(&fp->f_slock);
312 return (EBADF);
313 }
314
315 FILE_USE(fp);
316
317 /* dofilewrite() will unuse the descriptor for us */
318 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
319 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
320 }
321
322 int
323 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
324 size_t nbyte, off_t *offset, int flags, register_t *retval)
325 {
326 struct uio auio;
327 struct iovec aiov;
328 size_t cnt;
329 int error;
330 #ifdef KTRACE
331 struct iovec ktriov;
332 #endif
333
334 error = 0;
335 aiov.iov_base = (caddr_t)buf; /* XXX kills const */
336 aiov.iov_len = nbyte;
337 auio.uio_iov = &aiov;
338 auio.uio_iovcnt = 1;
339 auio.uio_resid = nbyte;
340 auio.uio_rw = UIO_WRITE;
341 auio.uio_segflg = UIO_USERSPACE;
342 auio.uio_procp = p;
343
344 /*
345 * Writes return ssize_t because -1 is returned on error. Therefore
346 * we must restrict the length to SSIZE_MAX to avoid garbage return
347 * values.
348 */
349 if (auio.uio_resid > SSIZE_MAX) {
350 error = EINVAL;
351 goto out;
352 }
353
354 #ifdef KTRACE
355 /*
356 * if tracing, save a copy of iovec
357 */
358 if (KTRPOINT(p, KTR_GENIO))
359 ktriov = aiov;
360 #endif
361 cnt = auio.uio_resid;
362 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
363 if (error) {
364 if (auio.uio_resid != cnt && (error == ERESTART ||
365 error == EINTR || error == EWOULDBLOCK))
366 error = 0;
367 if (error == EPIPE)
368 psignal(p, SIGPIPE);
369 }
370 cnt -= auio.uio_resid;
371 #ifdef KTRACE
372 if (KTRPOINT(p, KTR_GENIO) && error == 0)
373 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
374 #endif
375 *retval = cnt;
376 out:
377 FILE_UNUSE(fp, p);
378 return (error);
379 }
380
381 /*
382 * Gather write system call
383 */
384 int
385 sys_writev(struct lwp *l, void *v, register_t *retval)
386 {
387 struct sys_writev_args /* {
388 syscallarg(int) fd;
389 syscallarg(const struct iovec *) iovp;
390 syscallarg(int) iovcnt;
391 } */ *uap = v;
392 int fd;
393 struct file *fp;
394 struct proc *p;
395 struct filedesc *fdp;
396
397 fd = SCARG(uap, fd);
398 p = l->l_proc;
399 fdp = p->p_fd;
400
401 if ((fp = fd_getfile(fdp, fd)) == NULL)
402 return (EBADF);
403
404 if ((fp->f_flag & FWRITE) == 0) {
405 simple_unlock(&fp->f_slock);
406 return (EBADF);
407 }
408
409 FILE_USE(fp);
410
411 /* dofilewritev() will unuse the descriptor for us */
412 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
413 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
414 }
415
416 int
417 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
418 int iovcnt, off_t *offset, int flags, register_t *retval)
419 {
420 struct uio auio;
421 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
422 int i, error;
423 size_t cnt;
424 u_int iovlen;
425 #ifdef KTRACE
426 struct iovec *ktriov;
427 #endif
428
429 error = 0;
430 #ifdef KTRACE
431 ktriov = NULL;
432 #endif
433 /* note: can't use iovlen until iovcnt is validated */
434 iovlen = iovcnt * sizeof(struct iovec);
435 if ((u_int)iovcnt > UIO_SMALLIOV) {
436 if ((u_int)iovcnt > IOV_MAX) {
437 error = EINVAL;
438 goto out;
439 }
440 iov = malloc(iovlen, M_IOV, M_WAITOK);
441 needfree = iov;
442 } else if ((u_int)iovcnt > 0) {
443 iov = aiov;
444 needfree = NULL;
445 } else {
446 error = EINVAL;
447 goto out;
448 }
449
450 auio.uio_iov = iov;
451 auio.uio_iovcnt = iovcnt;
452 auio.uio_rw = UIO_WRITE;
453 auio.uio_segflg = UIO_USERSPACE;
454 auio.uio_procp = p;
455 error = copyin(iovp, iov, iovlen);
456 if (error)
457 goto done;
458 auio.uio_resid = 0;
459 for (i = 0; i < iovcnt; i++) {
460 auio.uio_resid += iov->iov_len;
461 /*
462 * Writes return ssize_t because -1 is returned on error.
463 * Therefore we must restrict the length to SSIZE_MAX to
464 * avoid garbage return values.
465 */
466 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
467 error = EINVAL;
468 goto done;
469 }
470 iov++;
471 }
472 #ifdef KTRACE
473 /*
474 * if tracing, save a copy of iovec
475 */
476 if (KTRPOINT(p, KTR_GENIO)) {
477 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
478 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
479 }
480 #endif
481 cnt = auio.uio_resid;
482 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
483 if (error) {
484 if (auio.uio_resid != cnt && (error == ERESTART ||
485 error == EINTR || error == EWOULDBLOCK))
486 error = 0;
487 if (error == EPIPE)
488 psignal(p, SIGPIPE);
489 }
490 cnt -= auio.uio_resid;
491 #ifdef KTRACE
492 if (KTRPOINT(p, KTR_GENIO))
493 if (error == 0) {
494 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
495 free(ktriov, M_TEMP);
496 }
497 #endif
498 *retval = cnt;
499 done:
500 if (needfree)
501 free(needfree, M_IOV);
502 out:
503 FILE_UNUSE(fp, p);
504 return (error);
505 }
506
507 /*
508 * Ioctl system call
509 */
510 /* ARGSUSED */
511 int
512 sys_ioctl(struct lwp *l, void *v, register_t *retval)
513 {
514 struct sys_ioctl_args /* {
515 syscallarg(int) fd;
516 syscallarg(u_long) com;
517 syscallarg(caddr_t) data;
518 } */ *uap = v;
519 struct file *fp;
520 struct proc *p;
521 struct filedesc *fdp;
522 u_long com;
523 int error;
524 u_int size;
525 caddr_t data, memp;
526 int tmp;
527 #define STK_PARAMS 128
528 u_long stkbuf[STK_PARAMS/sizeof(u_long)];
529
530 error = 0;
531 p = l->l_proc;
532 fdp = p->p_fd;
533
534 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
535 return (EBADF);
536
537 FILE_USE(fp);
538
539 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
540 error = EBADF;
541 com = 0;
542 goto out;
543 }
544
545 switch (com = SCARG(uap, com)) {
546 case FIONCLEX:
547 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
548 goto out;
549
550 case FIOCLEX:
551 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
552 goto out;
553 }
554
555 /*
556 * Interpret high order word to find amount of data to be
557 * copied to/from the user's address space.
558 */
559 size = IOCPARM_LEN(com);
560 if (size > IOCPARM_MAX) {
561 error = ENOTTY;
562 goto out;
563 }
564 memp = NULL;
565 if (size > sizeof(stkbuf)) {
566 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
567 data = memp;
568 } else
569 data = (caddr_t)stkbuf;
570 if (com&IOC_IN) {
571 if (size) {
572 error = copyin(SCARG(uap, data), data, size);
573 if (error) {
574 if (memp)
575 free(memp, M_IOCTLOPS);
576 goto out;
577 }
578 #ifdef KTRACE
579 if (KTRPOINT(p, KTR_GENIO)) {
580 struct iovec iov;
581 iov.iov_base = SCARG(uap, data);
582 iov.iov_len = size;
583 ktrgenio(p, SCARG(uap, fd), UIO_WRITE, &iov,
584 size, 0);
585 }
586 #endif
587 } else
588 *(caddr_t *)data = SCARG(uap, data);
589 } else if ((com&IOC_OUT) && size)
590 /*
591 * Zero the buffer so the user always
592 * gets back something deterministic.
593 */
594 memset(data, 0, size);
595 else if (com&IOC_VOID)
596 *(caddr_t *)data = SCARG(uap, data);
597
598 switch (com) {
599
600 case FIONBIO:
601 if ((tmp = *(int *)data) != 0)
602 fp->f_flag |= FNONBLOCK;
603 else
604 fp->f_flag &= ~FNONBLOCK;
605 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
606 break;
607
608 case FIOASYNC:
609 if ((tmp = *(int *)data) != 0)
610 fp->f_flag |= FASYNC;
611 else
612 fp->f_flag &= ~FASYNC;
613 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
614 break;
615
616 case FIOSETOWN:
617 tmp = *(int *)data;
618 if (fp->f_type == DTYPE_SOCKET) {
619 ((struct socket *)fp->f_data)->so_pgid = tmp;
620 error = 0;
621 break;
622 }
623 if (tmp <= 0) {
624 tmp = -tmp;
625 } else {
626 struct proc *p1 = pfind(tmp);
627 if (p1 == 0) {
628 error = ESRCH;
629 break;
630 }
631 tmp = p1->p_pgrp->pg_id;
632 }
633 error = (*fp->f_ops->fo_ioctl)
634 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
635 break;
636
637 case FIOGETOWN:
638 if (fp->f_type == DTYPE_SOCKET) {
639 error = 0;
640 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
641 break;
642 }
643 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
644 if (error == 0)
645 *(int *)data = -*(int *)data;
646 break;
647
648 default:
649 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
650 /*
651 * Copy any data to user, size was
652 * already set and checked above.
653 */
654 if (error == 0 && (com&IOC_OUT) && size) {
655 error = copyout(data, SCARG(uap, data), size);
656 #ifdef KTRACE
657 if (KTRPOINT(p, KTR_GENIO)) {
658 struct iovec iov;
659 iov.iov_base = SCARG(uap, data);
660 iov.iov_len = size;
661 ktrgenio(p, SCARG(uap, fd), UIO_READ, &iov,
662 size, error);
663 }
664 #endif
665 }
666 break;
667 }
668 if (memp)
669 free(memp, M_IOCTLOPS);
670 out:
671 FILE_UNUSE(fp, p);
672 switch (error) {
673 case -1:
674 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
675 "pid=%d comm=%s\n",
676 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
677 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
678 p->p_pid, p->p_comm);
679 /* FALLTHROUGH */
680 case EPASSTHROUGH:
681 error = ENOTTY;
682 /* FALLTHROUGH */
683 default:
684 return (error);
685 }
686 }
687
688 int selwait, nselcoll;
689
690 /*
691 * Select system call.
692 */
693 int
694 sys_select(struct lwp *l, void *v, register_t *retval)
695 {
696 struct sys_select_args /* {
697 syscallarg(int) nd;
698 syscallarg(fd_set *) in;
699 syscallarg(fd_set *) ou;
700 syscallarg(fd_set *) ex;
701 syscallarg(struct timeval *) tv;
702 } */ *uap = v;
703 struct proc *p;
704 caddr_t bits;
705 char smallbits[howmany(FD_SETSIZE, NFDBITS) *
706 sizeof(fd_mask) * 6];
707 struct timeval atv;
708 int s, ncoll, error, timo;
709 size_t ni;
710
711 error = 0;
712 p = l->l_proc;
713 if (SCARG(uap, nd) < 0)
714 return (EINVAL);
715 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
716 /* forgiving; slightly wrong */
717 SCARG(uap, nd) = p->p_fd->fd_nfiles;
718 }
719 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
720 if (ni * 6 > sizeof(smallbits))
721 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
722 else
723 bits = smallbits;
724
725 #define getbits(name, x) \
726 if (SCARG(uap, name)) { \
727 error = copyin(SCARG(uap, name), bits + ni * x, ni); \
728 if (error) \
729 goto done; \
730 } else \
731 memset(bits + ni * x, 0, ni);
732 getbits(in, 0);
733 getbits(ou, 1);
734 getbits(ex, 2);
735 #undef getbits
736
737 timo = 0;
738 if (SCARG(uap, tv)) {
739 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
740 sizeof(atv));
741 if (error)
742 goto done;
743 if (itimerfix(&atv)) {
744 error = EINVAL;
745 goto done;
746 }
747 s = splclock();
748 timeradd(&atv, &time, &atv);
749 splx(s);
750 }
751
752 retry:
753 ncoll = nselcoll;
754 l->l_flag |= L_SELECT;
755 error = selscan(p, (fd_mask *)(bits + ni * 0),
756 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
757 if (error || *retval)
758 goto done;
759 if (SCARG(uap, tv)) {
760 /*
761 * We have to recalculate the timeout on every retry.
762 */
763 timo = hzto(&atv);
764 if (timo <= 0)
765 goto done;
766 }
767 s = splsched();
768 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
769 splx(s);
770 goto retry;
771 }
772 l->l_flag &= ~L_SELECT;
773 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
774 splx(s);
775 if (error == 0)
776 goto retry;
777 done:
778 l->l_flag &= ~L_SELECT;
779 /* select is not restarted after signals... */
780 if (error == ERESTART)
781 error = EINTR;
782 if (error == EWOULDBLOCK)
783 error = 0;
784 if (error == 0) {
785
786 #define putbits(name, x) \
787 if (SCARG(uap, name)) { \
788 error = copyout(bits + ni * x, SCARG(uap, name), ni); \
789 if (error) \
790 goto out; \
791 }
792 putbits(in, 3);
793 putbits(ou, 4);
794 putbits(ex, 5);
795 #undef putbits
796 }
797 out:
798 if (ni * 6 > sizeof(smallbits))
799 free(bits, M_TEMP);
800 return (error);
801 }
802
803 int
804 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
805 register_t *retval)
806 {
807 struct filedesc *fdp;
808 int msk, i, j, fd, n;
809 fd_mask ibits, obits;
810 struct file *fp;
811 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
812 POLLWRNORM | POLLHUP | POLLERR,
813 POLLRDBAND };
814
815 fdp = p->p_fd;
816 n = 0;
817 for (msk = 0; msk < 3; msk++) {
818 for (i = 0; i < nfd; i += NFDBITS) {
819 ibits = *ibitp++;
820 obits = 0;
821 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
822 ibits &= ~(1 << j);
823 if ((fp = fd_getfile(fdp, fd)) == NULL)
824 return (EBADF);
825 FILE_USE(fp);
826 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
827 obits |= (1 << j);
828 n++;
829 }
830 FILE_UNUSE(fp, p);
831 }
832 *obitp++ = obits;
833 }
834 }
835 *retval = n;
836 return (0);
837 }
838
839 /*
840 * Poll system call.
841 */
842 int
843 sys_poll(struct lwp *l, void *v, register_t *retval)
844 {
845 struct sys_poll_args /* {
846 syscallarg(struct pollfd *) fds;
847 syscallarg(u_int) nfds;
848 syscallarg(int) timeout;
849 } */ *uap = v;
850 struct proc *p;
851 caddr_t bits;
852 char smallbits[32 * sizeof(struct pollfd)];
853 struct timeval atv;
854 int s, ncoll, error, timo;
855 size_t ni;
856
857 error = 0;
858 p = l->l_proc;
859 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
860 /* forgiving; slightly wrong */
861 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
862 }
863 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
864 if (ni > sizeof(smallbits))
865 bits = malloc(ni, M_TEMP, M_WAITOK);
866 else
867 bits = smallbits;
868
869 error = copyin(SCARG(uap, fds), bits, ni);
870 if (error)
871 goto done;
872
873 timo = 0;
874 if (SCARG(uap, timeout) != INFTIM) {
875 atv.tv_sec = SCARG(uap, timeout) / 1000;
876 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
877 if (itimerfix(&atv)) {
878 error = EINVAL;
879 goto done;
880 }
881 s = splclock();
882 timeradd(&atv, &time, &atv);
883 splx(s);
884 }
885
886 retry:
887 ncoll = nselcoll;
888 l->l_flag |= L_SELECT;
889 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
890 if (error || *retval)
891 goto done;
892 if (SCARG(uap, timeout) != INFTIM) {
893 /*
894 * We have to recalculate the timeout on every retry.
895 */
896 timo = hzto(&atv);
897 if (timo <= 0)
898 goto done;
899 }
900 s = splsched();
901 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
902 splx(s);
903 goto retry;
904 }
905 l->l_flag &= ~L_SELECT;
906 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
907 splx(s);
908 if (error == 0)
909 goto retry;
910 done:
911 l->l_flag &= ~L_SELECT;
912 /* poll is not restarted after signals... */
913 if (error == ERESTART)
914 error = EINTR;
915 if (error == EWOULDBLOCK)
916 error = 0;
917 if (error == 0) {
918 error = copyout(bits, SCARG(uap, fds), ni);
919 if (error)
920 goto out;
921 }
922 out:
923 if (ni > sizeof(smallbits))
924 free(bits, M_TEMP);
925 return (error);
926 }
927
928 int
929 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
930 {
931 struct filedesc *fdp;
932 int i, n;
933 struct file *fp;
934
935 fdp = p->p_fd;
936 n = 0;
937 for (i = 0; i < nfd; i++, fds++) {
938 if (fds->fd >= fdp->fd_nfiles) {
939 fds->revents = POLLNVAL;
940 n++;
941 } else if (fds->fd < 0) {
942 fds->revents = 0;
943 } else {
944 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
945 fds->revents = POLLNVAL;
946 n++;
947 } else {
948 FILE_USE(fp);
949 fds->revents = (*fp->f_ops->fo_poll)(fp,
950 fds->events | POLLERR | POLLHUP, p);
951 if (fds->revents != 0)
952 n++;
953 FILE_UNUSE(fp, p);
954 }
955 }
956 }
957 *retval = n;
958 return (0);
959 }
960
961 /*ARGSUSED*/
962 int
963 seltrue(dev_t dev, int events, struct proc *p)
964 {
965
966 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
967 }
968
969 /*
970 * Record a select request.
971 */
972 void
973 selrecord(struct proc *selector, struct selinfo *sip)
974 {
975 struct lwp *l;
976 struct proc *p;
977 pid_t mypid;
978
979 mypid = selector->p_pid;
980 if (sip->sel_pid == mypid)
981 return;
982 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
983 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
984 if (l->l_wchan == (caddr_t)&selwait) {
985 sip->sel_collision = 1;
986 return;
987 }
988 }
989 }
990
991 sip->sel_pid = mypid;
992 }
993
994 /*
995 * Do a wakeup when a selectable event occurs.
996 */
997 void
998 selwakeup(sip)
999 struct selinfo *sip;
1000 {
1001 struct lwp *l;
1002 struct proc *p;
1003 int s;
1004
1005 if (sip->sel_pid == 0)
1006 return;
1007 if (sip->sel_collision) {
1008 sip->sel_pid = 0;
1009 nselcoll++;
1010 sip->sel_collision = 0;
1011 wakeup((caddr_t)&selwait);
1012 return;
1013 }
1014 p = pfind(sip->sel_pid);
1015 sip->sel_pid = 0;
1016 if (p != NULL) {
1017 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1018 SCHED_LOCK(s);
1019 if (l->l_wchan == (caddr_t)&selwait) {
1020 if (l->l_stat == LSSLEEP)
1021 setrunnable(l);
1022 else
1023 unsleep(l);
1024 } else if (l->l_flag & L_SELECT)
1025 l->l_flag &= ~L_SELECT;
1026 SCHED_UNLOCK(s);
1027 }
1028 }
1029 }
1030