/* $NetBSD: sys_generic.c,v 1.76.2.2 2004/08/03 10:52:55 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.76.2.2 2004/08/03 10:52:55 skrll Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
65 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
66 int pollscan(struct lwp *, struct pollfd *, int, register_t *);
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(struct lwp *l, void *v, register_t *retval)
74 {
75 struct sys_read_args /* {
76 syscallarg(int) fd;
77 syscallarg(void *) buf;
78 syscallarg(size_t) nbyte;
79 } */ *uap = v;
80 int fd;
81 struct file *fp;
82 struct proc *p;
83 struct filedesc *fdp;
84
85 fd = SCARG(uap, fd);
86 p = l->l_proc;
87 fdp = p->p_fd;
88
89 if ((fp = fd_getfile(fdp, fd)) == NULL)
90 return (EBADF);
91
92 if ((fp->f_flag & FREAD) == 0) {
93 simple_unlock(&fp->f_slock);
94 return (EBADF);
95 }
96
97 FILE_USE(fp);
98
99 /* dofileread() will unuse the descriptor for us */
100 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
101 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
102 }
103
/*
 * Common code for read(2) and pread(2)-style entry points.
 *
 * Builds a single-element uio describing the user buffer and hands it
 * to the file's fo_read method.  On success *retval holds the number
 * of bytes transferred.  The caller's FILE_USE() reference on fp is
 * released here on every path (see FILE_UNUSE at "out").
 */
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif
	p = l->l_proc;
	error = 0;

	/* Describe the single user-space destination buffer. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Reads return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes auio/aiov)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/*
	 * If some data was transferred before the interruption, report
	 * the partial transfer instead of the error.
	 */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually read */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
161
162 /*
163 * Scatter read system call.
164 */
165 int
166 sys_readv(struct lwp *l, void *v, register_t *retval)
167 {
168 struct sys_readv_args /* {
169 syscallarg(int) fd;
170 syscallarg(const struct iovec *) iovp;
171 syscallarg(int) iovcnt;
172 } */ *uap = v;
173 struct filedesc *fdp;
174 struct file *fp;
175 struct proc *p;
176 int fd;
177
178 fd = SCARG(uap, fd);
179 p = l->l_proc;
180 fdp = p->p_fd;
181
182 if ((fp = fd_getfile(fdp, fd)) == NULL)
183 return (EBADF);
184
185 if ((fp->f_flag & FREAD) == 0) {
186 simple_unlock(&fp->f_slock);
187 return (EBADF);
188 }
189
190 FILE_USE(fp);
191
192 /* dofilereadv() will unuse the descriptor for us */
193 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
194 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
195 }
196
/*
 * Common code for readv(2)-style entry points.
 *
 * Copies the user's iovec array in (using a stack array for small
 * counts, malloc for larger ones up to IOV_MAX), validates the total
 * length against SSIZE_MAX, then calls the file's fo_read method.
 * The caller's FILE_USE() reference on fp is released on every path.
 */
int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/* Large vector: heap-allocate; reject anything over IOV_MAX. */
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* Small vector fits in the on-stack array. */
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt == 0 (or negative, caught by the u_int casts). */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Sum the segment lengths, guarding against ssize_t overflow. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes auio's copy)
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/* Report a partial transfer instead of an interruption error. */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually read */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
286
287 /*
288 * Write system call
289 */
290 int
291 sys_write(struct lwp *l, void *v, register_t *retval)
292 {
293 struct sys_write_args /* {
294 syscallarg(int) fd;
295 syscallarg(const void *) buf;
296 syscallarg(size_t) nbyte;
297 } */ *uap = v;
298 int fd;
299 struct file *fp;
300 struct proc *p;
301 struct filedesc *fdp;
302
303 fd = SCARG(uap, fd);
304 p = l->l_proc;
305 fdp = p->p_fd;
306
307 if ((fp = fd_getfile(fdp, fd)) == NULL)
308 return (EBADF);
309
310 if ((fp->f_flag & FWRITE) == 0) {
311 simple_unlock(&fp->f_slock);
312 return (EBADF);
313 }
314
315 FILE_USE(fp);
316
317 /* dofilewrite() will unuse the descriptor for us */
318 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
319 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
320 }
321
/*
 * Common code for write(2) and pwrite(2)-style entry points.
 *
 * Builds a single-element uio describing the user buffer and hands it
 * to the file's fo_write method.  A failed write with EPIPE also
 * delivers SIGPIPE to the process.  The caller's FILE_USE() reference
 * on fp is released here on every path.
 */
int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif

	p = l->l_proc;
	error = 0;
	/* Describe the single user-space source buffer. */
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Writes return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write consumes auio/aiov)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* Report a partial transfer instead of an interruption. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writing to a broken pipe also signals the process. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually written */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
382
383 /*
384 * Gather write system call
385 */
386 int
387 sys_writev(struct lwp *l, void *v, register_t *retval)
388 {
389 struct sys_writev_args /* {
390 syscallarg(int) fd;
391 syscallarg(const struct iovec *) iovp;
392 syscallarg(int) iovcnt;
393 } */ *uap = v;
394 int fd;
395 struct file *fp;
396 struct proc *p;
397 struct filedesc *fdp;
398
399 fd = SCARG(uap, fd);
400 p = l->l_proc;
401 fdp = p->p_fd;
402
403 if ((fp = fd_getfile(fdp, fd)) == NULL)
404 return (EBADF);
405
406 if ((fp->f_flag & FWRITE) == 0) {
407 simple_unlock(&fp->f_slock);
408 return (EBADF);
409 }
410
411 FILE_USE(fp);
412
413 /* dofilewritev() will unuse the descriptor for us */
414 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
415 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
416 }
417
/*
 * Common code for writev(2)-style entry points.
 *
 * Copies the user's iovec array in (stack array for small counts,
 * malloc for larger ones up to IOV_MAX), validates the total length
 * against SSIZE_MAX, then calls the file's fo_write method.  EPIPE
 * also delivers SIGPIPE.  The caller's FILE_USE() reference on fp is
 * released on every path.
 */
int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/* Large vector: heap-allocate; reject anything over IOV_MAX. */
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* Small vector fits in the on-stack array. */
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt == 0 (or negative, caught by the u_int casts). */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Sum the segment lengths, guarding against ssize_t overflow. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write consumes auio's copy)
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* Report a partial transfer instead of an interruption. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writing to a broken pipe also signals the process. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually written */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
510
/*
 * Ioctl system call
 *
 * Decodes the direction and size encoded in the command word, stages
 * the argument data in a kernel buffer (on-stack up to STK_PARAMS
 * bytes, malloc'ed otherwise), dispatches to the file's fo_ioctl
 * method, and copies result data back out for IOC_OUT commands.
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
#define STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	/* The descriptor must be open for reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep the "out:" diagnostics well-defined */
		goto out;
	}

	/*
	 * The close-on-exec commands only touch descriptor-table state
	 * and never reach the file's ioctl method.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Argument too big for the stack buffer; heap-allocate. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			/* Command takes input: copy it in from user space. */
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-size IOC_IN: pass the pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		/* No data transfer: pass the raw argument through. */
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag's FNONBLOCK in sync before dispatching. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		/* Keep f_flag's FASYNC in sync before dispatching. */
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	/*
	 * -1 from a driver is a bug (it should return EPASSTHROUGH);
	 * log it, then map both to ENOTTY for the user.
	 */
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
658
/* Sleep channel for select/poll waiters, and global collision counter. */
int	selwait, nselcoll;
660
/*
 * Select system call.
 *
 * The three input fd_sets and the three result fd_sets are kept in
 * one buffer of six ni-byte slots (in/ou/ex inputs at slots 0-2,
 * outputs at 3-5), on the stack when they fit.  The scan/sleep loop
 * uses the global nselcoll counter and the L_SELECT flag to detect
 * selwakeup() collisions that occur between scanning and sleeping.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* ni = bytes per fd_set slot for nd descriptors. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied set; a missing set is treated as empty. */
#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * A selwakeup() may have fired between the scan and here;
	 * if so (flag cleared or counter moved), rescan immediately.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy each requested result set back out. */
#define	putbits(name, x) \
		if (SCARG(uap, name)) { \
			error = copyout(bits + ni * x, SCARG(uap, name), ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
773
774 int
775 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
776 register_t *retval)
777 {
778 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
779 POLLWRNORM | POLLHUP | POLLERR,
780 POLLRDBAND };
781 struct proc *p = l->l_proc;
782 struct filedesc *fdp;
783 int msk, i, j, fd, n;
784 fd_mask ibits, obits;
785 struct file *fp;
786
787 fdp = p->p_fd;
788 n = 0;
789 for (msk = 0; msk < 3; msk++) {
790 for (i = 0; i < nfd; i += NFDBITS) {
791 ibits = *ibitp++;
792 obits = 0;
793 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
794 ibits &= ~(1 << j);
795 if ((fp = fd_getfile(fdp, fd)) == NULL)
796 return (EBADF);
797 FILE_USE(fp);
798 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
799 obits |= (1 << j);
800 n++;
801 }
802 FILE_UNUSE(fp, l);
803 }
804 *obitp++ = obits;
805 }
806 }
807 *retval = n;
808 return (0);
809 }
810
/*
 * Poll system call.
 *
 * Copies the pollfd array into a kernel buffer (on-stack for up to 32
 * entries), then runs the same scan/sleep/retry loop as select(2),
 * sharing the selwait channel, the L_SELECT flag and the nselcoll
 * collision counter.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (SCARG(uap, timeout) != INFTIM) {
		/* Millisecond timeout -> absolute timeval deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	}

 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * A selwakeup() may have fired between the scan and here;
	 * if so (flag cleared or counter moved), rescan immediately.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Copy the revents back to the user's array. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
899
900 int
901 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
902 {
903 struct proc *p = l->l_proc;
904 struct filedesc *fdp;
905 int i, n;
906 struct file *fp;
907
908 fdp = p->p_fd;
909 n = 0;
910 for (i = 0; i < nfd; i++, fds++) {
911 if (fds->fd >= fdp->fd_nfiles) {
912 fds->revents = POLLNVAL;
913 n++;
914 } else if (fds->fd < 0) {
915 fds->revents = 0;
916 } else {
917 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
918 fds->revents = POLLNVAL;
919 n++;
920 } else {
921 FILE_USE(fp);
922 fds->revents = (*fp->f_ops->fo_poll)(fp,
923 fds->events | POLLERR | POLLHUP, l);
924 if (fds->revents != 0)
925 n++;
926 FILE_UNUSE(fp, l);
927 }
928 }
929 }
930 *retval = n;
931 return (0);
932 }
933
934 /*ARGSUSED*/
935 int
936 seltrue(dev_t dev, int events, struct lwp *l)
937 {
938
939 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
940 }
941
942 /*
943 * Record a select request.
944 */
945 void
946 selrecord(struct lwp *selector, struct selinfo *sip)
947 {
948 struct lwp *l;
949 struct proc *p;
950 pid_t mypid;
951
952 mypid = selector->l_proc->p_pid;
953 if (sip->sel_pid == mypid)
954 return;
955 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
956 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
957 if (l->l_wchan == (caddr_t)&selwait) {
958 sip->sel_collision = 1;
959 return;
960 }
961 }
962 }
963
964 sip->sel_pid = mypid;
965 }
966
967 /*
968 * Do a wakeup when a selectable event occurs.
969 */
970 void
971 selwakeup(sip)
972 struct selinfo *sip;
973 {
974 struct lwp *l;
975 struct proc *p;
976 int s;
977
978 if (sip->sel_pid == 0)
979 return;
980 if (sip->sel_collision) {
981 sip->sel_pid = 0;
982 nselcoll++;
983 sip->sel_collision = 0;
984 wakeup((caddr_t)&selwait);
985 return;
986 }
987 p = pfind(sip->sel_pid);
988 sip->sel_pid = 0;
989 if (p != NULL) {
990 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
991 SCHED_LOCK(s);
992 if (l->l_wchan == (caddr_t)&selwait) {
993 if (l->l_stat == LSSLEEP)
994 setrunnable(l);
995 else
996 unsleep(l);
997 } else if (l->l_flag & L_SELECT)
998 l->l_flag &= ~L_SELECT;
999 SCHED_UNLOCK(s);
1000 }
1001 }
1002 }
1003