/*	$NetBSD: sys_generic.c,v 1.63 2002/08/08 20:40:25 jdolecek Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.63 2002/08/08 20:40:25 jdolecek Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
int pollscan __P((struct proc *, struct pollfd *, int, register_t *));

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
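
/*
 * The fo_read hook invoked above is supplied by the object behind the
 * descriptor (vnode, socket, pipe, ...).  A minimal backend, sketched here
 * with hypothetical names rather than an actual handler from this tree,
 * simply drains uio_resid with uiomove(); dofileread() then reports the
 * transfer size as the amount by which uio_resid dropped (offset update
 * omitted for brevity):
 *
 *	static int
 *	example_fo_read(struct file *fp, off_t *offset, struct uio *uio,
 *	    struct ucred *cred, int flags)
 *	{
 *		static char msg[] = "hello\n";
 *
 *		if (*offset >= (off_t)(sizeof(msg) - 1))
 *			return (0);
 *		return (uiomove(msg + *offset,
 *		    sizeof(msg) - 1 - *offset, uio));
 *	}
 */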

/*
 * Scatter read system call.
 */
int
sys_readv(struct proc *p, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
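
/*
 * From userland, the scatter read above is driven by an array of iovec
 * structures; the per-element and running-total checks in the loop above
 * are what a call like the following relies on.  This is an illustrative
 * sketch for an already-open descriptor fd, not part of this file's
 * interfaces:
 *
 *	#include <sys/uio.h>
 *
 *	char hdr[16], body[512];
 *	struct iovec iov[2];
 *	ssize_t n;
 *
 *	iov[0].iov_base = hdr;  iov[0].iov_len = sizeof(hdr);
 *	iov[1].iov_base = body; iov[1].iov_len = sizeof(body);
 *	n = readv(fd, iov, 2);
 *
 * iovcnt must lie in (0, IOV_MAX] and the summed lengths must not exceed
 * SSIZE_MAX, or the validation above returns EINVAL before any I/O starts.
 */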

/*
 * Write system call
 */
int
sys_write(struct proc *p, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int) fd;
		syscallarg(const void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
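
/*
 * Note the EPIPE handling above: a write to a pipe or socket with no
 * reader fails with EPIPE and also delivers SIGPIPE to the writer.  A
 * userland sketch (hypothetical, for illustration only, assuming fd, buf
 * and len are already set up):
 *
 *	signal(SIGPIPE, SIG_IGN);
 *	if (write(fd, buf, len) == -1 && errno == EPIPE)
 *		warnx("peer is gone");
 *
 * Unless the process ignores or catches SIGPIPE, the default action of
 * the signal terminates it before it ever sees the EPIPE return value.
 */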

/*
 * Gather write system call
 */
int
sys_writev(struct proc *p, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
		    (fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
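
/*
 * The size and direction handling above is driven entirely by the command
 * word: _IO(), _IOR(), _IOW() and _IOWR() from <sys/ioccom.h> encode the
 * IOC_VOID/IOC_OUT/IOC_IN bits and the argument size (IOCPARM_LEN) in the
 * high-order bits, and the group letter and command number in the low
 * bits.  For a hypothetical driver command (illustration only, not a
 * command defined in this tree):
 *
 *	struct my_args {
 *		int level;
 *	};
 *	#define MYDEVIOCSLEVEL	_IOW('M', 1, struct my_args)
 *
 * a userland ioctl(fd, MYDEVIOCSLEVEL, &args) makes sys_ioctl() copyin()
 * sizeof(struct my_args) bytes before calling fo_ioctl, while an _IOR()
 * or _IOWR() command would likewise be copied back out on success.
 */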

int selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

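	/*
	 * The bits buffer holds six descriptor sets of ni bytes each:
	 * the three input sets (in, ou, ex) at offsets 0, ni and 2*ni,
	 * and the corresponding output sets, filled in by selscan(),
	 * at offsets 3*ni, 4*ni and 5*ni.
	 */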
#define getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
		    sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define putbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyout(bits + ni * x, SCARG(uap, name), ni); \
		if (error) \
			goto out; \
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
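
/*
 * Userland view of the above (an illustrative sketch only, for an open
 * descriptor fd): the caller passes up to three in/out fd_set bitmaps and
 * an optional timeout, and on return only the descriptors that are ready
 * remain set:
 *
 *	fd_set rset;
 *	struct timeval tv;
 *	int n;
 *
 *	FD_ZERO(&rset);
 *	FD_SET(fd, &rset);
 *	tv.tv_sec = 5;
 *	tv.tv_usec = 0;
 *	n = select(fd + 1, &rset, NULL, NULL, &tv);
 *	if (n > 0 && FD_ISSET(fd, &rset))
 *		;	// fd is readable
 *
 * A return of 0 means the timeout expired; as noted above, EINTR is
 * returned rather than restarting the call after a signal.
 */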

int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
	    POLLWRNORM | POLLHUP | POLLERR,
	    POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
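
/*
 * The timeout argument of poll() is in milliseconds (INFTIM, -1, blocks
 * forever); the conversion above splits it into seconds and microseconds,
 * so 2500 ms becomes { tv_sec = 2, tv_usec = 500000 }.  A userland sketch
 * (illustration only, for an open descriptor fd):
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = fd;
 *	pfd.events = POLLIN;
 *	if (poll(&pfd, 1, 2500) > 0 && (pfd.revents & POLLIN))
 *		;	// fd is readable
 *
 * POLLERR, POLLHUP and POLLNVAL may be reported in revents even though
 * they were never requested in events, as pollscan() below arranges.
 */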

int
pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
{
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, p);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_pid == mypid)
		return;
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else {
		sip->si_flags &= ~SI_COLL;
		sip->si_pid = mypid;
	}
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_pid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		SCHED_UNLOCK(s);
	}
}
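
/*
 * selrecord() and selwakeup() are the two halves of the driver-side
 * protocol behind select/poll.  A hypothetical character driver (all
 * names here are illustrative only) records the polling process in its
 * selinfo when it has nothing to report, and wakes it from its input or
 * interrupt path:
 *
 *	int
 *	exdevpoll(dev_t dev, int events, struct proc *p)
 *	{
 *		struct exdev_softc *sc = &exdev_sc[minor(dev)];
 *		int revents = 0;
 *
 *		if (events & (POLLIN | POLLRDNORM)) {
 *			if (sc->sc_count > 0)
 *				revents |= events & (POLLIN | POLLRDNORM);
 *			else
 *				selrecord(p, &sc->sc_rsel);
 *		}
 *		return (revents);
 *	}
 *
 * and, when data arrives (e.g. in the interrupt handler):
 *
 *	sc->sc_count = n;
 *	selwakeup(&sc->sc_rsel);
 *
 * Only one process can be recorded per selinfo; a second selector sets
 * SI_COLL, and selwakeup() then broadcasts on selwait so both retry.
 */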