sys_generic.c revision 1.44 1 /* $NetBSD: sys_generic.c,v 1.44 1998/08/04 04:03:15 perry Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
/*
 * Forward declarations for the descriptor scanners defined later in this
 * file: sys_select() calls selscan() and sys_poll() calls pollscan().
 */
int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(p, v, retval)
74 struct proc *p;
75 void *v;
76 register_t *retval;
77 {
78 register struct sys_read_args /* {
79 syscallarg(int) fd;
80 syscallarg(void *) buf;
81 syscallarg(size_t) nbyte;
82 } */ *uap = v;
83 int fd = SCARG(uap, fd);
84 register struct file *fp;
85 register struct filedesc *fdp = p->p_fd;
86
87 if ((u_int)fd >= fdp->fd_nfiles ||
88 (fp = fdp->fd_ofiles[fd]) == NULL ||
89 (fp->f_flag & FREAD) == 0)
90 return (EBADF);
91
92 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
93 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
94 }
95
96 int
97 dofileread(p, fd, fp, buf, nbyte, offset, flags, retval)
98 struct proc *p;
99 int fd;
100 struct file *fp;
101 void *buf;
102 size_t nbyte;
103 off_t *offset;
104 int flags;
105 register_t *retval;
106 {
107 struct uio auio;
108 struct iovec aiov;
109 long cnt, error = 0;
110 #ifdef KTRACE
111 struct iovec ktriov;
112 #endif
113
114 aiov.iov_base = (caddr_t)buf;
115 aiov.iov_len = nbyte;
116 auio.uio_iov = &aiov;
117 auio.uio_iovcnt = 1;
118 auio.uio_resid = nbyte;
119 auio.uio_rw = UIO_READ;
120 auio.uio_segflg = UIO_USERSPACE;
121 auio.uio_procp = p;
122
123 /*
124 * Reads return ssize_t because -1 is returned on error. Therefore
125 * we must restrict the length to SSIZE_MAX to avoid garbage return
126 * values.
127 */
128 if (auio.uio_resid > SSIZE_MAX)
129 return (EINVAL);
130
131 #ifdef KTRACE
132 /*
133 * if tracing, save a copy of iovec
134 */
135 if (KTRPOINT(p, KTR_GENIO))
136 ktriov = aiov;
137 #endif
138 cnt = auio.uio_resid;
139 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
140 if (error)
141 if (auio.uio_resid != cnt && (error == ERESTART ||
142 error == EINTR || error == EWOULDBLOCK))
143 error = 0;
144 cnt -= auio.uio_resid;
145 #ifdef KTRACE
146 if (KTRPOINT(p, KTR_GENIO) && error == 0)
147 ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error);
148 #endif
149 *retval = cnt;
150 return (error);
151 }
152
153 /*
154 * Scatter read system call.
155 */
156 int
157 sys_readv(p, v, retval)
158 struct proc *p;
159 void *v;
160 register_t *retval;
161 {
162 register struct sys_readv_args /* {
163 syscallarg(int) fd;
164 syscallarg(const struct iovec *) iovp;
165 syscallarg(int) iovcnt;
166 } */ *uap = v;
167 int fd = SCARG(uap, fd);
168 register struct file *fp;
169 register struct filedesc *fdp = p->p_fd;
170
171 if ((u_int)fd >= fdp->fd_nfiles ||
172 (fp = fdp->fd_ofiles[fd]) == NULL ||
173 (fp->f_flag & FREAD) == 0)
174 return (EBADF);
175
176 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
177 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
178 }
179
180 int
181 dofilereadv(p, fd, fp, iovp, iovcnt, offset, flags, retval)
182 struct proc *p;
183 int fd;
184 struct file *fp;
185 const struct iovec *iovp;
186 int iovcnt;
187 off_t *offset;
188 int flags;
189 register_t *retval;
190 {
191 struct uio auio;
192 register struct iovec *iov;
193 struct iovec *needfree;
194 struct iovec aiov[UIO_SMALLIOV];
195 long i, cnt, error = 0;
196 u_int iovlen;
197 #ifdef KTRACE
198 struct iovec *ktriov = NULL;
199 #endif
200
201 /* note: can't use iovlen until iovcnt is validated */
202 iovlen = iovcnt * sizeof(struct iovec);
203 if ((u_int)iovcnt > UIO_SMALLIOV) {
204 if ((u_int)iovcnt > IOV_MAX)
205 return (EINVAL);
206 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
207 needfree = iov;
208 } else if ((u_int)iovcnt > 0) {
209 iov = aiov;
210 needfree = NULL;
211 } else
212 return (EINVAL);
213
214 auio.uio_iov = iov;
215 auio.uio_iovcnt = iovcnt;
216 auio.uio_rw = UIO_READ;
217 auio.uio_segflg = UIO_USERSPACE;
218 auio.uio_procp = p;
219 error = copyin(iovp, iov, iovlen);
220 if (error)
221 goto done;
222 auio.uio_resid = 0;
223 for (i = 0; i < iovcnt; i++) {
224 auio.uio_resid += iov->iov_len;
225 /*
226 * Reads return ssize_t because -1 is returned on error.
227 * Therefore we must restrict the length to SSIZE_MAX to
228 * avoid garbage return values.
229 */
230 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
231 error = EINVAL;
232 goto done;
233 }
234 iov++;
235 }
236 #ifdef KTRACE
237 /*
238 * if tracing, save a copy of iovec
239 */
240 if (KTRPOINT(p, KTR_GENIO)) {
241 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
242 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
243 }
244 #endif
245 cnt = auio.uio_resid;
246 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
247 if (error)
248 if (auio.uio_resid != cnt && (error == ERESTART ||
249 error == EINTR || error == EWOULDBLOCK))
250 error = 0;
251 cnt -= auio.uio_resid;
252 #ifdef KTRACE
253 if (KTRPOINT(p, KTR_GENIO))
254 if (error == 0) {
255 ktrgenio(p->p_tracep, fd, UIO_READ, ktriov, cnt,
256 error);
257 FREE(ktriov, M_TEMP);
258 }
259 #endif
260 *retval = cnt;
261 done:
262 if (needfree)
263 FREE(needfree, M_IOV);
264 return (error);
265 }
266
267 /*
268 * Write system call
269 */
270 int
271 sys_write(p, v, retval)
272 struct proc *p;
273 void *v;
274 register_t *retval;
275 {
276 register struct sys_write_args /* {
277 syscallarg(int) fd;
278 syscallarg(const void *) buf;
279 syscallarg(size_t) nbyte;
280 } */ *uap = v;
281 int fd = SCARG(uap, fd);
282 register struct file *fp;
283 register struct filedesc *fdp = p->p_fd;
284
285 if ((u_int)fd >= fdp->fd_nfiles ||
286 (fp = fdp->fd_ofiles[fd]) == NULL ||
287 (fp->f_flag & FWRITE) == 0)
288 return (EBADF);
289
290 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
291 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
292 }
293
294 int
295 dofilewrite(p, fd, fp, buf, nbyte, offset, flags, retval)
296 struct proc *p;
297 int fd;
298 struct file *fp;
299 const void *buf;
300 size_t nbyte;
301 off_t *offset;
302 int flags;
303 register_t *retval;
304 {
305 struct uio auio;
306 struct iovec aiov;
307 long cnt, error = 0;
308 #ifdef KTRACE
309 struct iovec ktriov;
310 #endif
311
312 aiov.iov_base = (caddr_t)buf; /* XXX kills const */
313 aiov.iov_len = nbyte;
314 auio.uio_iov = &aiov;
315 auio.uio_iovcnt = 1;
316 auio.uio_resid = nbyte;
317 auio.uio_rw = UIO_WRITE;
318 auio.uio_segflg = UIO_USERSPACE;
319 auio.uio_procp = p;
320
321 /*
322 * Writes return ssize_t because -1 is returned on error. Therefore
323 * we must restrict the length to SSIZE_MAX to avoid garbage return
324 * values.
325 */
326 if (auio.uio_resid > SSIZE_MAX)
327 return (EINVAL);
328
329 #ifdef KTRACE
330 /*
331 * if tracing, save a copy of iovec
332 */
333 if (KTRPOINT(p, KTR_GENIO))
334 ktriov = aiov;
335 #endif
336 cnt = auio.uio_resid;
337 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
338 if (error) {
339 if (auio.uio_resid != cnt && (error == ERESTART ||
340 error == EINTR || error == EWOULDBLOCK))
341 error = 0;
342 if (error == EPIPE)
343 psignal(p, SIGPIPE);
344 }
345 cnt -= auio.uio_resid;
346 #ifdef KTRACE
347 if (KTRPOINT(p, KTR_GENIO) && error == 0)
348 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktriov, cnt, error);
349 #endif
350 *retval = cnt;
351 return (error);
352 }
353
354 /*
355 * Gather write system call
356 */
357 int
358 sys_writev(p, v, retval)
359 struct proc *p;
360 void *v;
361 register_t *retval;
362 {
363 register struct sys_writev_args /* {
364 syscallarg(int) fd;
365 syscallarg(const struct iovec *) iovp;
366 syscallarg(int) iovcnt;
367 } */ *uap = v;
368 int fd = SCARG(uap, fd);
369 register struct file *fp;
370 register struct filedesc *fdp = p->p_fd;
371
372 if ((u_int)fd >= fdp->fd_nfiles ||
373 (fp = fdp->fd_ofiles[fd]) == NULL ||
374 (fp->f_flag & FWRITE) == 0)
375 return (EBADF);
376
377 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
378 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
379 }
380
381 int
382 dofilewritev(p, fd, fp, iovp, iovcnt, offset, flags, retval)
383 struct proc *p;
384 int fd;
385 struct file *fp;
386 const struct iovec *iovp;
387 int iovcnt;
388 off_t *offset;
389 int flags;
390 register_t *retval;
391 {
392 struct uio auio;
393 register struct iovec *iov;
394 struct iovec *needfree;
395 struct iovec aiov[UIO_SMALLIOV];
396 long i, cnt, error = 0;
397 u_int iovlen;
398 #ifdef KTRACE
399 struct iovec *ktriov = NULL;
400 #endif
401
402 /* note: can't use iovlen until iovcnt is validated */
403 iovlen = iovcnt * sizeof(struct iovec);
404 if ((u_int)iovcnt > UIO_SMALLIOV) {
405 if ((u_int)iovcnt > IOV_MAX)
406 return (EINVAL);
407 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
408 needfree = iov;
409 } else if ((u_int)iovcnt > 0) {
410 iov = aiov;
411 needfree = NULL;
412 } else
413 return (EINVAL);
414
415 auio.uio_iov = iov;
416 auio.uio_iovcnt = iovcnt;
417 auio.uio_rw = UIO_WRITE;
418 auio.uio_segflg = UIO_USERSPACE;
419 auio.uio_procp = p;
420 error = copyin(iovp, iov, iovlen);
421 if (error)
422 goto done;
423 auio.uio_resid = 0;
424 for (i = 0; i < iovcnt; i++) {
425 auio.uio_resid += iov->iov_len;
426 /*
427 * Writes return ssize_t because -1 is returned on error.
428 * Therefore we must restrict the length to SSIZE_MAX to
429 * avoid garbage return values.
430 */
431 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
432 error = EINVAL;
433 goto done;
434 }
435 iov++;
436 }
437 #ifdef KTRACE
438 /*
439 * if tracing, save a copy of iovec
440 */
441 if (KTRPOINT(p, KTR_GENIO)) {
442 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
443 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
444 }
445 #endif
446 cnt = auio.uio_resid;
447 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
448 if (error) {
449 if (auio.uio_resid != cnt && (error == ERESTART ||
450 error == EINTR || error == EWOULDBLOCK))
451 error = 0;
452 if (error == EPIPE)
453 psignal(p, SIGPIPE);
454 }
455 cnt -= auio.uio_resid;
456 #ifdef KTRACE
457 if (KTRPOINT(p, KTR_GENIO))
458 if (error == 0) {
459 ktrgenio(p->p_tracep, fd, UIO_WRITE, ktriov, cnt,
460 error);
461 FREE(ktriov, M_TEMP);
462 }
463 #endif
464 *retval = cnt;
465 done:
466 if (needfree)
467 FREE(needfree, M_IOV);
468 return (error);
469 }
470
471 /*
472 * Ioctl system call
473 */
474 /* ARGSUSED */
475 int
476 sys_ioctl(p, v, retval)
477 struct proc *p;
478 void *v;
479 register_t *retval;
480 {
481 register struct sys_ioctl_args /* {
482 syscallarg(int) fd;
483 syscallarg(u_long) com;
484 syscallarg(caddr_t) data;
485 } */ *uap = v;
486 register struct file *fp;
487 register struct filedesc *fdp;
488 register u_long com;
489 register int error;
490 register u_int size;
491 caddr_t data, memp;
492 int tmp;
493 #define STK_PARAMS 128
494 char stkbuf[STK_PARAMS];
495
496 fdp = p->p_fd;
497 if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
498 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
499 return (EBADF);
500
501 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
502 return (EBADF);
503
504 switch (com = SCARG(uap, com)) {
505 case FIONCLEX:
506 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
507 return (0);
508 case FIOCLEX:
509 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
510 return (0);
511 }
512
513 /*
514 * Interpret high order word to find amount of data to be
515 * copied to/from the user's address space.
516 */
517 size = IOCPARM_LEN(com);
518 if (size > IOCPARM_MAX)
519 return (ENOTTY);
520 memp = NULL;
521 if (size > sizeof(stkbuf)) {
522 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
523 data = memp;
524 } else
525 data = stkbuf;
526 if (com&IOC_IN) {
527 if (size) {
528 error = copyin(SCARG(uap, data), data, size);
529 if (error) {
530 if (memp)
531 free(memp, M_IOCTLOPS);
532 return (error);
533 }
534 } else
535 *(caddr_t *)data = SCARG(uap, data);
536 } else if ((com&IOC_OUT) && size)
537 /*
538 * Zero the buffer so the user always
539 * gets back something deterministic.
540 */
541 memset(data, 0, size);
542 else if (com&IOC_VOID)
543 *(caddr_t *)data = SCARG(uap, data);
544
545 switch (com) {
546
547 case FIONBIO:
548 if ((tmp = *(int *)data) != 0)
549 fp->f_flag |= FNONBLOCK;
550 else
551 fp->f_flag &= ~FNONBLOCK;
552 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
553 break;
554
555 case FIOASYNC:
556 if ((tmp = *(int *)data) != 0)
557 fp->f_flag |= FASYNC;
558 else
559 fp->f_flag &= ~FASYNC;
560 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
561 break;
562
563 case FIOSETOWN:
564 tmp = *(int *)data;
565 if (fp->f_type == DTYPE_SOCKET) {
566 ((struct socket *)fp->f_data)->so_pgid = tmp;
567 error = 0;
568 break;
569 }
570 if (tmp <= 0) {
571 tmp = -tmp;
572 } else {
573 struct proc *p1 = pfind(tmp);
574 if (p1 == 0) {
575 error = ESRCH;
576 break;
577 }
578 tmp = p1->p_pgrp->pg_id;
579 }
580 error = (*fp->f_ops->fo_ioctl)
581 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
582 break;
583
584 case FIOGETOWN:
585 if (fp->f_type == DTYPE_SOCKET) {
586 error = 0;
587 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
588 break;
589 }
590 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
591 *(int *)data = -*(int *)data;
592 break;
593
594 default:
595 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
596 /*
597 * Copy any data to user, size was
598 * already set and checked above.
599 */
600 if (error == 0 && (com&IOC_OUT) && size)
601 error = copyout(data, SCARG(uap, data), size);
602 break;
603 }
604 if (memp)
605 free(memp, M_IOCTLOPS);
606 return (error);
607 }
608
/*
 * selwait:  only its address is used, as the sleep channel that
 *	     sys_select() and sys_poll() sleep on and selwakeup() wakes.
 * nselcoll: incremented by selwakeup() whenever it finds SI_COLL set;
 *	     sys_select() and sys_poll() sample it before scanning and
 *	     rescan if it changed.
 */
int selwait, nselcoll;
610
611 /*
612 * Select system call.
613 */
614 int
615 sys_select(p, v, retval)
616 register struct proc *p;
617 void *v;
618 register_t *retval;
619 {
620 register struct sys_select_args /* {
621 syscallarg(int) nd;
622 syscallarg(fd_set *) in;
623 syscallarg(fd_set *) ou;
624 syscallarg(fd_set *) ex;
625 syscallarg(struct timeval *) tv;
626 } */ *uap = v;
627 caddr_t bits;
628 char smallbits[howmany(FD_SETSIZE, NFDBITS) * sizeof(fd_mask) * 6];
629 struct timeval atv;
630 int s, ncoll, error = 0, timo;
631 size_t ni;
632
633 if (SCARG(uap, nd) < 0)
634 return (EINVAL);
635 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
636 /* forgiving; slightly wrong */
637 SCARG(uap, nd) = p->p_fd->fd_nfiles;
638 }
639 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
640 if (ni * 6 > sizeof(smallbits))
641 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
642 else
643 bits = smallbits;
644
645 #define getbits(name, x) \
646 if (SCARG(uap, name)) { \
647 error = copyin(SCARG(uap, name), bits + ni * x, ni); \
648 if (error) \
649 goto done; \
650 } else \
651 memset(bits + ni * x, 0, ni);
652 getbits(in, 0);
653 getbits(ou, 1);
654 getbits(ex, 2);
655 #undef getbits
656
657 if (SCARG(uap, tv)) {
658 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
659 sizeof(atv));
660 if (error)
661 goto done;
662 if (itimerfix(&atv)) {
663 error = EINVAL;
664 goto done;
665 }
666 s = splclock();
667 timeradd(&atv, &time, &atv);
668 timo = hzto(&atv);
669 /*
670 * Avoid inadvertently sleeping forever.
671 */
672 if (timo == 0)
673 timo = 1;
674 splx(s);
675 } else
676 timo = 0;
677 retry:
678 ncoll = nselcoll;
679 p->p_flag |= P_SELECT;
680 error = selscan(p, (fd_mask *)(bits + ni * 0),
681 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
682 if (error || *retval)
683 goto done;
684 s = splhigh();
685 if (timo && timercmp(&time, &atv, >=)) {
686 splx(s);
687 goto done;
688 }
689 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
690 splx(s);
691 goto retry;
692 }
693 p->p_flag &= ~P_SELECT;
694 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
695 splx(s);
696 if (error == 0)
697 goto retry;
698 done:
699 p->p_flag &= ~P_SELECT;
700 /* select is not restarted after signals... */
701 if (error == ERESTART)
702 error = EINTR;
703 if (error == EWOULDBLOCK)
704 error = 0;
705 if (error == 0) {
706 #define putbits(name, x) \
707 if (SCARG(uap, name)) { \
708 error = copyout(bits + ni * x, SCARG(uap, name), ni); \
709 if (error) \
710 goto out; \
711 }
712 putbits(in, 3);
713 putbits(ou, 4);
714 putbits(ex, 5);
715 #undef putbits
716 }
717 out:
718 if (ni * 6 > sizeof(smallbits))
719 free(bits, M_TEMP);
720 return (error);
721 }
722
723 int
724 selscan(p, ibitp, obitp, nfd, retval)
725 struct proc *p;
726 fd_mask *ibitp, *obitp;
727 int nfd;
728 register_t *retval;
729 {
730 register struct filedesc *fdp = p->p_fd;
731 register int msk, i, j, fd;
732 register fd_mask ibits, obits;
733 struct file *fp;
734 int n = 0;
735 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
736 POLLWRNORM | POLLHUP | POLLERR,
737 POLLRDBAND };
738
739 for (msk = 0; msk < 3; msk++) {
740 for (i = 0; i < nfd; i += NFDBITS) {
741 ibits = *ibitp++;
742 obits = 0;
743 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
744 ibits &= ~(1 << j);
745 fp = fdp->fd_ofiles[fd];
746 if (fp == NULL)
747 return (EBADF);
748 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
749 obits |= (1 << j);
750 n++;
751 }
752 }
753 *obitp++ = obits;
754 }
755 }
756 *retval = n;
757 return (0);
758 }
759
760 /*
761 * Poll system call.
762 */
763 int
764 sys_poll(p, v, retval)
765 register struct proc *p;
766 void *v;
767 register_t *retval;
768 {
769 register struct sys_poll_args /* {
770 syscallarg(struct pollfd *) fds;
771 syscallarg(u_int) nfds;
772 syscallarg(int) timeout;
773 } */ *uap = v;
774 caddr_t bits;
775 char smallbits[32 * sizeof(struct pollfd)];
776 struct timeval atv;
777 int s, ncoll, error = 0, timo;
778 size_t ni;
779
780 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
781 /* forgiving; slightly wrong */
782 SCARG(uap, nfds) = p->p_fd->fd_nfiles;
783 }
784 ni = SCARG(uap, nfds) * sizeof(struct pollfd);
785 if (ni > sizeof(smallbits))
786 bits = malloc(ni, M_TEMP, M_WAITOK);
787 else
788 bits = smallbits;
789
790 error = copyin(SCARG(uap, fds), bits, ni);
791 if (error)
792 goto done;
793
794 if (SCARG(uap, timeout) != INFTIM) {
795 atv.tv_sec = SCARG(uap, timeout) / 1000;
796 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
797 if (itimerfix(&atv)) {
798 error = EINVAL;
799 goto done;
800 }
801 s = splclock();
802 timeradd(&atv, &time, &atv);
803 timo = hzto(&atv);
804 /*
805 * Avoid inadvertently sleeping forever.
806 */
807 if (timo == 0)
808 timo = 1;
809 splx(s);
810 } else
811 timo = 0;
812 retry:
813 ncoll = nselcoll;
814 p->p_flag |= P_SELECT;
815 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
816 if (error || *retval)
817 goto done;
818 s = splhigh();
819 if (timo && timercmp(&time, &atv, >=)) {
820 splx(s);
821 goto done;
822 }
823 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
824 splx(s);
825 goto retry;
826 }
827 p->p_flag &= ~P_SELECT;
828 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
829 splx(s);
830 if (error == 0)
831 goto retry;
832 done:
833 p->p_flag &= ~P_SELECT;
834 /* poll is not restarted after signals... */
835 if (error == ERESTART)
836 error = EINTR;
837 if (error == EWOULDBLOCK)
838 error = 0;
839 if (error == 0) {
840 error = copyout(bits, SCARG(uap, fds), ni);
841 if (error)
842 goto out;
843 }
844 out:
845 if (ni > sizeof(smallbits))
846 free(bits, M_TEMP);
847 return (error);
848 }
849
850 int
851 pollscan(p, fds, nfd, retval)
852 struct proc *p;
853 struct pollfd *fds;
854 int nfd;
855 register_t *retval;
856 {
857 register struct filedesc *fdp = p->p_fd;
858 int i;
859 struct file *fp;
860 int n = 0;
861
862 for (i = 0; i < nfd; i++, fds++) {
863 if ((u_int)fds->fd >= fdp->fd_nfiles) {
864 fds->revents = POLLNVAL;
865 n++;
866 } else {
867 fp = fdp->fd_ofiles[fds->fd];
868 if (fp == 0) {
869 fds->revents = POLLNVAL;
870 n++;
871 } else {
872 fds->revents = (*fp->f_ops->fo_poll)(fp,
873 fds->events | POLLERR | POLLHUP, p);
874 if (fds->revents != 0)
875 n++;
876 }
877 }
878 }
879 *retval = n;
880 return (0);
881 }
882
/*
 * Poll routine that reports ordinary reading and writing as always
 * possible: returns the subset of "events" made up of POLLIN, POLLOUT,
 * POLLRDNORM and POLLWRNORM.  "dev" and "p" are not used.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	const int always_ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;

	return (events & always_ready);
}
893
894 /*
895 * Record a select request.
896 */
897 void
898 selrecord(selector, sip)
899 struct proc *selector;
900 struct selinfo *sip;
901 {
902 struct proc *p;
903 pid_t mypid;
904
905 mypid = selector->p_pid;
906 if (sip->si_pid == mypid)
907 return;
908 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
909 p->p_wchan == (caddr_t)&selwait)
910 sip->si_flags |= SI_COLL;
911 else
912 sip->si_pid = mypid;
913 }
914
915 /*
916 * Do a wakeup when a selectable event occurs.
917 */
918 void
919 selwakeup(sip)
920 register struct selinfo *sip;
921 {
922 register struct proc *p;
923 int s;
924
925 if (sip->si_pid == 0)
926 return;
927 if (sip->si_flags & SI_COLL) {
928 nselcoll++;
929 sip->si_flags &= ~SI_COLL;
930 wakeup((caddr_t)&selwait);
931 }
932 p = pfind(sip->si_pid);
933 sip->si_pid = 0;
934 if (p != NULL) {
935 s = splhigh();
936 if (p->p_wchan == (caddr_t)&selwait) {
937 if (p->p_stat == SSLEEP)
938 setrunnable(p);
939 else
940 unsleep(p);
941 } else if (p->p_flag & P_SELECT)
942 p->p_flag &= ~P_SELECT;
943 splx(s);
944 }
945 }
946