/*	$NetBSD: sys_generic.c,v 1.60 2001/11/14 18:43:58 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.60 2001/11/14 18:43:58 christos Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>
int	selscan(struct proc *, fd_mask *, fd_mask *, int, register_t *);
int	pollscan(struct proc *, struct pollfd *, int, register_t *);

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct proc *p, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int) fd;
		syscallarg(void *) buf;
		syscallarg(size_t) nbyte;
	} */ *uap = v;
	int fd;
	struct file *fp;
	struct filedesc *fdp;

	fd = SCARG(uap, fd);
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0)
		return (EBADF);

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

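/*
 * Do the actual read into the user buffer described by buf/nbyte.
 * The file is expected to have been marked in use by the caller
 * (FILE_USE()); the use reference is dropped here on every return path.
 */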
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

157
158 /*
159 * Scatter read system call.
160 */
161 int
162 sys_readv(struct proc *p, void *v, register_t *retval)
163 {
164 struct sys_readv_args /* {
165 syscallarg(int) fd;
166 syscallarg(const struct iovec *) iovp;
167 syscallarg(int) iovcnt;
168 } */ *uap = v;
169 int fd;
170 struct file *fp;
171 struct filedesc *fdp;
172
173 fd = SCARG(uap, fd);
174 fdp = p->p_fd;
175
176 if ((fp = fd_getfile(fdp, fd)) == NULL)
177 return (EBADF);
178
179 if ((fp->f_flag & FREAD) == 0)
180 return (EBADF);
181
182 FILE_USE(fp);
183
184 /* dofilereadv() will unuse the descriptor for us */
185 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
186 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
187 }
188
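/*
 * Scatter-read into the iovec array copied in from userland.  Small
 * iovec counts use the on-stack aiov array; larger ones (up to IOV_MAX)
 * are allocated with malloc().  The caller's FILE_USE() reference is
 * dropped here on every return path.
 */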
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

275
276 /*
277 * Write system call
278 */
279 int
280 sys_write(struct proc *p, void *v, register_t *retval)
281 {
282 struct sys_write_args /* {
283 syscallarg(int) fd;
284 syscallarg(const void *) buf;
285 syscallarg(size_t) nbyte;
286 } */ *uap = v;
287 int fd;
288 struct file *fp;
289 struct filedesc *fdp;
290
291 fd = SCARG(uap, fd);
292 fdp = p->p_fd;
293
294 if ((fp = fd_getfile(fdp, fd)) == NULL)
295 return (EBADF);
296
297 if ((fp->f_flag & FWRITE) == 0)
298 return (EBADF);
299
300 FILE_USE(fp);
301
302 /* dofilewrite() will unuse the descriptor for us */
303 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
304 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
305 }
306
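/*
 * Do the actual write from the user buffer.  On EPIPE the writer is
 * sent SIGPIPE, matching the usual pipe/socket semantics.  The caller's
 * FILE_USE() reference is dropped here on every return path.
 */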
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

364
365 /*
366 * Gather write system call
367 */
368 int
369 sys_writev(struct proc *p, void *v, register_t *retval)
370 {
371 struct sys_writev_args /* {
372 syscallarg(int) fd;
373 syscallarg(const struct iovec *) iovp;
374 syscallarg(int) iovcnt;
375 } */ *uap = v;
376 int fd;
377 struct file *fp;
378 struct filedesc *fdp;
379
380 fd = SCARG(uap, fd);
381 fdp = p->p_fd;
382
383 if ((fp = fd_getfile(fdp, fd)) == NULL)
384 return (EBADF);
385
386 if ((fp->f_flag & FWRITE) == 0)
387 return (EBADF);
388
389 FILE_USE(fp);
390
391 /* dofilewritev() will unuse the descriptor for us */
392 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
393 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
394 }
395
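/*
 * Gather-write from the iovec array copied in from userland, mirroring
 * dofilereadv(): small iovec counts use the on-stack array, larger ones
 * (up to IOV_MAX) are allocated.  The caller's FILE_USE() reference is
 * dropped here on every return path.
 */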
int
dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
		    (fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}

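/*
 * All selecting/polling processes sleep on the single wait channel
 * &selwait; nselcoll counts collisions (more than one process selecting
 * on the same descriptor at once), which force the sleepers to rescan.
 */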
int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
		    sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define	putbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyout(bits + ni * x, SCARG(uap, name), ni); \
		if (error) \
			goto out; \
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

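/*
 * Scan the three input bit vectors (read/write/except) and fill in the
 * corresponding output vectors by polling each named descriptor.  The
 * count of ready descriptors is returned through retval.
 */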
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

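/*
 * Scan the pollfd array, filling in revents for each entry and counting
 * the descriptors with events pending.  Out-of-range or stale
 * descriptors are flagged POLLNVAL; negative descriptors are ignored.
 */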
int
pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
{
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, p);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, p);
			}
		}
	}
	*retval = n;
	return (0);
}

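/*
 * Generic poll routine for devices that are always ready for I/O:
 * report any of the requested read/write events as true.
 */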
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct proc *p)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_pid == mypid)
		return;
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		sip->si_flags |= SI_COLL;
	else {
		sip->si_flags &= ~SI_COLL;
		sip->si_pid = mypid;
	}
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_pid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		SCHED_LOCK(s);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		SCHED_UNLOCK(s);
	}
}