sys_generic.c revision 1.54.2.2 1 /* $NetBSD: sys_generic.c,v 1.54.2.2 2001/06/21 20:07:01 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/lwp.h>
51 #include <sys/proc.h>
52 #include <sys/socketvar.h>
53 #include <sys/signalvar.h>
54 #include <sys/uio.h>
55 #include <sys/kernel.h>
56 #include <sys/stat.h>
57 #include <sys/malloc.h>
58 #include <sys/poll.h>
59 #ifdef KTRACE
60 #include <sys/ktrace.h>
61 #endif
62
63 #include <sys/mount.h>
64 #include <sys/syscallargs.h>
65
66 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
67 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
68
69 /*
70 * Read system call.
71 */
72 /* ARGSUSED */
73 int
74 sys_read(struct lwp *l, void *v, register_t *retval)
75 {
76 struct sys_read_args /* {
77 syscallarg(int) fd;
78 syscallarg(void *) buf;
79 syscallarg(size_t) nbyte;
80 } */ *uap = v;
81 int fd;
82 struct file *fp;
83 struct proc *p;
84 struct filedesc *fdp;
85
86 fd = SCARG(uap, fd);
87 p = l->l_proc;
88 fdp = p->p_fd;
89
90 if ((fp = fd_getfile(fdp, fd)) == NULL)
91 return (EBADF);
92
93 if ((fp->f_flag & FREAD) == 0)
94 return (EBADF);
95
96 FILE_USE(fp);
97
98 /* dofileread() will unuse the descriptor for us */
99 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
100 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
101 }
102
/*
 * Common code for read(2): perform one uio-based read through fp's
 * fo_read method.
 *
 * On success *retval holds the number of bytes transferred.  The caller
 * has already done FILE_USE(fp); this function always performs the
 * matching FILE_UNUSE before returning, on both success and error.
 */
int
dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	error = 0;

	/* Describe the single user buffer with a one-element uio. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read mutates the original)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A transfer interrupted after partial progress succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually moved */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
157
158 /*
159 * Scatter read system call.
160 */
161 int
162 sys_readv(struct lwp *l, void *v, register_t *retval)
163 {
164 struct sys_readv_args /* {
165 syscallarg(int) fd;
166 syscallarg(const struct iovec *) iovp;
167 syscallarg(int) iovcnt;
168 } */ *uap = v;
169 int fd;
170 struct file *fp;
171 struct proc *p;
172 struct filedesc *fdp;
173
174 fd = SCARG(uap, fd);
175 p = l->l_proc;
176 fdp = p->p_fd;
177
178 if ((fp = fd_getfile(fdp, fd)) == NULL)
179 return (EBADF);
180
181 if ((fp->f_flag & FREAD) == 0)
182 return (EBADF);
183
184 FILE_USE(fp);
185
186 /* dofilereadv() will unuse the descriptor for us */
187 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
188 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
189 }
190
191 int
192 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
193 int iovcnt, off_t *offset, int flags, register_t *retval)
194 {
195 struct uio auio;
196 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
197 long i, cnt, error;
198 u_int iovlen;
199 #ifdef KTRACE
200 struct iovec *ktriov;
201 #endif
202
203 error = 0;
204 #ifdef KTRACE
205 ktriov = NULL;
206 #endif
207 /* note: can't use iovlen until iovcnt is validated */
208 iovlen = iovcnt * sizeof(struct iovec);
209 if ((u_int)iovcnt > UIO_SMALLIOV) {
210 if ((u_int)iovcnt > IOV_MAX) {
211 error = EINVAL;
212 goto out;
213 }
214 iov = malloc(iovlen, M_IOV, M_WAITOK);
215 needfree = iov;
216 } else if ((u_int)iovcnt > 0) {
217 iov = aiov;
218 needfree = NULL;
219 } else {
220 error = EINVAL;
221 goto out;
222 }
223
224 auio.uio_iov = iov;
225 auio.uio_iovcnt = iovcnt;
226 auio.uio_rw = UIO_READ;
227 auio.uio_segflg = UIO_USERSPACE;
228 auio.uio_procp = p;
229 error = copyin(iovp, iov, iovlen);
230 if (error)
231 goto done;
232 auio.uio_resid = 0;
233 for (i = 0; i < iovcnt; i++) {
234 auio.uio_resid += iov->iov_len;
235 /*
236 * Reads return ssize_t because -1 is returned on error.
237 * Therefore we must restrict the length to SSIZE_MAX to
238 * avoid garbage return values.
239 */
240 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
241 error = EINVAL;
242 goto done;
243 }
244 iov++;
245 }
246 #ifdef KTRACE
247 /*
248 * if tracing, save a copy of iovec
249 */
250 if (KTRPOINT(p, KTR_GENIO)) {
251 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
252 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
253 }
254 #endif
255 cnt = auio.uio_resid;
256 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
257 if (error)
258 if (auio.uio_resid != cnt && (error == ERESTART ||
259 error == EINTR || error == EWOULDBLOCK))
260 error = 0;
261 cnt -= auio.uio_resid;
262 #ifdef KTRACE
263 if (KTRPOINT(p, KTR_GENIO))
264 if (error == 0) {
265 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
266 free(ktriov, M_TEMP);
267 }
268 #endif
269 *retval = cnt;
270 done:
271 if (needfree)
272 free(needfree, M_IOV);
273 out:
274 FILE_UNUSE(fp, p);
275 return (error);
276 }
277
278 /*
279 * Write system call
280 */
281 int
282 sys_write(struct lwp *l, void *v, register_t *retval)
283 {
284 struct sys_write_args /* {
285 syscallarg(int) fd;
286 syscallarg(const void *) buf;
287 syscallarg(size_t) nbyte;
288 } */ *uap = v;
289 int fd;
290 struct file *fp;
291 struct proc *p;
292 struct filedesc *fdp;
293
294 fd = SCARG(uap, fd);
295 p = l->l_proc;
296 fdp = p->p_fd;
297
298 if ((fp = fd_getfile(fdp, fd)) == NULL)
299 return (EBADF);
300
301 if ((fp->f_flag & FWRITE) == 0)
302 return (EBADF);
303
304 FILE_USE(fp);
305
306 /* dofilewrite() will unuse the descriptor for us */
307 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
308 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
309 }
310
/*
 * Common code for write(2): perform one uio-based write through fp's
 * fo_write method.
 *
 * On success *retval holds the number of bytes transferred.  EPIPE
 * additionally raises SIGPIPE, matching POSIX write semantics.  The
 * caller has already done FILE_USE(fp); this function always performs
 * the matching FILE_UNUSE before returning.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	/* Describe the single user buffer with a one-element uio. */
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write mutates the original)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A transfer interrupted after partial progress succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually moved */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
368
369 /*
370 * Gather write system call
371 */
372 int
373 sys_writev(struct lwp *l, void *v, register_t *retval)
374 {
375 struct sys_writev_args /* {
376 syscallarg(int) fd;
377 syscallarg(const struct iovec *) iovp;
378 syscallarg(int) iovcnt;
379 } */ *uap = v;
380 int fd;
381 struct file *fp;
382 struct proc *p;
383 struct filedesc *fdp;
384
385 fd = SCARG(uap, fd);
386 p = l->l_proc;
387 fdp = p->p_fd;
388
389 if ((fp = fd_getfile(fdp, fd)) == NULL)
390 return (EBADF);
391
392 if ((fp->f_flag & FWRITE) == 0)
393 return (EBADF);
394
395 FILE_USE(fp);
396
397 /* dofilewritev() will unuse the descriptor for us */
398 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
399 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
400 }
401
402 int
403 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
404 int iovcnt, off_t *offset, int flags, register_t *retval)
405 {
406 struct uio auio;
407 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
408 long i, cnt, error;
409 u_int iovlen;
410 #ifdef KTRACE
411 struct iovec *ktriov;
412 #endif
413
414 error = 0;
415 #ifdef KTRACE
416 ktriov = NULL;
417 #endif
418 /* note: can't use iovlen until iovcnt is validated */
419 iovlen = iovcnt * sizeof(struct iovec);
420 if ((u_int)iovcnt > UIO_SMALLIOV) {
421 if ((u_int)iovcnt > IOV_MAX)
422 return (EINVAL);
423 iov = malloc(iovlen, M_IOV, M_WAITOK);
424 needfree = iov;
425 } else if ((u_int)iovcnt > 0) {
426 iov = aiov;
427 needfree = NULL;
428 } else {
429 error = EINVAL;
430 goto out;
431 }
432
433 auio.uio_iov = iov;
434 auio.uio_iovcnt = iovcnt;
435 auio.uio_rw = UIO_WRITE;
436 auio.uio_segflg = UIO_USERSPACE;
437 auio.uio_procp = p;
438 error = copyin(iovp, iov, iovlen);
439 if (error)
440 goto done;
441 auio.uio_resid = 0;
442 for (i = 0; i < iovcnt; i++) {
443 auio.uio_resid += iov->iov_len;
444 /*
445 * Writes return ssize_t because -1 is returned on error.
446 * Therefore we must restrict the length to SSIZE_MAX to
447 * avoid garbage return values.
448 */
449 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
450 error = EINVAL;
451 goto done;
452 }
453 iov++;
454 }
455 #ifdef KTRACE
456 /*
457 * if tracing, save a copy of iovec
458 */
459 if (KTRPOINT(p, KTR_GENIO)) {
460 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
461 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
462 }
463 #endif
464 cnt = auio.uio_resid;
465 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
466 if (error) {
467 if (auio.uio_resid != cnt && (error == ERESTART ||
468 error == EINTR || error == EWOULDBLOCK))
469 error = 0;
470 if (error == EPIPE)
471 psignal(p, SIGPIPE);
472 }
473 cnt -= auio.uio_resid;
474 #ifdef KTRACE
475 if (KTRPOINT(p, KTR_GENIO))
476 if (error == 0) {
477 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
478 free(ktriov, M_TEMP);
479 }
480 #endif
481 *retval = cnt;
482 done:
483 if (needfree)
484 free(needfree, M_IOV);
485 out:
486 FILE_UNUSE(fp, p);
487 return (error);
488 }
489
/*
 * Ioctl system call
 *
 * Stages the command's in/out argument in a kernel buffer (stack for
 * small arguments, malloc for large ones), handles the generic
 * FIO*-class commands inline, and dispatches everything else to the
 * file's fo_ioctl method.
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
	/* Arguments up to STK_PARAMS bytes avoid a malloc. */
#define	STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/* Close-on-exec commands only touch descriptor flags; no driver. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* Zero-size IOC_IN: the pointer itself is the arg. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag in sync, then inform the driver. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets keep the pgid in so_pgid directly. */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (tmp <= 0) {
			tmp = -tmp;	/* non-positive value names a pgrp */
		} else {
			/* Positive value names a process: use its pgrp. */
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			/* TIOCGPGRP and FIOGETOWN use opposite signs. */
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
637
/* Sleep channel for select/poll (tsleep on &selwait) and collision count. */
int selwait, nselcoll;
639
/*
 * Select system call.
 *
 * Stages six fd_sets in one buffer (input sets in slots 0-2, scan
 * results in slots 3-5), then loops: scan, and if nothing is ready
 * sleep on &selwait until a wakeup, collision, timeout, or signal.
 */
int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Room for 3 input + 3 output sets of FD_SETSIZE bits each. */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* Bytes per set, rounded up to whole fd_mask words. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each provided set; absent sets are treated as empty. */
#define	getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* no timeout: sleep until event or signal */
 retry:
	ncoll = nselcoll;	/* snapshot collision generation */
	l->l_flag |= L_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;	/* deadline already passed */
	}
	s = splsched();
	/*
	 * If L_SELECT was cleared or a collision was recorded while we
	 * scanned, the results may be stale: rescan instead of sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy the result sets (slots 3-5) back out. */
#define	putbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyout(bits + ni * x, SCARG(uap, name), ni); \
		if (error) \
			goto out; \
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
751
/*
 * Scan the three input fd_sets (read/write/except, laid out
 * consecutively at ibitp) and fill in the corresponding output sets at
 * obitp.  *retval is set to the number of ready descriptors.  Returns
 * EBADF if a set names a closed descriptor.
 */
int
selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;
	/* Poll events to test for each of the three set kinds. */
	static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Visit each set bit; ffs() is 1-based. */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, p);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
787
/*
 * Poll system call.
 *
 * Copies the pollfd array into a kernel buffer, then loops: scan, and
 * if nothing is ready sleep on &selwait until a wakeup, collision,
 * timeout, or signal (same retry protocol as sys_select).
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	struct proc *p;
	caddr_t bits;
	/* Up to 32 pollfds are staged on the stack, avoiding malloc. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	p = l->l_proc;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* Convert the ms timeout to an absolute deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* INFTIM: sleep until event or signal */
 retry:
	ncoll = nselcoll;	/* snapshot collision generation */
	l->l_flag |= L_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;	/* deadline already passed */
	}
	s = splsched();
	/* Rescan if a wakeup or collision raced with our scan. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Hand the updated revents fields back to the user. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
875
876 int
877 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
878 {
879 struct filedesc *fdp;
880 int i, n;
881 struct file *fp;
882
883 fdp = p->p_fd;
884 n = 0;
885 for (i = 0; i < nfd; i++, fds++) {
886 if ((u_int)fds->fd >= fdp->fd_nfiles) {
887 fds->revents = POLLNVAL;
888 n++;
889 } else {
890 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
891 fds->revents = POLLNVAL;
892 n++;
893 } else {
894 FILE_USE(fp);
895 fds->revents = (*fp->f_ops->fo_poll)(fp,
896 fds->events | POLLERR | POLLHUP, p);
897 if (fds->revents != 0)
898 n++;
899 FILE_UNUSE(fp, p);
900 }
901 }
902 }
903 *retval = n;
904 return (0);
905 }
906
907 /*ARGSUSED*/
908 int
909 seltrue(dev_t dev, int events, struct proc *p)
910 {
911
912 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
913 }
914
/*
 * Record a select request.
 *
 * Remember the selecting process's pid in *sip.  If a different live
 * process is already recorded and has an LWP sleeping on &selwait, mark
 * the selinfo collided so selwakeup() broadcasts instead of waking one.
 */
void
selrecord(struct proc *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	if (sip->si_pid == mypid)
		return;		/* already recorded here */
	if (sip->si_pid && (p = pfind(sip->si_pid))) {
		/* Another live selector holds the slot: check for collision. */
		for (l = LIST_FIRST(&p->p_lwps); l != NULL;
		    l = LIST_NEXT(l, l_sibling)) {
			if (l->l_wchan == (caddr_t)&selwait)
				sip->si_flags |= SI_COLL;
		}
	} else
		sip->si_pid = mypid;	/* slot free or stale: claim it */
}
937
/*
 * Do a wakeup when a selectable event occurs.
 *
 * On a recorded collision, bump nselcoll and broadcast on &selwait so
 * every selector rescans.  Then wake (or mark for rescan) each LWP of
 * the single recorded process and clear the record.
 */
void
selwakeup(sip)
	struct selinfo *sip;
{
	struct lwp *l;
	struct proc *p;
	int s;

	if (sip->si_pid == 0)
		return;		/* no selector recorded */
	if (sip->si_flags & SI_COLL) {
		/* Multiple selectors: broadcast and bump the generation. */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		for (l = LIST_FIRST(&p->p_lwps); l != NULL;
		    l = LIST_NEXT(l, l_sibling)) {
			SCHED_LOCK(s);
			if (l->l_wchan == (caddr_t)&selwait) {
				/* Sleeping on select: make it runnable. */
				if (l->l_stat == LSSLEEP)
					setrunnable(l);
				else
					unsleep(l);
			} else if (l->l_flag & L_SELECT)
				/* Mid-scan: clearing L_SELECT forces a retry. */
				l->l_flag &= ~L_SELECT;
			SCHED_UNLOCK(s);
		}
	}
}
973