/*	$NetBSD: sys_generic.c,v 1.58 2001/10/30 13:37:21 itohy Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
41 */
42
43 #include "opt_ktrace.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/socketvar.h>
52 #include <sys/signalvar.h>
53 #include <sys/uio.h>
54 #include <sys/kernel.h>
55 #include <sys/stat.h>
56 #include <sys/malloc.h>
57 #include <sys/poll.h>
58 #ifdef KTRACE
59 #include <sys/ktrace.h>
60 #endif
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *));
66 int pollscan __P((struct proc *, struct pollfd *, int, register_t *));
67
68 /*
69 * Read system call.
70 */
71 /* ARGSUSED */
72 int
73 sys_read(struct proc *p, void *v, register_t *retval)
74 {
75 struct sys_read_args /* {
76 syscallarg(int) fd;
77 syscallarg(void *) buf;
78 syscallarg(size_t) nbyte;
79 } */ *uap = v;
80 int fd;
81 struct file *fp;
82 struct filedesc *fdp;
83
84 fd = SCARG(uap, fd);
85 fdp = p->p_fd;
86
87 if ((fp = fd_getfile(fdp, fd)) == NULL)
88 return (EBADF);
89
90 if ((fp->f_flag & FREAD) == 0)
91 return (EBADF);
92
93 FILE_USE(fp);
94
95 /* dofileread() will unuse the descriptor for us */
96 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
97 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
98 }
99
100 int
101 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte,
102 off_t *offset, int flags, register_t *retval)
103 {
104 struct uio auio;
105 struct iovec aiov;
106 long cnt, error;
107 #ifdef KTRACE
108 struct iovec ktriov;
109 #endif
110 error = 0;
111
112 aiov.iov_base = (caddr_t)buf;
113 aiov.iov_len = nbyte;
114 auio.uio_iov = &aiov;
115 auio.uio_iovcnt = 1;
116 auio.uio_resid = nbyte;
117 auio.uio_rw = UIO_READ;
118 auio.uio_segflg = UIO_USERSPACE;
119 auio.uio_procp = p;
120
121 /*
122 * Reads return ssize_t because -1 is returned on error. Therefore
123 * we must restrict the length to SSIZE_MAX to avoid garbage return
124 * values.
125 */
126 if (auio.uio_resid > SSIZE_MAX) {
127 error = EINVAL;
128 goto out;
129 }
130
131 #ifdef KTRACE
132 /*
133 * if tracing, save a copy of iovec
134 */
135 if (KTRPOINT(p, KTR_GENIO))
136 ktriov = aiov;
137 #endif
138 cnt = auio.uio_resid;
139 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
140 if (error)
141 if (auio.uio_resid != cnt && (error == ERESTART ||
142 error == EINTR || error == EWOULDBLOCK))
143 error = 0;
144 cnt -= auio.uio_resid;
145 #ifdef KTRACE
146 if (KTRPOINT(p, KTR_GENIO) && error == 0)
147 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
148 #endif
149 *retval = cnt;
150 out:
151 FILE_UNUSE(fp, p);
152 return (error);
153 }
154
155 /*
156 * Scatter read system call.
157 */
158 int
159 sys_readv(struct proc *p, void *v, register_t *retval)
160 {
161 struct sys_readv_args /* {
162 syscallarg(int) fd;
163 syscallarg(const struct iovec *) iovp;
164 syscallarg(int) iovcnt;
165 } */ *uap = v;
166 int fd;
167 struct file *fp;
168 struct filedesc *fdp;
169
170 fd = SCARG(uap, fd);
171 fdp = p->p_fd;
172
173 if ((fp = fd_getfile(fdp, fd)) == NULL)
174 return (EBADF);
175
176 if ((fp->f_flag & FREAD) == 0)
177 return (EBADF);
178
179 FILE_USE(fp);
180
181 /* dofilereadv() will unuse the descriptor for us */
182 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
183 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
184 }
185
/*
 * Common code for readv(2) and preadv(2): copy in the user's iovec
 * array, validate it, and perform the scatter read through fp.
 * Consumes the use count on fp; FILE_UNUSE is performed on every
 * exit path.
 */
int
dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	long i, cnt, error;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;	/* kernel copy of the iovec for tracing */
#endif

	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		/* Too many entries for the stack buffer; allocate. */
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		/* Small arrays use the on-stack iovec buffer. */
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Sum the segment lengths, rejecting any overflow past SSIZE_MAX. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	/* A transfer interrupted after partial progress reports success. */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
272
273 /*
274 * Write system call
275 */
276 int
277 sys_write(struct proc *p, void *v, register_t *retval)
278 {
279 struct sys_write_args /* {
280 syscallarg(int) fd;
281 syscallarg(const void *) buf;
282 syscallarg(size_t) nbyte;
283 } */ *uap = v;
284 int fd;
285 struct file *fp;
286 struct filedesc *fdp;
287
288 fd = SCARG(uap, fd);
289 fdp = p->p_fd;
290
291 if ((fp = fd_getfile(fdp, fd)) == NULL)
292 return (EBADF);
293
294 if ((fp->f_flag & FWRITE) == 0)
295 return (EBADF);
296
297 FILE_USE(fp);
298
299 /* dofilewrite() will unuse the descriptor for us */
300 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
301 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
302 }
303
/*
 * Common code for write(2) and pwrite(2): perform a single-segment
 * write through fp.  Consumes the use count on fp; FILE_UNUSE is
 * performed on every exit path.  Delivers SIGPIPE on EPIPE.
 */
int
dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	error = 0;
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/*
		 * A transfer interrupted after partial progress reports
		 * success; a broken pipe additionally signals the process.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
361
362 /*
363 * Gather write system call
364 */
365 int
366 sys_writev(struct proc *p, void *v, register_t *retval)
367 {
368 struct sys_writev_args /* {
369 syscallarg(int) fd;
370 syscallarg(const struct iovec *) iovp;
371 syscallarg(int) iovcnt;
372 } */ *uap = v;
373 int fd;
374 struct file *fp;
375 struct filedesc *fdp;
376
377 fd = SCARG(uap, fd);
378 fdp = p->p_fd;
379
380 if ((fp = fd_getfile(fdp, fd)) == NULL)
381 return (EBADF);
382
383 if ((fp->f_flag & FWRITE) == 0)
384 return (EBADF);
385
386 FILE_USE(fp);
387
388 /* dofilewritev() will unuse the descriptor for us */
389 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
390 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
391 }
392
393 int
394 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp,
395 int iovcnt, off_t *offset, int flags, register_t *retval)
396 {
397 struct uio auio;
398 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
399 long i, cnt, error;
400 u_int iovlen;
401 #ifdef KTRACE
402 struct iovec *ktriov;
403 #endif
404
405 error = 0;
406 #ifdef KTRACE
407 ktriov = NULL;
408 #endif
409 /* note: can't use iovlen until iovcnt is validated */
410 iovlen = iovcnt * sizeof(struct iovec);
411 if ((u_int)iovcnt > UIO_SMALLIOV) {
412 if ((u_int)iovcnt > IOV_MAX)
413 return (EINVAL);
414 iov = malloc(iovlen, M_IOV, M_WAITOK);
415 needfree = iov;
416 } else if ((u_int)iovcnt > 0) {
417 iov = aiov;
418 needfree = NULL;
419 } else {
420 error = EINVAL;
421 goto out;
422 }
423
424 auio.uio_iov = iov;
425 auio.uio_iovcnt = iovcnt;
426 auio.uio_rw = UIO_WRITE;
427 auio.uio_segflg = UIO_USERSPACE;
428 auio.uio_procp = p;
429 error = copyin(iovp, iov, iovlen);
430 if (error)
431 goto done;
432 auio.uio_resid = 0;
433 for (i = 0; i < iovcnt; i++) {
434 auio.uio_resid += iov->iov_len;
435 /*
436 * Writes return ssize_t because -1 is returned on error.
437 * Therefore we must restrict the length to SSIZE_MAX to
438 * avoid garbage return values.
439 */
440 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
441 error = EINVAL;
442 goto done;
443 }
444 iov++;
445 }
446 #ifdef KTRACE
447 /*
448 * if tracing, save a copy of iovec
449 */
450 if (KTRPOINT(p, KTR_GENIO)) {
451 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
452 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
453 }
454 #endif
455 cnt = auio.uio_resid;
456 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
457 if (error) {
458 if (auio.uio_resid != cnt && (error == ERESTART ||
459 error == EINTR || error == EWOULDBLOCK))
460 error = 0;
461 if (error == EPIPE)
462 psignal(p, SIGPIPE);
463 }
464 cnt -= auio.uio_resid;
465 #ifdef KTRACE
466 if (KTRPOINT(p, KTR_GENIO))
467 if (error == 0) {
468 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error);
469 free(ktriov, M_TEMP);
470 }
471 #endif
472 *retval = cnt;
473 done:
474 if (needfree)
475 free(needfree, M_IOV);
476 out:
477 FILE_UNUSE(fp, p);
478 return (error);
479 }
480
/*
 * Ioctl system call.  The argument buffer described by the command's
 * size field is staged in the kernel (on the stack when small, via
 * malloc when large), copied in/out around the file's fo_ioctl hook,
 * and a few generic commands are handled inline.
 */
/* ARGSUSED */
int
sys_ioctl(struct proc *p, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;	/* memp non-NULL iff the buffer was malloc'd */
	int tmp;
#define STK_PARAMS 128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	/* Close-on-exec is a descriptor attribute, not a file operation. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
		} else
			/* Zero-size IOC_IN passes the pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Track the non-blocking flag on fp before informing it. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Track the async-I/O flag on fp before informing it. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		/*
		 * Non-sockets take a process group: a non-positive value
		 * names a pgrp directly; a positive one names a process
		 * whose pgrp we use (translated to TIOCSPGRP).
		 */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		/* Non-sockets report the negated TIOCGPGRP result. */
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		if (error == 0)
			*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, p);
	return (error);
}
626
627 int selwait, nselcoll;
628
/*
 * Select system call.  Copies in up to three descriptor sets, scans
 * them with selscan(), and sleeps on selwait until a descriptor is
 * ready, the timeout expires, or a signal arrives.  The nselcoll /
 * P_SELECT protocol detects selwakeup() activity that races with the
 * scan and forces a rescan.
 */
int
sys_select(struct proc *p, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	caddr_t bits;
	/*
	 * Room for six sets laid out back to back: the three input sets
	 * (read, write, except) followed by the three result sets.
	 */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nd) < 0)
		return (EINVAL);
	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	/* Bytes per descriptor set, rounded up to whole fd_masks. */
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied input set; a null pointer means empty. */
#define getbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyin(SCARG(uap, name), bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef getbits

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * If selwakeup() ran during the scan (collision counter moved
	 * or P_SELECT was cleared), state may have changed: rescan
	 * instead of sleeping.
	 */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

	/* Copy each result set back out to the corresponding pointer. */
#define putbits(name, x) \
	if (SCARG(uap, name)) { \
		error = copyout(bits + ni * x, SCARG(uap, name), ni); \
		if (error) \
			goto out; \
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
738
739 int
740 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd,
741 register_t *retval)
742 {
743 struct filedesc *fdp;
744 int msk, i, j, fd, n;
745 fd_mask ibits, obits;
746 struct file *fp;
747 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
748 POLLWRNORM | POLLHUP | POLLERR,
749 POLLRDBAND };
750
751 fdp = p->p_fd;
752 n = 0;
753 for (msk = 0; msk < 3; msk++) {
754 for (i = 0; i < nfd; i += NFDBITS) {
755 ibits = *ibitp++;
756 obits = 0;
757 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
758 ibits &= ~(1 << j);
759 if ((fp = fd_getfile(fdp, fd)) == NULL)
760 return (EBADF);
761 FILE_USE(fp);
762 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) {
763 obits |= (1 << j);
764 n++;
765 }
766 FILE_UNUSE(fp, p);
767 }
768 *obitp++ = obits;
769 }
770 }
771 *retval = n;
772 return (0);
773 }
774
/*
 * Poll system call.  Copies in the pollfd array, scans it with
 * pollscan(), and sleeps on selwait until a descriptor is ready,
 * the timeout expires, or a signal arrives.  Shares the nselcoll /
 * P_SELECT collision-detection protocol with select.
 */
int
sys_poll(struct proc *p, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *) fds;
		syscallarg(u_int) nfds;
		syscallarg(int) timeout;
	} */ *uap = v;
	caddr_t bits;
	/* On-stack buffer avoids malloc for small pollfd arrays. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv;
	int s, ncoll, error, timo;
	size_t ni;

	error = 0;
	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;

	if (SCARG(uap, timeout) != INFTIM) {
		/* Convert the millisecond timeout to an absolute deadline. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;
 retry:
	/* Snapshot the collision counter before scanning. */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * If selwakeup() ran during the scan (collision counter moved
	 * or P_SELECT was cleared), state may have changed: rescan
	 * instead of sleeping.
	 */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Copy the array (with revents filled in) back out. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
860
861 int
862 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval)
863 {
864 struct filedesc *fdp;
865 int i, n;
866 struct file *fp;
867
868 fdp = p->p_fd;
869 n = 0;
870 for (i = 0; i < nfd; i++, fds++) {
871 if ((u_int)fds->fd >= fdp->fd_nfiles) {
872 fds->revents = POLLNVAL;
873 n++;
874 } else {
875 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
876 fds->revents = POLLNVAL;
877 n++;
878 } else {
879 FILE_USE(fp);
880 fds->revents = (*fp->f_ops->fo_poll)(fp,
881 fds->events | POLLERR | POLLHUP, p);
882 if (fds->revents != 0)
883 n++;
884 FILE_UNUSE(fp, p);
885 }
886 }
887 }
888 *retval = n;
889 return (0);
890 }
891
892 /*ARGSUSED*/
893 int
894 seltrue(dev_t dev, int events, struct proc *p)
895 {
896
897 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
898 }
899
900 /*
901 * Record a select request.
902 */
903 void
904 selrecord(struct proc *selector, struct selinfo *sip)
905 {
906 struct proc *p;
907 pid_t mypid;
908
909 mypid = selector->p_pid;
910 if (sip->si_pid == mypid)
911 return;
912 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
913 p->p_wchan == (caddr_t)&selwait)
914 sip->si_flags |= SI_COLL;
915 else {
916 sip->si_flags &= ~SI_COLL;
917 sip->si_pid = mypid;
918 }
919 }
920
921 /*
922 * Do a wakeup when a selectable event occurs.
923 */
924 void
925 selwakeup(sip)
926 struct selinfo *sip;
927 {
928 struct proc *p;
929 int s;
930
931 if (sip->si_pid == 0)
932 return;
933 if (sip->si_flags & SI_COLL) {
934 nselcoll++;
935 sip->si_flags &= ~SI_COLL;
936 wakeup((caddr_t)&selwait);
937 }
938 p = pfind(sip->si_pid);
939 sip->si_pid = 0;
940 if (p != NULL) {
941 SCHED_LOCK(s);
942 if (p->p_wchan == (caddr_t)&selwait) {
943 if (p->p_stat == SSLEEP)
944 setrunnable(p);
945 else
946 unsleep(p);
947 } else if (p->p_flag & P_SELECT)
948 p->p_flag &= ~P_SELECT;
949 SCHED_UNLOCK(s);
950 }
951 }
952