sys_generic.c revision 1.84.6.1 1 /* $NetBSD: sys_generic.c,v 1.84.6.1 2006/02/04 14:30:17 simonb Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.84.6.1 2006/02/04 14:30:17 simonb Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
65 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
66 int pollscan(struct lwp *, struct pollfd *, int, register_t *);
67
68
69 /*
70 * Read system call.
71 */
72 /* ARGSUSED */
73 int
74 sys_read(struct lwp *l, void *v, register_t *retval)
75 {
76 struct sys_read_args /* {
77 syscallarg(int) fd;
78 syscallarg(void *) buf;
79 syscallarg(size_t) nbyte;
80 } */ *uap = v;
81 int fd;
82 struct file *fp;
83 struct proc *p;
84 struct filedesc *fdp;
85
86 fd = SCARG(uap, fd);
87 p = l->l_proc;
88 fdp = p->p_fd;
89
90 if ((fp = fd_getfile(fdp, fd)) == NULL)
91 return (EBADF);
92
93 if ((fp->f_flag & FREAD) == 0) {
94 simple_unlock(&fp->f_slock);
95 return (EBADF);
96 }
97
98 FILE_USE(fp);
99
100 /* dofileread() will unuse the descriptor for us */
101 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
102 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
103 }
104
/*
 * Common code for read(2)-style calls: transfer up to nbyte bytes from
 * fp into the user buffer buf, starting at *offset.  The caller has
 * already verified FREAD and done FILE_USE(); this function always
 * performs the matching FILE_UNUSE() before returning.  On success the
 * number of bytes actually transferred is stored in *retval.
 */
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif
	p = l->l_proc;
	error = 0;

	/* Build a single-element uio describing the user buffer. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Reads return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of the iovec before fo_read consumes it
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/*
		 * If the transfer was interrupted after some data moved,
		 * report the partial read rather than the error.
		 */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
162
163 /*
164 * Scatter read system call.
165 */
166 int
167 sys_readv(struct lwp *l, void *v, register_t *retval)
168 {
169 struct sys_readv_args /* {
170 syscallarg(int) fd;
171 syscallarg(const struct iovec *) iovp;
172 syscallarg(int) iovcnt;
173 } */ *uap = v;
174 struct filedesc *fdp;
175 struct file *fp;
176 struct proc *p;
177 int fd;
178
179 fd = SCARG(uap, fd);
180 p = l->l_proc;
181 fdp = p->p_fd;
182
183 if ((fp = fd_getfile(fdp, fd)) == NULL)
184 return (EBADF);
185
186 if ((fp->f_flag & FREAD) == 0) {
187 simple_unlock(&fp->f_slock);
188 return (EBADF);
189 }
190
191 FILE_USE(fp);
192
193 /* dofilereadv() will unuse the descriptor for us */
194 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
195 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
196 }
197
/*
 * Common code for readv(2)-style calls: copy in the iovec array from
 * userspace, validate it, and read into the described buffers starting
 * at *offset.  The caller has already verified FREAD and done
 * FILE_USE(); this function always performs the matching FILE_UNUSE()
 * before returning.  The byte count transferred is stored in *retval.
 */
int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	/*
	 * Small iovec arrays live on the stack (aiov); larger ones up to
	 * IOV_MAX are heap-allocated and remembered in needfree.
	 */
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Total the lengths, rejecting anything that could overflow ssize_t. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of the iovec array before fo_read
	 * consumes it
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* Report a partial transfer instead of an interruption. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
287
288 /*
289 * Write system call
290 */
291 int
292 sys_write(struct lwp *l, void *v, register_t *retval)
293 {
294 struct sys_write_args /* {
295 syscallarg(int) fd;
296 syscallarg(const void *) buf;
297 syscallarg(size_t) nbyte;
298 } */ *uap = v;
299 int fd;
300 struct file *fp;
301 struct proc *p;
302 struct filedesc *fdp;
303
304 fd = SCARG(uap, fd);
305 p = l->l_proc;
306 fdp = p->p_fd;
307
308 if ((fp = fd_getfile(fdp, fd)) == NULL)
309 return (EBADF);
310
311 if ((fp->f_flag & FWRITE) == 0) {
312 simple_unlock(&fp->f_slock);
313 return (EBADF);
314 }
315
316 FILE_USE(fp);
317
318 /* dofilewrite() will unuse the descriptor for us */
319 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
320 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
321 }
322
/*
 * Common code for write(2)-style calls: transfer up to nbyte bytes from
 * the user buffer buf to fp, starting at *offset.  The caller has
 * already verified FWRITE and done FILE_USE(); this function always
 * performs the matching FILE_UNUSE() before returning.  On EPIPE the
 * process is sent SIGPIPE, matching historical write semantics.  The
 * byte count transferred is stored in *retval.
 */
int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};
#endif

	p = l->l_proc;
	error = 0;
	/* Build a single-element uio describing the user buffer. */
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Writes return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of the iovec before fo_write consumes it
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* Report a partial transfer instead of an interruption. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
383
384 /*
385 * Gather write system call
386 */
387 int
388 sys_writev(struct lwp *l, void *v, register_t *retval)
389 {
390 struct sys_writev_args /* {
391 syscallarg(int) fd;
392 syscallarg(const struct iovec *) iovp;
393 syscallarg(int) iovcnt;
394 } */ *uap = v;
395 int fd;
396 struct file *fp;
397 struct proc *p;
398 struct filedesc *fdp;
399
400 fd = SCARG(uap, fd);
401 p = l->l_proc;
402 fdp = p->p_fd;
403
404 if ((fp = fd_getfile(fdp, fd)) == NULL)
405 return (EBADF);
406
407 if ((fp->f_flag & FWRITE) == 0) {
408 simple_unlock(&fp->f_slock);
409 return (EBADF);
410 }
411
412 FILE_USE(fp);
413
414 /* dofilewritev() will unuse the descriptor for us */
415 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
416 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
417 }
418
/*
 * Common code for writev(2)-style calls: copy in the iovec array from
 * userspace, validate it, and write the described buffers to fp
 * starting at *offset.  The caller has already verified FWRITE and
 * done FILE_USE(); this function always performs the matching
 * FILE_UNUSE() before returning.  On EPIPE the process is sent
 * SIGPIPE.  The byte count transferred is stored in *retval.
 */
int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	/*
	 * Small iovec arrays live on the stack (aiov); larger ones up to
	 * IOV_MAX are heap-allocated and remembered in needfree.
	 */
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	/* Total the lengths, rejecting anything that could overflow ssize_t. */
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of the iovec array before fo_write
	 * consumes it
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* Report a partial transfer instead of an interruption. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
511
/*
 * Ioctl system call.  Decodes the command word to size and direction,
 * stages the argument through a kernel buffer (stack for small
 * arguments, malloc for large ones), dispatches to the file's
 * fo_ioctl routine, and copies results back out for IOC_OUT commands.
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
#define STK_PARAMS	128
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep com defined for the printf below */
		goto out;
	}

	/* Close-on-exec commands are handled in the fd layer directly. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	/* Large arguments use a temporary buffer; small ones the stack. */
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-size IOC_IN: pass the user pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep the file flag in sync, then notify the object. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		/* Keep the file flag in sync, then notify the object. */
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	/* Post-process driver return values before handing back to user. */
	switch (error) {
	case -1:
		/* -1 from a driver's ioctl routine is a bug: log it. */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
659
660 int selwait, nselcoll;
661
662 /*
663 * Select system call.
664 */
665 int
666 sys_pselect(struct lwp *l, void *v, register_t *retval)
667 {
668 struct sys_pselect_args /* {
669 syscallarg(int) nd;
670 syscallarg(fd_set *) in;
671 syscallarg(fd_set *) ou;
672 syscallarg(fd_set *) ex;
673 syscallarg(const struct timespec *) ts;
674 syscallarg(sigset_t *) mask;
675 } */ * const uap = v;
676 struct timespec ats;
677 struct timeval atv, *tv = NULL;
678 sigset_t amask, *mask = NULL;
679 int error;
680
681 if (SCARG(uap, ts)) {
682 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
683 if (error)
684 return error;
685 atv.tv_sec = ats.tv_sec;
686 atv.tv_usec = ats.tv_nsec / 1000;
687 tv = &atv;
688 }
689 if (SCARG(uap, mask) != NULL) {
690 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
691 if (error)
692 return error;
693 mask = &amask;
694 }
695
696 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
697 SCARG(uap, ou), SCARG(uap, ex), tv, mask);
698 }
699
700 int
701 sys_select(struct lwp *l, void *v, register_t *retval)
702 {
703 struct sys_select_args /* {
704 syscallarg(int) nd;
705 syscallarg(fd_set *) in;
706 syscallarg(fd_set *) ou;
707 syscallarg(fd_set *) ex;
708 syscallarg(struct timeval *) tv;
709 } */ * const uap = v;
710 struct timeval atv, *tv = NULL;
711 int error;
712
713 if (SCARG(uap, tv)) {
714 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
715 sizeof(atv));
716 if (error)
717 return error;
718 tv = &atv;
719 }
720
721 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
722 SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
723 }
724
/*
 * Common code for select(2) and pselect(2).  Copies in the three input
 * fd_sets (NULL sets are treated as empty), scans them with selscan(),
 * and sleeps on selwait until a descriptor is ready, the timeout
 * expires, or a signal arrives.  If mask is non-NULL it is installed
 * as the signal mask for the duration of the wait and restored on the
 * way out.  On success the output sets are copied back to userspace.
 */
int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	/* Room for three input sets plus three output sets. */
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	struct proc	* const p = l->l_proc;
	caddr_t		bits;
	int		s, ncoll, error, timo;
	size_t		ni;
	sigset_t	oldmask;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied set; clear the slot for a NULL set. */
#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && itimerfix(tv)) {
		error = EINVAL;
		goto done;
	}
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = tvtohz(tv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * If L_SELECT was cleared or a collision occurred while we were
	 * scanning, an event may have been missed: rescan before sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

	/* Copy each output set back to any user pointer that was given. */
#define	putbits(name, x)						\
	if (u_ ## name) {						\
		error = copyout(bits + ni * x, u_ ## name, ni);		\
		if (error)						\
			goto out;					\
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
822
823 int
824 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
825 register_t *retval)
826 {
827 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
828 POLLWRNORM | POLLHUP | POLLERR,
829 POLLRDBAND };
830 struct proc *p = l->l_proc;
831 struct filedesc *fdp;
832 int msk, i, j, fd, n;
833 fd_mask ibits, obits;
834 struct file *fp;
835
836 fdp = p->p_fd;
837 n = 0;
838 for (msk = 0; msk < 3; msk++) {
839 for (i = 0; i < nfd; i += NFDBITS) {
840 ibits = *ibitp++;
841 obits = 0;
842 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
843 ibits &= ~(1 << j);
844 if ((fp = fd_getfile(fdp, fd)) == NULL)
845 return (EBADF);
846 FILE_USE(fp);
847 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
848 obits |= (1 << j);
849 n++;
850 }
851 FILE_UNUSE(fp, l);
852 }
853 *obitp++ = obits;
854 }
855 }
856 *retval = n;
857 return (0);
858 }
859
860 /*
861 * Poll system call.
862 */
863 int
864 sys_poll(struct lwp *l, void *v, register_t *retval)
865 {
866 struct sys_poll_args /* {
867 syscallarg(struct pollfd *) fds;
868 syscallarg(u_int) nfds;
869 syscallarg(int) timeout;
870 } */ * const uap = v;
871 struct timeval atv, *tv = NULL;
872
873 if (SCARG(uap, timeout) != INFTIM) {
874 atv.tv_sec = SCARG(uap, timeout) / 1000;
875 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
876 tv = &atv;
877 }
878
879 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
880 tv, NULL);
881 }
882
883 /*
884 * Poll system call.
885 */
886 int
887 sys_pollts(struct lwp *l, void *v, register_t *retval)
888 {
889 struct sys_pollts_args /* {
890 syscallarg(struct pollfd *) fds;
891 syscallarg(u_int) nfds;
892 syscallarg(const struct timespec *) ts;
893 syscallarg(const sigset_t *) mask;
894 } */ * const uap = v;
895 struct timespec ats;
896 struct timeval atv, *tv = NULL;
897 sigset_t amask, *mask = NULL;
898 int error;
899
900 if (SCARG(uap, ts)) {
901 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
902 if (error)
903 return error;
904 atv.tv_sec = ats.tv_sec;
905 atv.tv_usec = ats.tv_nsec / 1000;
906 tv = &atv;
907 }
908 if (SCARG(uap, mask)) {
909 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
910 if (error)
911 return error;
912 mask = &amask;
913 }
914
915 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
916 tv, mask);
917 }
918
/*
 * Common code for poll(2) and pollts(2).  Copies in the pollfd array,
 * scans it with pollscan(), and sleeps on selwait until a descriptor
 * is ready, the timeout expires, or a signal arrives.  If mask is
 * non-NULL it is installed as the signal mask for the duration of the
 * wait and restored on the way out.  On success the array (with
 * revents filled in) is copied back to userspace.
 */
int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	char		smallbits[32 * sizeof(struct pollfd)];
	struct proc	* const p = l->l_proc;
	caddr_t		bits;
	sigset_t	oldmask;
	int		s, ncoll, error, timo;
	size_t		ni;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	/* Small arrays stay on the stack; large ones are heap-allocated. */
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv && itimerfix(tv)) {
		error = EINVAL;
		goto done;
	}
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = tvtohz(tv);
		if (timo <= 0)
			goto done;
	}
	s = splsched();
	/*
	 * If L_SELECT was cleared or a collision occurred while we were
	 * scanning, an event may have been missed: rescan before sleeping.
	 */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
996
997 int
998 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
999 {
1000 struct proc *p = l->l_proc;
1001 struct filedesc *fdp;
1002 int i, n;
1003 struct file *fp;
1004
1005 fdp = p->p_fd;
1006 n = 0;
1007 for (i = 0; i < nfd; i++, fds++) {
1008 if (fds->fd >= fdp->fd_nfiles) {
1009 fds->revents = POLLNVAL;
1010 n++;
1011 } else if (fds->fd < 0) {
1012 fds->revents = 0;
1013 } else {
1014 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
1015 fds->revents = POLLNVAL;
1016 n++;
1017 } else {
1018 FILE_USE(fp);
1019 fds->revents = (*fp->f_ops->fo_poll)(fp,
1020 fds->events | POLLERR | POLLHUP, l);
1021 if (fds->revents != 0)
1022 n++;
1023 FILE_UNUSE(fp, l);
1024 }
1025 }
1026 }
1027 *retval = n;
1028 return (0);
1029 }
1030
/*
 * Generic poll routine for devices that are always ready: report any
 * requested normal read/write events as immediately available.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct lwp *l)
{

	return events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM);
}
1038
/*
 * Record a select request.  A selinfo remembers at most one waiting
 * process (by pid); if a second process selects on the same object
 * while the first appears to be sleeping in select/poll, the
 * collision flag is set so selwakeup() falls back to waking every
 * selwait sleeper.
 */
void
selrecord(struct lwp *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;

	mypid = selector->l_proc->p_pid;
	/* Already recorded for this process: nothing to do. */
	if (sip->sel_pid == mypid)
		return;
	/*
	 * Another process is recorded; if any of its LWPs is actually
	 * sleeping on selwait we cannot overwrite it, so mark a
	 * collision instead.
	 */
	if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l->l_wchan == (caddr_t)&selwait) {
				sip->sel_collision = 1;
				return;
			}
		}
	}

	sip->sel_pid = mypid;
}
1063
1064 /*
1065 * Do a wakeup when a selectable event occurs.
1066 */
1067 void
1068 selwakeup(sip)
1069 struct selinfo *sip;
1070 {
1071 struct lwp *l;
1072 struct proc *p;
1073 int s;
1074
1075 if (sip->sel_pid == 0)
1076 return;
1077 if (sip->sel_collision) {
1078 sip->sel_pid = 0;
1079 nselcoll++;
1080 sip->sel_collision = 0;
1081 wakeup((caddr_t)&selwait);
1082 return;
1083 }
1084 p = pfind(sip->sel_pid);
1085 sip->sel_pid = 0;
1086 if (p != NULL) {
1087 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1088 SCHED_LOCK(s);
1089 if (l->l_wchan == (caddr_t)&selwait) {
1090 if (l->l_stat == LSSLEEP)
1091 setrunnable(l);
1092 else
1093 unsleep(l);
1094 } else if (l->l_flag & L_SELECT)
1095 l->l_flag &= ~L_SELECT;
1096 SCHED_UNLOCK(s);
1097 }
1098 }
1099 }
1100