sys_generic.c revision 1.94 1 /* $NetBSD: sys_generic.c,v 1.94 2006/10/13 16:53:36 dogcow Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.94 2006/10/13 16:53:36 dogcow Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
65 #include <uvm/uvm_extern.h>
66
67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
68 int pollscan(struct lwp *, struct pollfd *, int, register_t *);
69
70
71 /*
72 * Read system call.
73 */
74 /* ARGSUSED */
75 int
76 sys_read(struct lwp *l, void *v, register_t *retval)
77 {
78 struct sys_read_args /* {
79 syscallarg(int) fd;
80 syscallarg(void *) buf;
81 syscallarg(size_t) nbyte;
82 } */ *uap = v;
83 int fd;
84 struct file *fp;
85 struct proc *p;
86 struct filedesc *fdp;
87
88 fd = SCARG(uap, fd);
89 p = l->l_proc;
90 fdp = p->p_fd;
91
92 if ((fp = fd_getfile(fdp, fd)) == NULL)
93 return (EBADF);
94
95 if ((fp->f_flag & FREAD) == 0) {
96 simple_unlock(&fp->f_slock);
97 return (EBADF);
98 }
99
100 FILE_USE(fp);
101
102 /* dofileread() will unuse the descriptor for us */
103 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
104 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
105 }
106
107 int
108 dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
109 off_t *offset, int flags, register_t *retval)
110 {
111 struct iovec aiov;
112 struct uio auio;
113 struct proc *p;
114 struct vmspace *vm;
115 size_t cnt;
116 int error;
117 #ifdef KTRACE
118 struct iovec ktriov = { .iov_base = NULL, };
119 #else
120 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */
121 #endif
122 p = l->l_proc;
123
124 error = proc_vmspace_getref(p, &vm);
125 if (error) {
126 goto out;
127 }
128
129 aiov.iov_base = (caddr_t)buf;
130 aiov.iov_len = nbyte;
131 auio.uio_iov = &aiov;
132 auio.uio_iovcnt = 1;
133 auio.uio_resid = nbyte;
134 auio.uio_rw = UIO_READ;
135 auio.uio_vmspace = vm;
136
137 /*
138 * Reads return ssize_t because -1 is returned on error. Therefore
139 * we must restrict the length to SSIZE_MAX to avoid garbage return
140 * values.
141 */
142 if (auio.uio_resid > SSIZE_MAX) {
143 error = EINVAL;
144 goto out;
145 }
146
147 #ifdef KTRACE
148 /*
149 * if tracing, save a copy of iovec
150 */
151 if (KTRPOINT(p, KTR_GENIO))
152 ktriov = aiov;
153 #endif
154 cnt = auio.uio_resid;
155 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
156 if (error)
157 if (auio.uio_resid != cnt && (error == ERESTART ||
158 error == EINTR || error == EWOULDBLOCK))
159 error = 0;
160 cnt -= auio.uio_resid;
161 #ifdef KTRACE
162 if (KTRPOINT(p, KTR_GENIO) && error == 0)
163 ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
164 #endif
165 *retval = cnt;
166 out:
167 FILE_UNUSE(fp, l);
168 uvmspace_free(vm);
169 return (error);
170 }
171
172 /*
173 * Scatter read system call.
174 */
175 int
176 sys_readv(struct lwp *l, void *v, register_t *retval)
177 {
178 struct sys_readv_args /* {
179 syscallarg(int) fd;
180 syscallarg(const struct iovec *) iovp;
181 syscallarg(int) iovcnt;
182 } */ *uap = v;
183 struct filedesc *fdp;
184 struct file *fp;
185 struct proc *p;
186 int fd;
187
188 fd = SCARG(uap, fd);
189 p = l->l_proc;
190 fdp = p->p_fd;
191
192 if ((fp = fd_getfile(fdp, fd)) == NULL)
193 return (EBADF);
194
195 if ((fp->f_flag & FREAD) == 0) {
196 simple_unlock(&fp->f_slock);
197 return (EBADF);
198 }
199
200 FILE_USE(fp);
201
202 /* dofilereadv() will unuse the descriptor for us */
203 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
204 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
205 }
206
207 int
208 dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
209 int iovcnt, off_t *offset, int flags, register_t *retval)
210 {
211 struct proc *p;
212 struct uio auio;
213 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
214 struct vmspace *vm;
215 int i, error;
216 size_t cnt;
217 u_int iovlen;
218 #ifdef KTRACE
219 struct iovec *ktriov;
220 #else
221 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */
222 #endif
223
224 p = l->l_proc;
225 error = proc_vmspace_getref(p, &vm);
226 if (error) {
227 goto out;
228 }
229
230 #ifdef KTRACE
231 ktriov = NULL;
232 #endif
233 /* note: can't use iovlen until iovcnt is validated */
234 iovlen = iovcnt * sizeof(struct iovec);
235 if ((u_int)iovcnt > UIO_SMALLIOV) {
236 if ((u_int)iovcnt > IOV_MAX) {
237 error = EINVAL;
238 goto out;
239 }
240 iov = malloc(iovlen, M_IOV, M_WAITOK);
241 needfree = iov;
242 } else if ((u_int)iovcnt > 0) {
243 iov = aiov;
244 needfree = NULL;
245 } else {
246 error = EINVAL;
247 goto out;
248 }
249
250 auio.uio_iov = iov;
251 auio.uio_iovcnt = iovcnt;
252 auio.uio_rw = UIO_READ;
253 auio.uio_vmspace = vm;
254 error = copyin(iovp, iov, iovlen);
255 if (error)
256 goto done;
257 auio.uio_resid = 0;
258 for (i = 0; i < iovcnt; i++) {
259 auio.uio_resid += iov->iov_len;
260 /*
261 * Reads return ssize_t because -1 is returned on error.
262 * Therefore we must restrict the length to SSIZE_MAX to
263 * avoid garbage return values.
264 */
265 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
266 error = EINVAL;
267 goto done;
268 }
269 iov++;
270 }
271 #ifdef KTRACE
272 /*
273 * if tracing, save a copy of iovec
274 */
275 if (KTRPOINT(p, KTR_GENIO)) {
276 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
277 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
278 }
279 #endif
280 cnt = auio.uio_resid;
281 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
282 if (error)
283 if (auio.uio_resid != cnt && (error == ERESTART ||
284 error == EINTR || error == EWOULDBLOCK))
285 error = 0;
286 cnt -= auio.uio_resid;
287 #ifdef KTRACE
288 if (ktriov != NULL) {
289 if (KTRPOINT(p, KTR_GENIO) && (error == 0))
290 ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
291 free(ktriov, M_TEMP);
292 }
293 #endif
294 *retval = cnt;
295 done:
296 if (needfree)
297 free(needfree, M_IOV);
298 out:
299 FILE_UNUSE(fp, l);
300 uvmspace_free(vm);
301 return (error);
302 }
303
304 /*
305 * Write system call
306 */
307 int
308 sys_write(struct lwp *l, void *v, register_t *retval)
309 {
310 struct sys_write_args /* {
311 syscallarg(int) fd;
312 syscallarg(const void *) buf;
313 syscallarg(size_t) nbyte;
314 } */ *uap = v;
315 int fd;
316 struct file *fp;
317 struct proc *p;
318 struct filedesc *fdp;
319
320 fd = SCARG(uap, fd);
321 p = l->l_proc;
322 fdp = p->p_fd;
323
324 if ((fp = fd_getfile(fdp, fd)) == NULL)
325 return (EBADF);
326
327 if ((fp->f_flag & FWRITE) == 0) {
328 simple_unlock(&fp->f_slock);
329 return (EBADF);
330 }
331
332 FILE_USE(fp);
333
334 /* dofilewrite() will unuse the descriptor for us */
335 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
336 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
337 }
338
339 int
340 dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
341 size_t nbyte, off_t *offset, int flags, register_t *retval)
342 {
343 struct iovec aiov;
344 struct uio auio;
345 struct proc *p;
346 struct vmspace *vm;
347 size_t cnt;
348 int error;
349 #ifdef KTRACE
350 struct iovec ktriov = { .iov_base = NULL, };
351 #else
352 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */
353 #endif
354
355 p = l->l_proc;
356 error = proc_vmspace_getref(p, &vm);
357 if (error) {
358 goto out;
359 }
360 aiov.iov_base = __UNCONST(buf); /* XXXUNCONST kills const */
361 aiov.iov_len = nbyte;
362 auio.uio_iov = &aiov;
363 auio.uio_iovcnt = 1;
364 auio.uio_resid = nbyte;
365 auio.uio_rw = UIO_WRITE;
366 auio.uio_vmspace = vm;
367
368 /*
369 * Writes return ssize_t because -1 is returned on error. Therefore
370 * we must restrict the length to SSIZE_MAX to avoid garbage return
371 * values.
372 */
373 if (auio.uio_resid > SSIZE_MAX) {
374 error = EINVAL;
375 goto out;
376 }
377
378 #ifdef KTRACE
379 /*
380 * if tracing, save a copy of iovec
381 */
382 if (KTRPOINT(p, KTR_GENIO))
383 ktriov = aiov;
384 #endif
385 cnt = auio.uio_resid;
386 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
387 if (error) {
388 if (auio.uio_resid != cnt && (error == ERESTART ||
389 error == EINTR || error == EWOULDBLOCK))
390 error = 0;
391 if (error == EPIPE)
392 psignal(p, SIGPIPE);
393 }
394 cnt -= auio.uio_resid;
395 #ifdef KTRACE
396 if (KTRPOINT(p, KTR_GENIO) && error == 0)
397 ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
398 #endif
399 *retval = cnt;
400 out:
401 FILE_UNUSE(fp, l);
402 uvmspace_free(vm);
403 return (error);
404 }
405
406 /*
407 * Gather write system call
408 */
409 int
410 sys_writev(struct lwp *l, void *v, register_t *retval)
411 {
412 struct sys_writev_args /* {
413 syscallarg(int) fd;
414 syscallarg(const struct iovec *) iovp;
415 syscallarg(int) iovcnt;
416 } */ *uap = v;
417 int fd;
418 struct file *fp;
419 struct proc *p;
420 struct filedesc *fdp;
421
422 fd = SCARG(uap, fd);
423 p = l->l_proc;
424 fdp = p->p_fd;
425
426 if ((fp = fd_getfile(fdp, fd)) == NULL)
427 return (EBADF);
428
429 if ((fp->f_flag & FWRITE) == 0) {
430 simple_unlock(&fp->f_slock);
431 return (EBADF);
432 }
433
434 FILE_USE(fp);
435
436 /* dofilewritev() will unuse the descriptor for us */
437 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
438 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
439 }
440
441 int
442 dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
443 int iovcnt, off_t *offset, int flags, register_t *retval)
444 {
445 struct proc *p;
446 struct uio auio;
447 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
448 struct vmspace *vm;
449 int i, error;
450 size_t cnt;
451 u_int iovlen;
452 #ifdef KTRACE
453 struct iovec *ktriov;
454 #else
455 do { if (&fd) {} } while (/* CONSTCOND */ 0); /* shut up -Wunused */
456 #endif
457
458 p = l->l_proc;
459 error = proc_vmspace_getref(p, &vm);
460 if (error) {
461 goto out;
462 }
463 #ifdef KTRACE
464 ktriov = NULL;
465 #endif
466 /* note: can't use iovlen until iovcnt is validated */
467 iovlen = iovcnt * sizeof(struct iovec);
468 if ((u_int)iovcnt > UIO_SMALLIOV) {
469 if ((u_int)iovcnt > IOV_MAX) {
470 error = EINVAL;
471 goto out;
472 }
473 iov = malloc(iovlen, M_IOV, M_WAITOK);
474 needfree = iov;
475 } else if ((u_int)iovcnt > 0) {
476 iov = aiov;
477 needfree = NULL;
478 } else {
479 error = EINVAL;
480 goto out;
481 }
482
483 auio.uio_iov = iov;
484 auio.uio_iovcnt = iovcnt;
485 auio.uio_rw = UIO_WRITE;
486 auio.uio_vmspace = vm;
487 error = copyin(iovp, iov, iovlen);
488 if (error)
489 goto done;
490 auio.uio_resid = 0;
491 for (i = 0; i < iovcnt; i++) {
492 auio.uio_resid += iov->iov_len;
493 /*
494 * Writes return ssize_t because -1 is returned on error.
495 * Therefore we must restrict the length to SSIZE_MAX to
496 * avoid garbage return values.
497 */
498 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
499 error = EINVAL;
500 goto done;
501 }
502 iov++;
503 }
504 #ifdef KTRACE
505 /*
506 * if tracing, save a copy of iovec
507 */
508 if (KTRPOINT(p, KTR_GENIO)) {
509 ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
510 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
511 }
512 #endif
513 cnt = auio.uio_resid;
514 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
515 if (error) {
516 if (auio.uio_resid != cnt && (error == ERESTART ||
517 error == EINTR || error == EWOULDBLOCK))
518 error = 0;
519 if (error == EPIPE)
520 psignal(p, SIGPIPE);
521 }
522 cnt -= auio.uio_resid;
523 #ifdef KTRACE
524 if (ktriov != NULL) {
525 if (KTRPOINT(p, KTR_GENIO) && (error == 0))
526 ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
527 free(ktriov, M_TEMP);
528 }
529 #endif
530 *retval = cnt;
531 done:
532 if (needfree)
533 free(needfree, M_IOV);
534 out:
535 FILE_UNUSE(fp, l);
536 uvmspace_free(vm);
537 return (error);
538 }
539
540 /*
541 * Ioctl system call
542 */
543 /* ARGSUSED */
544 int
545 sys_ioctl(struct lwp *l, void *v, register_t *retval __unused)
546 {
547 struct sys_ioctl_args /* {
548 syscallarg(int) fd;
549 syscallarg(u_long) com;
550 syscallarg(caddr_t) data;
551 } */ *uap = v;
552 struct file *fp;
553 struct proc *p;
554 struct filedesc *fdp;
555 u_long com;
556 int error;
557 u_int size;
558 caddr_t data, memp;
559 #define STK_PARAMS 128
560 u_long stkbuf[STK_PARAMS/sizeof(u_long)];
561
562 error = 0;
563 p = l->l_proc;
564 fdp = p->p_fd;
565
566 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
567 return (EBADF);
568
569 FILE_USE(fp);
570
571 if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
572 error = EBADF;
573 com = 0;
574 goto out;
575 }
576
577 switch (com = SCARG(uap, com)) {
578 case FIONCLEX:
579 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
580 goto out;
581
582 case FIOCLEX:
583 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
584 goto out;
585 }
586
587 /*
588 * Interpret high order word to find amount of data to be
589 * copied to/from the user's address space.
590 */
591 size = IOCPARM_LEN(com);
592 if (size > IOCPARM_MAX) {
593 error = ENOTTY;
594 goto out;
595 }
596 memp = NULL;
597 if (size > sizeof(stkbuf)) {
598 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
599 data = memp;
600 } else
601 data = (caddr_t)stkbuf;
602 if (com&IOC_IN) {
603 if (size) {
604 error = copyin(SCARG(uap, data), data, size);
605 if (error) {
606 if (memp)
607 free(memp, M_IOCTLOPS);
608 goto out;
609 }
610 #ifdef KTRACE
611 if (KTRPOINT(p, KTR_GENIO)) {
612 struct iovec iov;
613 iov.iov_base = SCARG(uap, data);
614 iov.iov_len = size;
615 ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
616 size, 0);
617 }
618 #endif
619 } else
620 *(caddr_t *)data = SCARG(uap, data);
621 } else if ((com&IOC_OUT) && size)
622 /*
623 * Zero the buffer so the user always
624 * gets back something deterministic.
625 */
626 memset(data, 0, size);
627 else if (com&IOC_VOID)
628 *(caddr_t *)data = SCARG(uap, data);
629
630 switch (com) {
631
632 case FIONBIO:
633 if (*(int *)data != 0)
634 fp->f_flag |= FNONBLOCK;
635 else
636 fp->f_flag &= ~FNONBLOCK;
637 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
638 break;
639
640 case FIOASYNC:
641 if (*(int *)data != 0)
642 fp->f_flag |= FASYNC;
643 else
644 fp->f_flag &= ~FASYNC;
645 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
646 break;
647
648 default:
649 error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
650 /*
651 * Copy any data to user, size was
652 * already set and checked above.
653 */
654 if (error == 0 && (com&IOC_OUT) && size) {
655 error = copyout(data, SCARG(uap, data), size);
656 #ifdef KTRACE
657 if (KTRPOINT(p, KTR_GENIO)) {
658 struct iovec iov;
659 iov.iov_base = SCARG(uap, data);
660 iov.iov_len = size;
661 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
662 size, error);
663 }
664 #endif
665 }
666 break;
667 }
668 if (memp)
669 free(memp, M_IOCTLOPS);
670 out:
671 FILE_UNUSE(fp, l);
672 switch (error) {
673 case -1:
674 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
675 "pid=%d comm=%s\n",
676 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
677 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
678 p->p_pid, p->p_comm);
679 /* FALLTHROUGH */
680 case EPASSTHROUGH:
681 error = ENOTTY;
682 /* FALLTHROUGH */
683 default:
684 return (error);
685 }
686 }
687
/*
 * selwait:  only its address matters; it is the sleep channel used by
 *           the select/poll waiters and by selwakeup() in this file.
 * nselcoll: incremented by selwakeup() whenever a recorded collision
 *           forces a wakeup of every waiter on selwait.
 */
int	selwait;
int	nselcoll;
689
690 /*
691 * Select system call.
692 */
693 int
694 sys_pselect(struct lwp *l, void *v, register_t *retval)
695 {
696 struct sys_pselect_args /* {
697 syscallarg(int) nd;
698 syscallarg(fd_set *) in;
699 syscallarg(fd_set *) ou;
700 syscallarg(fd_set *) ex;
701 syscallarg(const struct timespec *) ts;
702 syscallarg(sigset_t *) mask;
703 } */ * const uap = v;
704 struct timespec ats;
705 struct timeval atv, *tv = NULL;
706 sigset_t amask, *mask = NULL;
707 int error;
708
709 if (SCARG(uap, ts)) {
710 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
711 if (error)
712 return error;
713 atv.tv_sec = ats.tv_sec;
714 atv.tv_usec = ats.tv_nsec / 1000;
715 tv = &atv;
716 }
717 if (SCARG(uap, mask) != NULL) {
718 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
719 if (error)
720 return error;
721 mask = &amask;
722 }
723
724 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
725 SCARG(uap, ou), SCARG(uap, ex), tv, mask);
726 }
727
/*
 * Prepare a timeout for use with gettimeleft().
 *
 * Runs *tv through itimerfix() and records the current uptime in
 * *sleeptv.  Returns -1 if itimerfix() rejects the timeout, else 0.
 */
int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	const int rejected = itimerfix(tv);

	if (rejected != 0)
		return -1;

	getmicrouptime(sleeptv);
	return 0;
}
736
/*
 * Recompute the remaining timeout; done on every retry.
 *
 * *tv is reduced by the uptime that has elapsed since the moment stored
 * in *sleeptv (monotonic time scale), and *sleeptv is advanced to the
 * current uptime.  Returns the remaining time as converted by tvtohz().
 */
int
gettimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	struct timeval now;

	getmicrouptime(&now);

	/* tv = tv + sleeptv - now, i.e. tv minus the elapsed time. */
	timeradd(tv, sleeptv, tv);
	timersub(tv, &now, tv);

	*sleeptv = now;
	return tvtohz(tv);
}
754
755 int
756 sys_select(struct lwp *l, void *v, register_t *retval)
757 {
758 struct sys_select_args /* {
759 syscallarg(int) nd;
760 syscallarg(fd_set *) in;
761 syscallarg(fd_set *) ou;
762 syscallarg(fd_set *) ex;
763 syscallarg(struct timeval *) tv;
764 } */ * const uap = v;
765 struct timeval atv, *tv = NULL;
766 int error;
767
768 if (SCARG(uap, tv)) {
769 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
770 sizeof(atv));
771 if (error)
772 return error;
773 tv = &atv;
774 }
775
776 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
777 SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
778 }
779
780 int
781 selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
782 fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
783 {
784 char smallbits[howmany(FD_SETSIZE, NFDBITS) *
785 sizeof(fd_mask) * 6];
786 struct proc * const p = l->l_proc;
787 caddr_t bits;
788 int s, ncoll, error, timo;
789 size_t ni;
790 sigset_t oldmask;
791 struct timeval sleeptv;
792
793 error = 0;
794 if (nd < 0)
795 return (EINVAL);
796 if (nd > p->p_fd->fd_nfiles) {
797 /* forgiving; slightly wrong */
798 nd = p->p_fd->fd_nfiles;
799 }
800 ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
801 if (ni * 6 > sizeof(smallbits))
802 bits = malloc(ni * 6, M_TEMP, M_WAITOK);
803 else
804 bits = smallbits;
805
806 #define getbits(name, x) \
807 if (u_ ## name) { \
808 error = copyin(u_ ## name, bits + ni * x, ni); \
809 if (error) \
810 goto done; \
811 } else \
812 memset(bits + ni * x, 0, ni);
813 getbits(in, 0);
814 getbits(ou, 1);
815 getbits(ex, 2);
816 #undef getbits
817
818 timo = 0;
819 if (tv && inittimeleft(tv, &sleeptv) == -1) {
820 error = EINVAL;
821 goto done;
822 }
823
824 if (mask)
825 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);
826
827 retry:
828 ncoll = nselcoll;
829 l->l_flag |= L_SELECT;
830 error = selscan(l, (fd_mask *)(bits + ni * 0),
831 (fd_mask *)(bits + ni * 3), nd, retval);
832 if (error || *retval)
833 goto done;
834 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
835 goto done;
836 s = splsched();
837 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
838 splx(s);
839 goto retry;
840 }
841 l->l_flag &= ~L_SELECT;
842 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
843 splx(s);
844 if (error == 0)
845 goto retry;
846 done:
847 if (mask)
848 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
849 l->l_flag &= ~L_SELECT;
850 /* select is not restarted after signals... */
851 if (error == ERESTART)
852 error = EINTR;
853 if (error == EWOULDBLOCK)
854 error = 0;
855 if (error == 0) {
856
857 #define putbits(name, x) \
858 if (u_ ## name) { \
859 error = copyout(bits + ni * x, u_ ## name, ni); \
860 if (error) \
861 goto out; \
862 }
863 putbits(in, 3);
864 putbits(ou, 4);
865 putbits(ex, 5);
866 #undef putbits
867 }
868 out:
869 if (ni * 6 > sizeof(smallbits))
870 free(bits, M_TEMP);
871 return (error);
872 }
873
874 int
875 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
876 register_t *retval)
877 {
878 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
879 POLLWRNORM | POLLHUP | POLLERR,
880 POLLRDBAND };
881 struct proc *p = l->l_proc;
882 struct filedesc *fdp;
883 int msk, i, j, fd, n;
884 fd_mask ibits, obits;
885 struct file *fp;
886
887 fdp = p->p_fd;
888 n = 0;
889 for (msk = 0; msk < 3; msk++) {
890 for (i = 0; i < nfd; i += NFDBITS) {
891 ibits = *ibitp++;
892 obits = 0;
893 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
894 ibits &= ~(1 << j);
895 if ((fp = fd_getfile(fdp, fd)) == NULL)
896 return (EBADF);
897 FILE_USE(fp);
898 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
899 obits |= (1 << j);
900 n++;
901 }
902 FILE_UNUSE(fp, l);
903 }
904 *obitp++ = obits;
905 }
906 }
907 *retval = n;
908 return (0);
909 }
910
911 /*
912 * Poll system call.
913 */
914 int
915 sys_poll(struct lwp *l, void *v, register_t *retval)
916 {
917 struct sys_poll_args /* {
918 syscallarg(struct pollfd *) fds;
919 syscallarg(u_int) nfds;
920 syscallarg(int) timeout;
921 } */ * const uap = v;
922 struct timeval atv, *tv = NULL;
923
924 if (SCARG(uap, timeout) != INFTIM) {
925 atv.tv_sec = SCARG(uap, timeout) / 1000;
926 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
927 tv = &atv;
928 }
929
930 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
931 tv, NULL);
932 }
933
934 /*
935 * Poll system call.
936 */
937 int
938 sys_pollts(struct lwp *l, void *v, register_t *retval)
939 {
940 struct sys_pollts_args /* {
941 syscallarg(struct pollfd *) fds;
942 syscallarg(u_int) nfds;
943 syscallarg(const struct timespec *) ts;
944 syscallarg(const sigset_t *) mask;
945 } */ * const uap = v;
946 struct timespec ats;
947 struct timeval atv, *tv = NULL;
948 sigset_t amask, *mask = NULL;
949 int error;
950
951 if (SCARG(uap, ts)) {
952 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
953 if (error)
954 return error;
955 atv.tv_sec = ats.tv_sec;
956 atv.tv_usec = ats.tv_nsec / 1000;
957 tv = &atv;
958 }
959 if (SCARG(uap, mask)) {
960 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
961 if (error)
962 return error;
963 mask = &amask;
964 }
965
966 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
967 tv, mask);
968 }
969
970 int
971 pollcommon(struct lwp *l, register_t *retval,
972 struct pollfd *u_fds, u_int nfds,
973 struct timeval *tv, sigset_t *mask)
974 {
975 char smallbits[32 * sizeof(struct pollfd)];
976 struct proc * const p = l->l_proc;
977 caddr_t bits;
978 sigset_t oldmask;
979 int s, ncoll, error, timo;
980 size_t ni;
981 struct timeval sleeptv;
982
983 if (nfds > p->p_fd->fd_nfiles) {
984 /* forgiving; slightly wrong */
985 nfds = p->p_fd->fd_nfiles;
986 }
987 ni = nfds * sizeof(struct pollfd);
988 if (ni > sizeof(smallbits))
989 bits = malloc(ni, M_TEMP, M_WAITOK);
990 else
991 bits = smallbits;
992
993 error = copyin(u_fds, bits, ni);
994 if (error)
995 goto done;
996
997 timo = 0;
998 if (tv && inittimeleft(tv, &sleeptv) == -1) {
999 error = EINVAL;
1000 goto done;
1001 }
1002
1003 if (mask != NULL)
1004 (void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);
1005
1006 retry:
1007 ncoll = nselcoll;
1008 l->l_flag |= L_SELECT;
1009 error = pollscan(l, (struct pollfd *)bits, nfds, retval);
1010 if (error || *retval)
1011 goto done;
1012 if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
1013 goto done;
1014 s = splsched();
1015 if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
1016 splx(s);
1017 goto retry;
1018 }
1019 l->l_flag &= ~L_SELECT;
1020 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
1021 splx(s);
1022 if (error == 0)
1023 goto retry;
1024 done:
1025 if (mask != NULL)
1026 (void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
1027 l->l_flag &= ~L_SELECT;
1028 /* poll is not restarted after signals... */
1029 if (error == ERESTART)
1030 error = EINTR;
1031 if (error == EWOULDBLOCK)
1032 error = 0;
1033 if (error == 0) {
1034 error = copyout(bits, u_fds, ni);
1035 if (error)
1036 goto out;
1037 }
1038 out:
1039 if (ni > sizeof(smallbits))
1040 free(bits, M_TEMP);
1041 return (error);
1042 }
1043
1044 int
1045 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
1046 {
1047 struct proc *p = l->l_proc;
1048 struct filedesc *fdp;
1049 int i, n;
1050 struct file *fp;
1051
1052 fdp = p->p_fd;
1053 n = 0;
1054 for (i = 0; i < nfd; i++, fds++) {
1055 if (fds->fd >= fdp->fd_nfiles) {
1056 fds->revents = POLLNVAL;
1057 n++;
1058 } else if (fds->fd < 0) {
1059 fds->revents = 0;
1060 } else {
1061 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
1062 fds->revents = POLLNVAL;
1063 n++;
1064 } else {
1065 FILE_USE(fp);
1066 fds->revents = (*fp->f_ops->fo_poll)(fp,
1067 fds->events | POLLERR | POLLHUP, l);
1068 if (fds->revents != 0)
1069 n++;
1070 FILE_UNUSE(fp, l);
1071 }
1072 }
1073 }
1074 *retval = n;
1075 return (0);
1076 }
1077
1078 /*ARGSUSED*/
1079 int
1080 seltrue(dev_t dev __unused, int events, struct lwp *l __unused)
1081 {
1082
1083 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1084 }
1085
1086 /*
1087 * Record a select request.
1088 */
1089 void
1090 selrecord(struct lwp *selector, struct selinfo *sip)
1091 {
1092 struct lwp *l;
1093 struct proc *p;
1094 pid_t mypid;
1095
1096 mypid = selector->l_proc->p_pid;
1097 if (sip->sel_pid == mypid)
1098 return;
1099 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
1100 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1101 if (l->l_wchan == (caddr_t)&selwait) {
1102 sip->sel_collision = 1;
1103 return;
1104 }
1105 }
1106 }
1107
1108 sip->sel_pid = mypid;
1109 }
1110
1111 /*
1112 * Do a wakeup when a selectable event occurs.
1113 */
1114 void
1115 selwakeup(sip)
1116 struct selinfo *sip;
1117 {
1118 struct lwp *l;
1119 struct proc *p;
1120 int s;
1121
1122 if (sip->sel_pid == 0)
1123 return;
1124 if (sip->sel_collision) {
1125 sip->sel_pid = 0;
1126 nselcoll++;
1127 sip->sel_collision = 0;
1128 wakeup((caddr_t)&selwait);
1129 return;
1130 }
1131 p = pfind(sip->sel_pid);
1132 sip->sel_pid = 0;
1133 if (p != NULL) {
1134 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1135 SCHED_LOCK(s);
1136 if (l->l_wchan == (caddr_t)&selwait) {
1137 if (l->l_stat == LSSLEEP)
1138 setrunnable(l);
1139 else
1140 unsleep(l);
1141 } else if (l->l_flag & L_SELECT)
1142 l->l_flag &= ~L_SELECT;
1143 SCHED_UNLOCK(s);
1144 }
1145 }
1146 }
1147