/*	$NetBSD: sys_generic.c,v 1.92.2.4 2006/12/29 20:27:44 ad Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.92.2.4 2006/12/29 20:27:44 ad Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int	selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
int	pollscan(struct lwp *, struct pollfd *, int, register_t *);

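/*
 * Note on descriptor handling: fd_getfile() returns the file entry
 * with fp->f_slock held.  FILE_USE() takes a use reference and drops
 * that lock, and every path out of the do*() helpers below must pair
 * it with FILE_UNUSE().
 */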

/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int)		fd;
		syscallarg(void *)	buf;
		syscallarg(size_t)	nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec	ktriov = { .iov_base = NULL, };
#endif
	p = l->l_proc;

	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* No vmspace reference was taken, so none is freed. */
		FILE_UNUSE(fp, l);
		return (error);
	}

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
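	/*
	 * A transfer cut short by a signal or a would-block condition
	 * still succeeds with the partial byte count below; the error
	 * is reported only if nothing had been transferred yet.
	 */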
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	struct filedesc	*fdp;
	struct file	*fp;
	struct proc	*p;
	int		fd;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* No vmspace reference was taken, so none is freed. */
		FILE_UNUSE(fp, l);
		return (error);
	}

#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int)			fd;
		syscallarg(const void *)	buf;
		syscallarg(size_t)		nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec	ktriov = { .iov_base = NULL, };
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* No vmspace reference was taken, so none is freed. */
		FILE_UNUSE(fp, l);
		return (error);
	}
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
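	/*
	 * As for reads, an interrupted partial write reports the byte
	 * count rather than the error.  EPIPE is the exception: it is
	 * passed through and SIGPIPE is raised, as write(2) requires.
	 */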
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			mutex_enter(&proclist_mutex);
			psignal(p, SIGPIPE);
			mutex_exit(&proclist_mutex);
		}
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		/* No vmspace reference was taken, so none is freed. */
		FILE_UNUSE(fp, l);
		return (error);
	}
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			mutex_enter(&proclist_mutex);
			psignal(p, SIGPIPE);
			mutex_exit(&proclist_mutex);
		}
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(caddr_t)	data;
	} */ *uap = v;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;
	u_long		com;
	int		error;
	u_int		size;
	caddr_t		data, memp;
#define	STK_PARAMS	128
	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
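	/*
	 * IOC_IN: copy the argument into the kernel buffer (or, for
	 * zero-size commands, pass the user pointer itself by value).
	 * IOC_OUT: pre-zero the buffer so the later copyout returns
	 * deterministic data.  IOC_VOID: pass the pointer by value.
	 */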
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
				    size, 0);
			}
#endif
		} else
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
				    size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}

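/*
 * All select()ing and poll()ing LWPs sleep on the single wait channel
 * &selwait.  nselcoll counts collisions; selcommon() and pollcommon()
 * snapshot it before scanning and rescan if it changed, so a wakeup
 * that raced with the scan is not lost.
 */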
int	selwait, nselcoll;

/*
 * Select system call with a caller-supplied signal mask and timespec
 * timeout (pselect(2)).
 */
int
sys_pselect(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int)				nd;
		syscallarg(fd_set *)			in;
		syscallarg(fd_set *)			ou;
		syscallarg(fd_set *)			ex;
		syscallarg(const struct timespec *)	ts;
		syscallarg(sigset_t *)			mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

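	/*
	 * selcommon() installs the supplied mask for the duration of
	 * the wait and restores the previous one before returning,
	 * giving the atomic mask-switch-and-wait pselect(2) promises.
	 */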
	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, mask);
}

int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	if (itimerfix(tv))
		return -1;
	getmicrouptime(sleeptv);
	return 0;
}

int
gettimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	/*
	 * We have to recalculate the timeout on every retry.
	 */
	struct timeval slepttv;
	/*
	 * reduce tv by elapsed time
	 * based on monotonic time scale
	 */
	getmicrouptime(&slepttv);
	timeradd(tv, sleeptv, tv);
	timersub(tv, &slepttv, tv);
	*sleeptv = slepttv;
	return tvtohz(tv);
}

int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */ * const uap = v;
	struct timeval atv, *tv = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (caddr_t)&atv,
			sizeof(atv));
		if (error)
			return error;
		tv = &atv;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
}

int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	struct proc	* const p = l->l_proc;
	caddr_t		bits;
	int		s, ncoll, error, timo;
	size_t		ni;
	sigset_t	*oldmask = NULL;	/* XXXgcc */
	struct timeval	sleeptv;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
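	/*
	 * The bits buffer holds six fd_set areas of ni bytes each: the
	 * input in/ou/ex sets at offsets 0-2 and the corresponding
	 * result sets at offsets 3-5, which selscan() fills and the
	 * copyout at the end returns to the user.
	 */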

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		l->l_sigoldmask = *mask;
		oldmask = l->l_sigmask;
		l->l_sigmask = &l->l_sigoldmask;
		mutex_exit(&p->p_smutex);
	}

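	/*
	 * Scan/sleep loop: take a snapshot of nselcoll and set
	 * L_SELECT before scanning.  If selwakeup() records a
	 * collision in the meantime it bumps nselcoll and clears
	 * L_SELECT, and the re-check under splsched() below forces a
	 * rescan instead of a sleep that could miss the event.
	 */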
 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}
	l->l_flag &= ~L_SELECT;
 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define	putbits(name, x)						\
	if (u_ ## name) {						\
		error = copyout(bits + ni * x, u_ ## name, ni);		\
		if (error)						\
			goto out;					\
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

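/*
 * Scan the three input fd_sets; flag[] maps the read, write and
 * except set of select(2) onto the poll events that satisfy it.
 * Found events are written to the output sets at obitp.
 */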
int
selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
	struct proc *p = l->l_proc;
	struct filedesc	*fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, l);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */ * const uap = v;
	struct timeval	atv, *tv = NULL;

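	/*
	 * A timeout of INFTIM (-1) means wait forever; otherwise
	 * convert the millisecond argument to a timeval for
	 * pollcommon().
	 */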
	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		tv = &atv;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
	    tv, NULL);
}

/*
 * Poll system call with a timespec timeout and a caller-supplied
 * signal mask (pollts(2)).
 */
int
sys_pollts(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pollts_args /* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
	    tv, mask);
}

int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	char		smallbits[32 * sizeof(struct pollfd)];
	struct proc	* const p = l->l_proc;
	caddr_t		bits;
	sigset_t	*oldmask = NULL;	/* XXXgcc */
	int		s, ncoll, error, timo;
	size_t		ni;
	struct timeval	sleeptv;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		l->l_sigoldmask = *mask;
		oldmask = l->l_sigmask;
		l->l_sigmask = &l->l_sigoldmask;
		mutex_exit(&p->p_smutex);
	}

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}

	l->l_flag &= ~L_SELECT;
 done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

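/*
 * Query each pollfd entry once.  Descriptors beyond the table or
 * closed get POLLNVAL; negative descriptors are ignored and report
 * no events, as poll(2) specifies.
 */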
int
pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
{
	struct proc	*p = l->l_proc;
	struct filedesc	*fdp;
	int		i, n;
	struct file	*fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, l);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, l);
			}
		}
	}
	*retval = n;
	return (0);
}

/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct lwp *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
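/*
 * A selinfo remembers at most one selecting process.  When a second
 * process selects on the same object while the first may still be
 * waiting, sel_collision is set and selwakeup() falls back to waking
 * everyone sleeping on &selwait.
 */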
void
selrecord(struct lwp *selector, struct selinfo *sip)
{
	struct lwp	*l;
	struct proc	*p;
	pid_t		mypid;

	mypid = selector->l_proc->p_pid;
	if (sip->sel_pid == mypid)
		return;

	mutex_enter(&proclist_mutex);
	if (sip->sel_pid && (p = p_find(sip->sel_pid, PFIND_LOCKED))) {
		mutex_enter(&p->p_smutex);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			lwp_lock(l);
			if (l->l_wchan == (wchan_t)&selwait &&
			    l->l_stat == LSSLEEP) {
				sip->sel_collision = 1;
				lwp_unlock(l);
				break;
			}
			lwp_unlock(l);
		}
		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	if (!sip->sel_collision)
		sip->sel_pid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct lwp	*l;
	struct proc	*p;

	if (sip->sel_pid == 0)
		return;
	if (sip->sel_collision) {
		sip->sel_pid = 0;
		nselcoll++;
		sip->sel_collision = 0;
		wakeup((caddr_t)&selwait);
		return;
	}

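	/*
	 * No collision: wake only the recorded process.  LWPs of that
	 * process sleeping on the select channel are made runnable;
	 * any others have L_SELECT cleared so an in-progress scan
	 * retries rather than sleeping.
	 */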
1159
1160 /*
1161 * We must use the proclist_mutex as we can be called from an
1162 * interrupt context.
1163 */
1164 mutex_enter(&proclist_mutex);
1165 p = p_find(sip->sel_pid, PFIND_LOCKED);
1166 sip->sel_pid = 0;
1167 if (p == NULL) {
1168 mutex_exit(&proclist_mutex);
1169 return;
1170 }
1171
1172 mutex_enter(&p->p_smutex);
1173 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1174 lwp_lock(l);
1175 if (l->l_wchan == (wchan_t)&selwait && l->l_stat == LSSLEEP) {
1176 /* setrunnable() will release the lock. */
1177 setrunnable(l);
1178 } else {
1179 if (l->l_flag & L_SELECT)
1180 l->l_flag &= ~L_SELECT;
1181 lwp_unlock(l);
1182 }
1183 }
1184 mutex_exit(&p->p_smutex);
1185 mutex_exit(&proclist_mutex);
1186 }
1187
1188 int
1189 sys_sched_yield(struct lwp *l, void *v, register_t *retval)
1190 {
1191
1192 preempt(0);
1193 return 0;
1194 }
1195