/*	$NetBSD: sys_generic.c,v 1.101 2007/06/02 13:38:31 dsl Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.9 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.101 2007/06/02 13:38:31 dsl Exp $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

int	selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
int	pollscan(struct lwp *, struct pollfd *, int, register_t *);


/*
 * Read system call.
 */
/* ARGSUSED */
int
sys_read(struct lwp *l, void *v, register_t *retval)
{
	struct sys_read_args /* {
		syscallarg(int)		fd;
		syscallarg(void *)	buf;
		syscallarg(size_t)	nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofileread() will unuse the descriptor for us */
	return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}
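/*
 * Note on the pattern above: fd_getfile() returns the file entry with
 * its f_slock held (hence the simple_unlock() on the error path), and
 * FILE_USE() takes a use-count reference on the file and drops that
 * lock again.  Every system call in this file follows that protocol,
 * roughly:
 *
 *	if ((fp = fd_getfile(fdp, fd)) == NULL)
 *		return (EBADF);		(bad or closing descriptor)
 *	FILE_USE(fp);			(pin fp; releases f_slock)
 *	... operate on fp ...
 *	FILE_UNUSE(fp, l);		(unpin fp)
 */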
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
	off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif
	p = l->l_proc;

	error = proc_vmspace_getref(p, &vm);
	if (error) {
		goto out;
	}

	aiov.iov_base = (void *)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/* In case we are tracing, save a copy of iovec */
	ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
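	/*
	 * If some data was transferred before the call was interrupted,
	 * report the partial count as success rather than the restartable
	 * error, so the caller does not replay bytes it already consumed.
	 */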
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Scatter read system call.
 */
int
sys_readv(struct lwp *l, void *v, register_t *retval)
{
	struct sys_readv_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	struct filedesc	*fdp;
	struct file	*fp;
	struct proc	*p;
	int		fd;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FREAD) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilereadv() will unuse the descriptor for us */
	return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		goto out;
	}

#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((void *)ktriov, (void *)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Write system call
 */
int
sys_write(struct lwp *l, void *v, register_t *retval)
{
	struct sys_write_args /* {
		syscallarg(int)			fd;
		syscallarg(const void *)	buf;
		syscallarg(size_t)		nbyte;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewrite() will unuse the descriptor for us */
	return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
	size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		goto out;
	}
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/* In case we are tracing, save a copy of iovec */
	ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
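		/*
		 * POSIX requires a failed write to a pipe with no
		 * readers to raise SIGPIPE in the writer as well as
		 * returning EPIPE; dofilewritev() below does the same.
		 */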
		if (error == EPIPE) {
			mutex_enter(&proclist_mutex);
			psignal(p, SIGPIPE);
			mutex_exit(&proclist_mutex);
		}
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Gather write system call
 */
int
sys_writev(struct lwp *l, void *v, register_t *retval)
{
	struct sys_writev_args /* {
		syscallarg(int)				fd;
		syscallarg(const struct iovec *)	iovp;
		syscallarg(int)				iovcnt;
	} */ *uap = v;
	int		fd;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;

	fd = SCARG(uap, fd);
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, fd)) == NULL)
		return (EBADF);

	if ((fp->f_flag & FWRITE) == 0) {
		simple_unlock(&fp->f_slock);
		return (EBADF);
	}

	FILE_USE(fp);

	/* dofilewritev() will unuse the descriptor for us */
	return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
	    &fp->f_offset, FOF_UPDATE_OFFSET, retval));
}

int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
	int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		goto out;
	}
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((void *)ktriov, (void *)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE) {
			mutex_enter(&proclist_mutex);
			psignal(p, SIGPIPE);
			mutex_exit(&proclist_mutex);
		}
	}
	cnt -= auio.uio_resid;
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}

/*
 * Ioctl system call
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int)		fd;
		syscallarg(u_long)	com;
		syscallarg(void *)	data;
	} */ *uap = v;
	struct file	*fp;
	struct proc	*p;
	struct filedesc	*fdp;
	u_long		com;
	int		error;
	u_int		size;
	void		*data, *memp;
#define	STK_PARAMS	128
	u_long		stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;
		goto out;
	}

	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
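	/*
	 * For example, FIONBIO is defined as _IOW('f', 126, int): IOC_IN
	 * set in the high bits, IOCPARM_LEN(com) == sizeof(int), and
	 * IOCGROUP(com) == 'f'.  The encoding alone tells this code how
	 * many bytes to copy in and/or out, without asking the driver.
	 */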
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (void *)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
				    size, 0);
			}
#endif
		} else
			*(void **)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(void **)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
				    size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	switch (error) {
	case -1:
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
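/*
 * Illustrative sketch (not part of this file's build): how the FIONBIO
 * case above is typically driven from userland.  'fd' is an assumed,
 * already-open descriptor.
 */
#if 0
#include <sys/ioctl.h>
#include <err.h>

static void
set_nonblocking(int fd)
{
	int on = 1;

	/* The _IOW encoding makes the kernel copy the int in (IOC_IN). */
	if (ioctl(fd, FIONBIO, &on) == -1)
		err(1, "ioctl(FIONBIO)");
}
#endif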

int	selwait, nselcoll;

/*
 * Select system call.
 */
int
sys_pselect(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pselect_args /* {
		syscallarg(int)				nd;
		syscallarg(fd_set *)			in;
		syscallarg(fd_set *)			ou;
		syscallarg(fd_set *)			ex;
		syscallarg(const struct timespec *)	ts;
		syscallarg(sigset_t *)			mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask) != NULL) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, mask);
}

int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	if (itimerfix(tv))
		return -1;
	getmicrouptime(sleeptv);
	return 0;
}

int
gettimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	/*
	 * We have to recalculate the timeout on every retry.
	 */
	struct timeval slepttv;
	/*
	 * reduce tv by elapsed time
	 * based on monotonic time scale
	 */
	getmicrouptime(&slepttv);
	timeradd(tv, sleeptv, tv);
	timersub(tv, &slepttv, tv);
	*sleeptv = slepttv;
	return tvtohz(tv);
}
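/*
 * These two helpers implement a timeout that shrinks across retries,
 * measured against the monotonic clock.  selcommon() and pollcommon()
 * below use them roughly like this:
 *
 *	if (tv && inittimeleft(tv, &sleeptv) == -1)
 *		error = EINVAL;			(malformed timeout)
 *  retry:
 *	...scan descriptors...
 *	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
 *		goto done;			(time is up)
 *	error = tsleep(&selwait, PSOCK | PCATCH, ..., timo);
 *	if (error == 0)
 *		goto retry;			(woken; tv was reduced)
 */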

int
sys_select(struct lwp *l, void *v, register_t *retval)
{
	struct sys_select_args /* {
		syscallarg(int)			nd;
		syscallarg(fd_set *)		in;
		syscallarg(fd_set *)		ou;
		syscallarg(fd_set *)		ex;
		syscallarg(struct timeval *)	tv;
	} */ * const uap = v;
	struct timeval atv, *tv = NULL;
	int error;

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), (void *)&atv,
			sizeof(atv));
		if (error)
			return error;
		tv = &atv;
	}

	return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
	    SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
}

int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	char		smallbits[howmany(FD_SETSIZE, NFDBITS) *
			    sizeof(fd_mask) * 6];
	struct proc	* const p = l->l_proc;
	char		*bits;
	int		s, ncoll, error, timo;
	size_t		ni;
	sigset_t	oldmask;
	struct timeval	sleeptv;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
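	/*
	 * 'bits' holds six fd_set slices of ni bytes each: slices 0-2
	 * are the input in/ou/ex sets copied in from userland, slices
	 * 3-5 the corresponding output sets filled in by selscan().
	 */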

#define	getbits(name, x)						\
	if (u_ ## name) {						\
		error = copyin(u_ ## name, bits + ni * x, ni);		\
		if (error)						\
			goto done;					\
	} else								\
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

 retry:
	ncoll = nselcoll;
	l->l_flag |= LW_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	if ((l->l_flag & LW_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~LW_SELECT;
	error = tsleep((void *)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}
	l->l_flag &= ~LW_SELECT;
 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

#define	putbits(name, x)						\
	if (u_ ## name) {						\
		error = copyout(bits + ni * x, u_ ## name, ni);		\
		if (error)						\
			goto out;					\
	}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

int
selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
	register_t *retval)
{
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
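	/*
	 * flag[] maps select()'s three descriptor sets onto poll events:
	 * index 0 is the read set, 1 the write set, 2 the exceptional
	 * set.  POLLHUP and POLLERR are included so that hangups and
	 * errors are reported as ready for reading and writing.
	 */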
	struct proc *p = l->l_proc;
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, l);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}

/*
 * Poll system call.
 */
int
sys_poll(struct lwp *l, void *v, register_t *retval)
{
	struct sys_poll_args /* {
		syscallarg(struct pollfd *)	fds;
		syscallarg(u_int)		nfds;
		syscallarg(int)			timeout;
	} */ * const uap = v;
	struct timeval	atv, *tv = NULL;

	if (SCARG(uap, timeout) != INFTIM) {
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		tv = &atv;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
		tv, NULL);
}
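/*
 * Illustrative sketch (not part of this file's build): a userland
 * caller of the poll(2) path above.  Passing INFTIM (-1) as the
 * timeout leaves tv NULL, i.e. sleep until an event or a signal.
 * 'sock' is an assumed, already-open descriptor.
 */
#if 0
#include <poll.h>

static int
wait_readable(int sock)
{
	struct pollfd pfd;

	pfd.fd = sock;
	pfd.events = POLLIN;
	/* A 1000ms timeout becomes atv = { 1, 0 } in sys_poll() above. */
	return poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLIN);
}
#endif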

/*
 * Poll system call.
 */
int
sys_pollts(struct lwp *l, void *v, register_t *retval)
{
	struct sys_pollts_args /* {
		syscallarg(struct pollfd *)		fds;
		syscallarg(u_int)			nfds;
		syscallarg(const struct timespec *)	ts;
		syscallarg(const sigset_t *)		mask;
	} */ * const uap = v;
	struct timespec	ats;
	struct timeval	atv, *tv = NULL;
	sigset_t	amask, *mask = NULL;
	int		error;

	if (SCARG(uap, ts)) {
		error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
		if (error)
			return error;
		atv.tv_sec = ats.tv_sec;
		atv.tv_usec = ats.tv_nsec / 1000;
		tv = &atv;
	}
	if (SCARG(uap, mask)) {
		error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
		if (error)
			return error;
		mask = &amask;
	}

	return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
		tv, mask);
}

int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	char		smallbits[32 * sizeof(struct pollfd)];
	struct proc	* const p = l->l_proc;
	void *		bits;
	sigset_t	oldmask;
	int		s, ncoll, error, timo;
	size_t		ni;
	struct timeval	sleeptv;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	if (mask) {
		sigminusset(&sigcantmask, mask);
		mutex_enter(&p->p_smutex);
		oldmask = l->l_sigmask;
		l->l_sigmask = *mask;
		mutex_exit(&p->p_smutex);
	} else
		oldmask = l->l_sigmask;	/* XXXgcc */

 retry:
	ncoll = nselcoll;
	l->l_flag |= LW_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	if ((l->l_flag & LW_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~LW_SELECT;
	error = tsleep((void *)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask) {
		mutex_enter(&p->p_smutex);
		l->l_sigmask = oldmask;
		mutex_exit(&p->p_smutex);
	}

	l->l_flag &= ~LW_SELECT;
 done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

int
pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct filedesc	*fdp;
	int		i, n;
	struct file	*fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, l);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, l);
			}
		}
	}
	*retval = n;
	return (0);
}

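/*
 * Generic poll routine for devices that are always ready: report
 * whichever of the normal read/write events the caller asked for.
 */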
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct lwp *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 */
void
selrecord(struct lwp *selector, struct selinfo *sip)
{
	struct lwp	*l;
	struct proc	*p;
	pid_t		mypid;

	mypid = selector->l_proc->p_pid;
	if (sip->sel_pid == mypid)
		return;

	mutex_enter(&proclist_mutex);
	if (sip->sel_pid && (p = p_find(sip->sel_pid, PFIND_LOCKED))) {
		mutex_enter(&p->p_smutex);
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			lwp_lock(l);
			if (l->l_wchan == (void *)&selwait &&
			    l->l_stat == LSSLEEP) {
				sip->sel_collision = 1;
				lwp_unlock(l);
				break;
			}
			lwp_unlock(l);
		}
		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	if (!sip->sel_collision)
		sip->sel_pid = mypid;
}
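/*
 * The protocol above records at most one waiter per selinfo: sel_pid
 * names the single recorded process.  If a second process is found
 * already sleeping on selwait, sel_collision is set instead, and
 * selwakeup() below must then wake everyone sleeping on selwait and
 * let them all rescan.
 */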

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;

	if (sip->sel_pid == 0)
		return;
	if (sip->sel_collision) {
		sip->sel_pid = 0;
		nselcoll++;
		sip->sel_collision = 0;
		wakeup((void *)&selwait);
		return;
	}

	/*
	 * We must use the proclist_mutex as we can be called from an
	 * interrupt context.
	 */
	mutex_enter(&proclist_mutex);
	p = p_find(sip->sel_pid, PFIND_LOCKED);
	sip->sel_pid = 0;
	if (p == NULL) {
		mutex_exit(&proclist_mutex);
		return;
	}

	mutex_enter(&p->p_smutex);
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		if (l->l_wchan == (wchan_t)&selwait && l->l_stat == LSSLEEP) {
			/* setrunnable() will release the lock. */
			setrunnable(l);
		} else {
			if (l->l_flag & LW_SELECT)
				l->l_flag &= ~LW_SELECT;
			lwp_unlock(l);
		}
	}
	mutex_exit(&p->p_smutex);
	mutex_exit(&proclist_mutex);
}
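/*
 * Illustrative sketch (not part of this file's build): the usual
 * consumers of selrecord()/selwakeup() are a driver's poll routine
 * and its interrupt handler.  'struct foo_softc', 'foo_sc_lookup()'
 * and 'foo_ready()' are hypothetical names.
 */
#if 0
int
foopoll(dev_t dev, int events, struct lwp *l)
{
	struct foo_softc *sc = foo_sc_lookup(dev);	/* hypothetical */
	int revents = 0;

	if (events & (POLLIN | POLLRDNORM)) {
		if (foo_ready(sc))			/* hypothetical */
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(l, &sc->sc_rsel);	/* note this waiter */
	}
	return revents;
}

void
foointr(struct foo_softc *sc)
{
	/* data has arrived; wake anyone recorded in sc_rsel */
	selwakeup(&sc->sc_rsel);
}
#endif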