/*	$NetBSD: sys_generic.c,v 1.84 2005/12/11 12:24:30 christos Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.84 2005/12/11 12:24:30 christos Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
/* Internal scanners shared by select(2)/poll(2); defined below. */
int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
int pollscan(struct lwp *, struct pollfd *, int, register_t *);
67
68
69 /*
70 * Read system call.
71 */
72 /* ARGSUSED */
73 int
74 sys_read(struct lwp *l, void *v, register_t *retval)
75 {
76 struct sys_read_args /* {
77 syscallarg(int) fd;
78 syscallarg(void *) buf;
79 syscallarg(size_t) nbyte;
80 } */ *uap = v;
81 int fd;
82 struct file *fp;
83 struct proc *p;
84 struct filedesc *fdp;
85
86 fd = SCARG(uap, fd);
87 p = l->l_proc;
88 fdp = p->p_fd;
89
90 if ((fp = fd_getfile(fdp, fd)) == NULL)
91 return (EBADF);
92
93 if ((fp->f_flag & FREAD) == 0) {
94 simple_unlock(&fp->f_slock);
95 return (EBADF);
96 }
97
98 FILE_USE(fp);
99
100 /* dofileread() will unuse the descriptor for us */
101 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
102 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
103 }
104
/*
 * Common code for read(2)-style calls once the file has been held and
 * FILE_USE()d by the caller.  Builds a single-element uio, performs the
 * transfer through fp->f_ops->fo_read, records it for ktrace when
 * enabled, and stores the byte count in *retval.  Always drops the
 * file's use count before returning -- callers rely on this.
 */
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};	/* snapshot of aiov for ktrace */
#endif
	p = l->l_proc;
	error = 0;

	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Reads return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes auio/aiov)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer interrupted by a signal still succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
162
163 /*
164 * Scatter read system call.
165 */
166 int
167 sys_readv(struct lwp *l, void *v, register_t *retval)
168 {
169 struct sys_readv_args /* {
170 syscallarg(int) fd;
171 syscallarg(const struct iovec *) iovp;
172 syscallarg(int) iovcnt;
173 } */ *uap = v;
174 struct filedesc *fdp;
175 struct file *fp;
176 struct proc *p;
177 int fd;
178
179 fd = SCARG(uap, fd);
180 p = l->l_proc;
181 fdp = p->p_fd;
182
183 if ((fp = fd_getfile(fdp, fd)) == NULL)
184 return (EBADF);
185
186 if ((fp->f_flag & FREAD) == 0) {
187 simple_unlock(&fp->f_slock);
188 return (EBADF);
189 }
190
191 FILE_USE(fp);
192
193 /* dofilereadv() will unuse the descriptor for us */
194 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
195 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
196 }
197
/*
 * Common code for readv(2)-style calls once the file has been held and
 * FILE_USE()d.  Copies the iovec array in from userspace (using a small
 * on-stack array when it fits), validates the total transfer length
 * against SSIZE_MAX, performs the read through fp->f_ops->fo_read, and
 * optionally records the transfer for ktrace.  Always drops the file's
 * use count before returning.
 */
int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;	/* heap copy of the iovec array for ktrace */
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/* Negative iovcnt also lands here via the unsigned compare. */
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;	/* small request: use the stack array */
		needfree = NULL;
	} else {
		error = EINVAL;	/* iovcnt == 0 */
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes auio's iovecs)
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer interrupted by a signal still succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
287
288 /*
289 * Write system call
290 */
291 int
292 sys_write(struct lwp *l, void *v, register_t *retval)
293 {
294 struct sys_write_args /* {
295 syscallarg(int) fd;
296 syscallarg(const void *) buf;
297 syscallarg(size_t) nbyte;
298 } */ *uap = v;
299 int fd;
300 struct file *fp;
301 struct proc *p;
302 struct filedesc *fdp;
303
304 fd = SCARG(uap, fd);
305 p = l->l_proc;
306 fdp = p->p_fd;
307
308 if ((fp = fd_getfile(fdp, fd)) == NULL)
309 return (EBADF);
310
311 if ((fp->f_flag & FWRITE) == 0) {
312 simple_unlock(&fp->f_slock);
313 return (EBADF);
314 }
315
316 FILE_USE(fp);
317
318 /* dofilewrite() will unuse the descriptor for us */
319 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
320 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
321 }
322
/*
 * Common code for write(2)-style calls once the file has been held and
 * FILE_USE()d.  Builds a single-element uio, performs the transfer
 * through fp->f_ops->fo_write, delivers SIGPIPE on EPIPE, records the
 * transfer for ktrace when enabled, and stores the byte count in
 * *retval.  Always drops the file's use count before returning.
 */
int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = {0};	/* snapshot of aiov for ktrace */
#endif

	p = l->l_proc;
	error = 0;
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;

	/*
	 * Writes return ssize_t because -1 is returned on error. Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write consumes auio/aiov)
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer interrupted by a signal still succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* POSIX: a write to a broken pipe raises SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
383
384 /*
385 * Gather write system call
386 */
387 int
388 sys_writev(struct lwp *l, void *v, register_t *retval)
389 {
390 struct sys_writev_args /* {
391 syscallarg(int) fd;
392 syscallarg(const struct iovec *) iovp;
393 syscallarg(int) iovcnt;
394 } */ *uap = v;
395 int fd;
396 struct file *fp;
397 struct proc *p;
398 struct filedesc *fdp;
399
400 fd = SCARG(uap, fd);
401 p = l->l_proc;
402 fdp = p->p_fd;
403
404 if ((fp = fd_getfile(fdp, fd)) == NULL)
405 return (EBADF);
406
407 if ((fp->f_flag & FWRITE) == 0) {
408 simple_unlock(&fp->f_slock);
409 return (EBADF);
410 }
411
412 FILE_USE(fp);
413
414 /* dofilewritev() will unuse the descriptor for us */
415 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
416 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
417 }
418
/*
 * Common code for writev(2)-style calls once the file has been held and
 * FILE_USE()d.  Copies the iovec array in from userspace (using a small
 * on-stack array when it fits), validates the total transfer length
 * against SSIZE_MAX, performs the write through fp->f_ops->fo_write,
 * delivers SIGPIPE on EPIPE, and optionally records the transfer for
 * ktrace.  Always drops the file's use count before returning.
 */
int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;	/* heap copy of the iovec array for ktrace */
#endif

	p = l->l_proc;
	error = 0;
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		/* Negative iovcnt also lands here via the unsigned compare. */
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;	/* small request: use the stack array */
		needfree = NULL;
	} else {
		error = EINVAL;	/* iovcnt == 0 */
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_lwp = l;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_write consumes auio's iovecs)
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer interrupted by a signal still succeeds. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* POSIX: a write to a broken pipe raises SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	return (error);
}
511
/*
 * Ioctl system call.
 *
 * Decodes the command word to find how much data must be copied in or
 * out of userspace, stages it in a stack or heap buffer, handles the
 * close-on-exec and non-blocking/async commands directly, and passes
 * everything else to the file's fo_ioctl method.
 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;	/* memp != NULL when the buffer was malloc'd */
#define	STK_PARAMS	128	/* max ioctl argument staged on the stack */
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	/* fd_getfile() returns with fp->f_slock held on success. */
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep the error-report switch below sane */
		goto out;
	}

	/* Close-on-exec flags live in the descriptor table, not the file. */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			/* Trace the argument data copied in from the user. */
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-size IOC_IN: pass the user pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Keep f_flag's FNONBLOCK bit in sync with the driver. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		/* Likewise for the FASYNC bit. */
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			/* Trace the result data copied back to the user. */
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	switch (error) {
	case -1:
		/* Driver bug: -1 is not a valid errno; complain loudly. */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		/* No layer claimed the command. */
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
659
/* Shared wait channel for select/poll sleepers, and collision counter. */
int	selwait, nselcoll;
661
662 /*
663 * Select system call.
664 */
665 int
666 sys_pselect(struct lwp *l, void *v, register_t *retval)
667 {
668 struct sys_pselect_args /* {
669 syscallarg(int) nd;
670 syscallarg(fd_set *) in;
671 syscallarg(fd_set *) ou;
672 syscallarg(fd_set *) ex;
673 syscallarg(const struct timespec *) ts;
674 syscallarg(sigset_t *) mask;
675 } */ * const uap = v;
676 struct timespec ats;
677 struct timeval atv, *tv = NULL;
678 sigset_t amask, *mask = NULL;
679 int error;
680
681 if (SCARG(uap, ts)) {
682 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
683 if (error)
684 return error;
685 atv.tv_sec = ats.tv_sec;
686 atv.tv_usec = ats.tv_nsec / 1000;
687 tv = &atv;
688 }
689 if (SCARG(uap, mask) != NULL) {
690 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
691 if (error)
692 return error;
693 mask = &amask;
694 }
695
696 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
697 SCARG(uap, ou), SCARG(uap, ex), tv, mask);
698 }
699
700 int
701 sys_select(struct lwp *l, void *v, register_t *retval)
702 {
703 struct sys_select_args /* {
704 syscallarg(int) nd;
705 syscallarg(fd_set *) in;
706 syscallarg(fd_set *) ou;
707 syscallarg(fd_set *) ex;
708 syscallarg(struct timeval *) tv;
709 } */ * const uap = v;
710 struct timeval atv, *tv = NULL;
711 int error;
712
713 if (SCARG(uap, tv)) {
714 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
715 sizeof(atv));
716 if (error)
717 return error;
718 tv = &atv;
719 }
720
721 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
722 SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
723 }
724
/*
 * Guts of select(2)/pselect(2).
 *
 * Copies the three input fd_sets into one buffer (input bits in slots
 * 0-2, output bits in slots 3-5), then repeatedly scans them with
 * selscan(), sleeping on the global selwait channel between scans.
 * The nselcoll/L_SELECT protocol detects wakeups that race with the
 * scan: if a collision happened, the scan is retried rather than
 * sleeping.  An optional temporary signal mask is installed for the
 * duration (pselect semantics).
 */
int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
	fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	struct proc * const p = l->l_proc;
	caddr_t bits;
	/* room for three input and three output fd_sets */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	int s, ncoll, error, timo;
	size_t ni;
	sigset_t oldmask;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy in each supplied set; treat a NULL set as all-zeroes. */
#define	getbits(name, x) \
	if (u_ ## name) { \
		error = copyin(u_ ## name, bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv) {
		if (itimerfix(tv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(tv, &time, tv);
		splx(s);
	}
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
			(fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(tv);
		if (timo <= 0)
			goto done;	/* deadline already passed */
	}
	s = splsched();
	/* A collision or selwakeup() since the scan means state changed. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;	/* timeout expired: report zero ready fds */
	if (error == 0) {

		/* Copy each requested result set back out. */
#define	putbits(name, x) \
		if (u_ ## name) { \
			error = copyout(bits + ni * x, u_ ## name, ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
827
828 int
829 selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
830 register_t *retval)
831 {
832 static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
833 POLLWRNORM | POLLHUP | POLLERR,
834 POLLRDBAND };
835 struct proc *p = l->l_proc;
836 struct filedesc *fdp;
837 int msk, i, j, fd, n;
838 fd_mask ibits, obits;
839 struct file *fp;
840
841 fdp = p->p_fd;
842 n = 0;
843 for (msk = 0; msk < 3; msk++) {
844 for (i = 0; i < nfd; i += NFDBITS) {
845 ibits = *ibitp++;
846 obits = 0;
847 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
848 ibits &= ~(1 << j);
849 if ((fp = fd_getfile(fdp, fd)) == NULL)
850 return (EBADF);
851 FILE_USE(fp);
852 if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
853 obits |= (1 << j);
854 n++;
855 }
856 FILE_UNUSE(fp, l);
857 }
858 *obitp++ = obits;
859 }
860 }
861 *retval = n;
862 return (0);
863 }
864
865 /*
866 * Poll system call.
867 */
868 int
869 sys_poll(struct lwp *l, void *v, register_t *retval)
870 {
871 struct sys_poll_args /* {
872 syscallarg(struct pollfd *) fds;
873 syscallarg(u_int) nfds;
874 syscallarg(int) timeout;
875 } */ * const uap = v;
876 struct timeval atv, *tv = NULL;
877
878 if (SCARG(uap, timeout) != INFTIM) {
879 atv.tv_sec = SCARG(uap, timeout) / 1000;
880 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
881 tv = &atv;
882 }
883
884 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
885 tv, NULL);
886 }
887
888 /*
889 * Poll system call.
890 */
891 int
892 sys_pollts(struct lwp *l, void *v, register_t *retval)
893 {
894 struct sys_pollts_args /* {
895 syscallarg(struct pollfd *) fds;
896 syscallarg(u_int) nfds;
897 syscallarg(const struct timespec *) ts;
898 syscallarg(const sigset_t *) mask;
899 } */ * const uap = v;
900 struct timespec ats;
901 struct timeval atv, *tv = NULL;
902 sigset_t amask, *mask = NULL;
903 int error;
904
905 if (SCARG(uap, ts)) {
906 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
907 if (error)
908 return error;
909 atv.tv_sec = ats.tv_sec;
910 atv.tv_usec = ats.tv_nsec / 1000;
911 tv = &atv;
912 }
913 if (SCARG(uap, mask)) {
914 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
915 if (error)
916 return error;
917 mask = &amask;
918 }
919
920 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
921 tv, mask);
922 }
923
/*
 * Guts of poll(2)/pollts(2).
 *
 * Copies the pollfd array into a stack or heap buffer, then repeatedly
 * scans it with pollscan(), sleeping on the global selwait channel
 * between scans.  Uses the same nselcoll/L_SELECT collision protocol
 * as selcommon() to avoid missing wakeups that race with the scan.
 * An optional temporary signal mask is installed for the duration
 * (pollts semantics).
 */
int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	struct proc * const p = l->l_proc;
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	sigset_t oldmask;
	int s, ncoll, error, timo;
	size_t ni;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv) {
		if (itimerfix(tv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(tv, &time, tv);
		splx(s);
	}
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto done;
	if (tv) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(tv);
		if (timo <= 0)
			goto done;	/* deadline already passed */
	}
	s = splsched();
	/* A collision or selwakeup() since the scan means state changed. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 done:
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;	/* timeout expired: report zero ready fds */
	if (error == 0) {
		/* Copy the updated revents back out to the user. */
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
1006
1007 int
1008 pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
1009 {
1010 struct proc *p = l->l_proc;
1011 struct filedesc *fdp;
1012 int i, n;
1013 struct file *fp;
1014
1015 fdp = p->p_fd;
1016 n = 0;
1017 for (i = 0; i < nfd; i++, fds++) {
1018 if (fds->fd >= fdp->fd_nfiles) {
1019 fds->revents = POLLNVAL;
1020 n++;
1021 } else if (fds->fd < 0) {
1022 fds->revents = 0;
1023 } else {
1024 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
1025 fds->revents = POLLNVAL;
1026 n++;
1027 } else {
1028 FILE_USE(fp);
1029 fds->revents = (*fp->f_ops->fo_poll)(fp,
1030 fds->events | POLLERR | POLLHUP, l);
1031 if (fds->revents != 0)
1032 n++;
1033 FILE_UNUSE(fp, l);
1034 }
1035 }
1036 }
1037 *retval = n;
1038 return (0);
1039 }
1040
1041 /*ARGSUSED*/
1042 int
1043 seltrue(dev_t dev, int events, struct lwp *l)
1044 {
1045
1046 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1047 }
1048
1049 /*
1050 * Record a select request.
1051 */
1052 void
1053 selrecord(struct lwp *selector, struct selinfo *sip)
1054 {
1055 struct lwp *l;
1056 struct proc *p;
1057 pid_t mypid;
1058
1059 mypid = selector->l_proc->p_pid;
1060 if (sip->sel_pid == mypid)
1061 return;
1062 if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
1063 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1064 if (l->l_wchan == (caddr_t)&selwait) {
1065 sip->sel_collision = 1;
1066 return;
1067 }
1068 }
1069 }
1070
1071 sip->sel_pid = mypid;
1072 }
1073
1074 /*
1075 * Do a wakeup when a selectable event occurs.
1076 */
1077 void
1078 selwakeup(sip)
1079 struct selinfo *sip;
1080 {
1081 struct lwp *l;
1082 struct proc *p;
1083 int s;
1084
1085 if (sip->sel_pid == 0)
1086 return;
1087 if (sip->sel_collision) {
1088 sip->sel_pid = 0;
1089 nselcoll++;
1090 sip->sel_collision = 0;
1091 wakeup((caddr_t)&selwait);
1092 return;
1093 }
1094 p = pfind(sip->sel_pid);
1095 sip->sel_pid = 0;
1096 if (p != NULL) {
1097 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1098 SCHED_LOCK(s);
1099 if (l->l_wchan == (caddr_t)&selwait) {
1100 if (l->l_stat == LSSLEEP)
1101 setrunnable(l);
1102 else
1103 unsleep(l);
1104 } else if (l->l_flag & L_SELECT)
1105 l->l_flag &= ~L_SELECT;
1106 SCHED_UNLOCK(s);
1107 }
1108 }
1109 }
1110