/* sys_generic.c revision 1.1.1.3 */
/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/filedesc.h>
44 #include <sys/ioctl.h>
45 #include <sys/file.h>
46 #include <sys/proc.h>
47 #include <sys/socketvar.h>
48 #include <sys/uio.h>
49 #include <sys/kernel.h>
50 #include <sys/stat.h>
51 #include <sys/malloc.h>
52 #ifdef KTRACE
53 #include <sys/ktrace.h>
54 #endif
55
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58
59 /*
60 * Read system call.
61 */
62 /* ARGSUSED */
63 int
64 read(p, uap, retval)
65 struct proc *p;
66 register struct read_args /* {
67 syscallarg(int) fd;
68 syscallarg(char *) buf;
69 syscallarg(u_int) nbyte;
70 } */ *uap;
71 register_t *retval;
72 {
73 register struct file *fp;
74 register struct filedesc *fdp = p->p_fd;
75 struct uio auio;
76 struct iovec aiov;
77 long cnt, error = 0;
78 #ifdef KTRACE
79 struct iovec ktriov;
80 #endif
81
82 if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
83 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
84 (fp->f_flag & FREAD) == 0)
85 return (EBADF);
86 aiov.iov_base = (caddr_t)SCARG(uap, buf);
87 aiov.iov_len = SCARG(uap, nbyte);
88 auio.uio_iov = &aiov;
89 auio.uio_iovcnt = 1;
90 auio.uio_resid = SCARG(uap, nbyte);
91 auio.uio_rw = UIO_READ;
92 auio.uio_segflg = UIO_USERSPACE;
93 auio.uio_procp = p;
94 #ifdef KTRACE
95 /*
96 * if tracing, save a copy of iovec
97 */
98 if (KTRPOINT(p, KTR_GENIO))
99 ktriov = aiov;
100 #endif
101 cnt = SCARG(uap, nbyte);
102 if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
103 if (auio.uio_resid != cnt && (error == ERESTART ||
104 error == EINTR || error == EWOULDBLOCK))
105 error = 0;
106 cnt -= auio.uio_resid;
107 #ifdef KTRACE
108 if (KTRPOINT(p, KTR_GENIO) && error == 0)
109 ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, &ktriov,
110 cnt, error);
111 #endif
112 *retval = cnt;
113 return (error);
114 }
115
116 /*
117 * Scatter read system call.
118 */
119 int
120 readv(p, uap, retval)
121 struct proc *p;
122 register struct readv_args /* {
123 syscallarg(int) fd;
124 syscallarg(struct iovec *) iovp;
125 syscallarg(u_int) iovcnt;
126 } */ *uap;
127 register_t *retval;
128 {
129 register struct file *fp;
130 register struct filedesc *fdp = p->p_fd;
131 struct uio auio;
132 register struct iovec *iov;
133 struct iovec *needfree;
134 struct iovec aiov[UIO_SMALLIOV];
135 long i, cnt, error = 0;
136 u_int iovlen;
137 #ifdef KTRACE
138 struct iovec *ktriov = NULL;
139 #endif
140
141 if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
142 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
143 (fp->f_flag & FREAD) == 0)
144 return (EBADF);
145 /* note: can't use iovlen until iovcnt is validated */
146 iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
147 if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
148 if (SCARG(uap, iovcnt) > UIO_MAXIOV)
149 return (EINVAL);
150 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
151 needfree = iov;
152 } else {
153 iov = aiov;
154 needfree = NULL;
155 }
156 auio.uio_iov = iov;
157 auio.uio_iovcnt = SCARG(uap, iovcnt);
158 auio.uio_rw = UIO_READ;
159 auio.uio_segflg = UIO_USERSPACE;
160 auio.uio_procp = p;
161 if (error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen))
162 goto done;
163 auio.uio_resid = 0;
164 for (i = 0; i < SCARG(uap, iovcnt); i++) {
165 if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
166 error = EINVAL;
167 goto done;
168 }
169 auio.uio_resid += iov->iov_len;
170 iov++;
171 }
172 #ifdef KTRACE
173 /*
174 * if tracing, save a copy of iovec
175 */
176 if (KTRPOINT(p, KTR_GENIO)) {
177 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
178 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
179 }
180 #endif
181 cnt = auio.uio_resid;
182 if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
183 if (auio.uio_resid != cnt && (error == ERESTART ||
184 error == EINTR || error == EWOULDBLOCK))
185 error = 0;
186 cnt -= auio.uio_resid;
187 #ifdef KTRACE
188 if (ktriov != NULL) {
189 if (error == 0)
190 ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_READ, ktriov,
191 cnt, error);
192 FREE(ktriov, M_TEMP);
193 }
194 #endif
195 *retval = cnt;
196 done:
197 if (needfree)
198 FREE(needfree, M_IOV);
199 return (error);
200 }
201
202 /*
203 * Write system call
204 */
205 int
206 write(p, uap, retval)
207 struct proc *p;
208 register struct write_args /* {
209 syscallarg(int) fd;
210 syscallarg(char *) buf;
211 syscallarg(u_int) nbyte;
212 } */ *uap;
213 register_t *retval;
214 {
215 register struct file *fp;
216 register struct filedesc *fdp = p->p_fd;
217 struct uio auio;
218 struct iovec aiov;
219 long cnt, error = 0;
220 #ifdef KTRACE
221 struct iovec ktriov;
222 #endif
223
224 if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
225 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
226 (fp->f_flag & FWRITE) == 0)
227 return (EBADF);
228 aiov.iov_base = (caddr_t)SCARG(uap, buf);
229 aiov.iov_len = SCARG(uap, nbyte);
230 auio.uio_iov = &aiov;
231 auio.uio_iovcnt = 1;
232 auio.uio_resid = SCARG(uap, nbyte);
233 auio.uio_rw = UIO_WRITE;
234 auio.uio_segflg = UIO_USERSPACE;
235 auio.uio_procp = p;
236 #ifdef KTRACE
237 /*
238 * if tracing, save a copy of iovec
239 */
240 if (KTRPOINT(p, KTR_GENIO))
241 ktriov = aiov;
242 #endif
243 cnt = SCARG(uap, nbyte);
244 if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
245 if (auio.uio_resid != cnt && (error == ERESTART ||
246 error == EINTR || error == EWOULDBLOCK))
247 error = 0;
248 if (error == EPIPE)
249 psignal(p, SIGPIPE);
250 }
251 cnt -= auio.uio_resid;
252 #ifdef KTRACE
253 if (KTRPOINT(p, KTR_GENIO) && error == 0)
254 ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
255 &ktriov, cnt, error);
256 #endif
257 *retval = cnt;
258 return (error);
259 }
260
261 /*
262 * Gather write system call
263 */
264 int
265 writev(p, uap, retval)
266 struct proc *p;
267 register struct writev_args /* {
268 syscallarg(int) fd;
269 syscallarg(struct iovec *) iovp;
270 syscallarg(u_int) iovcnt;
271 } */ *uap;
272 register_t *retval;
273 {
274 register struct file *fp;
275 register struct filedesc *fdp = p->p_fd;
276 struct uio auio;
277 register struct iovec *iov;
278 struct iovec *needfree;
279 struct iovec aiov[UIO_SMALLIOV];
280 long i, cnt, error = 0;
281 u_int iovlen;
282 #ifdef KTRACE
283 struct iovec *ktriov = NULL;
284 #endif
285
286 if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
287 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
288 (fp->f_flag & FWRITE) == 0)
289 return (EBADF);
290 /* note: can't use iovlen until iovcnt is validated */
291 iovlen = SCARG(uap, iovcnt) * sizeof (struct iovec);
292 if (SCARG(uap, iovcnt) > UIO_SMALLIOV) {
293 if (SCARG(uap, iovcnt) > UIO_MAXIOV)
294 return (EINVAL);
295 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
296 needfree = iov;
297 } else {
298 iov = aiov;
299 needfree = NULL;
300 }
301 auio.uio_iov = iov;
302 auio.uio_iovcnt = SCARG(uap, iovcnt);
303 auio.uio_rw = UIO_WRITE;
304 auio.uio_segflg = UIO_USERSPACE;
305 auio.uio_procp = p;
306 if (error = copyin((caddr_t)SCARG(uap, iovp), (caddr_t)iov, iovlen))
307 goto done;
308 auio.uio_resid = 0;
309 for (i = 0; i < SCARG(uap, iovcnt); i++) {
310 if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
311 error = EINVAL;
312 goto done;
313 }
314 auio.uio_resid += iov->iov_len;
315 iov++;
316 }
317 #ifdef KTRACE
318 /*
319 * if tracing, save a copy of iovec
320 */
321 if (KTRPOINT(p, KTR_GENIO)) {
322 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
323 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
324 }
325 #endif
326 cnt = auio.uio_resid;
327 if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
328 if (auio.uio_resid != cnt && (error == ERESTART ||
329 error == EINTR || error == EWOULDBLOCK))
330 error = 0;
331 if (error == EPIPE)
332 psignal(p, SIGPIPE);
333 }
334 cnt -= auio.uio_resid;
335 #ifdef KTRACE
336 if (ktriov != NULL) {
337 if (error == 0)
338 ktrgenio(p->p_tracep, SCARG(uap, fd), UIO_WRITE,
339 ktriov, cnt, error);
340 FREE(ktriov, M_TEMP);
341 }
342 #endif
343 *retval = cnt;
344 done:
345 if (needfree)
346 FREE(needfree, M_IOV);
347 return (error);
348 }
349
350 /*
351 * Ioctl system call
352 */
353 /* ARGSUSED */
354 int
355 ioctl(p, uap, retval)
356 struct proc *p;
357 register struct ioctl_args /* {
358 syscallarg(int) fd;
359 syscallarg(u_long) com;
360 syscallarg(caddr_t) data;
361 } */ *uap;
362 register_t *retval;
363 {
364 register struct file *fp;
365 register struct filedesc *fdp;
366 register u_long com;
367 register int error;
368 register u_int size;
369 caddr_t data, memp;
370 int tmp;
371 #define STK_PARAMS 128
372 char stkbuf[STK_PARAMS];
373
374 fdp = p->p_fd;
375 if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles ||
376 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
377 return (EBADF);
378
379 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
380 return (EBADF);
381
382 switch (com = SCARG(uap, com)) {
383 case FIONCLEX:
384 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
385 return (0);
386 case FIOCLEX:
387 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
388 return (0);
389 }
390
391 /*
392 * Interpret high order word to find amount of data to be
393 * copied to/from the user's address space.
394 */
395 size = IOCPARM_LEN(com);
396 if (size > IOCPARM_MAX)
397 return (ENOTTY);
398 memp = NULL;
399 if (size > sizeof (stkbuf)) {
400 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
401 data = memp;
402 } else
403 data = stkbuf;
404 if (com&IOC_IN) {
405 if (size) {
406 error = copyin(SCARG(uap, data), data, (u_int)size);
407 if (error) {
408 if (memp)
409 free(memp, M_IOCTLOPS);
410 return (error);
411 }
412 } else
413 *(caddr_t *)data = SCARG(uap, data);
414 } else if ((com&IOC_OUT) && size)
415 /*
416 * Zero the buffer so the user always
417 * gets back something deterministic.
418 */
419 bzero(data, size);
420 else if (com&IOC_VOID)
421 *(caddr_t *)data = SCARG(uap, data);
422
423 switch (com) {
424
425 case FIONBIO:
426 if (tmp = *(int *)data)
427 fp->f_flag |= FNONBLOCK;
428 else
429 fp->f_flag &= ~FNONBLOCK;
430 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
431 break;
432
433 case FIOASYNC:
434 if (tmp = *(int *)data)
435 fp->f_flag |= FASYNC;
436 else
437 fp->f_flag &= ~FASYNC;
438 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
439 break;
440
441 case FIOSETOWN:
442 tmp = *(int *)data;
443 if (fp->f_type == DTYPE_SOCKET) {
444 ((struct socket *)fp->f_data)->so_pgid = tmp;
445 error = 0;
446 break;
447 }
448 if (tmp <= 0) {
449 tmp = -tmp;
450 } else {
451 struct proc *p1 = pfind(tmp);
452 if (p1 == 0) {
453 error = ESRCH;
454 break;
455 }
456 tmp = p1->p_pgrp->pg_id;
457 }
458 error = (*fp->f_ops->fo_ioctl)
459 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
460 break;
461
462 case FIOGETOWN:
463 if (fp->f_type == DTYPE_SOCKET) {
464 error = 0;
465 *(int *)data = ((struct socket *)fp->f_data)->so_pgid;
466 break;
467 }
468 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
469 *(int *)data = -*(int *)data;
470 break;
471
472 default:
473 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
474 /*
475 * Copy any data to user, size was
476 * already set and checked above.
477 */
478 if (error == 0 && (com&IOC_OUT) && size)
479 error = copyout(data, SCARG(uap, data), (u_int)size);
480 break;
481 }
482 if (memp)
483 free(memp, M_IOCTLOPS);
484 return (error);
485 }
486
/* Its address is the wait channel that select() sleeps on. */
int	selwait;
/* Incremented by selwakeup() each time it resolves a select collision. */
int	nselcoll;
488
489 /*
490 * Select system call.
491 */
492 int
493 select(p, uap, retval)
494 register struct proc *p;
495 register struct select_args /* {
496 syscallarg(u_int) nd;
497 syscallarg(fd_set *) in;
498 syscallarg(fd_set *) ou;
499 syscallarg(fd_set *) ex;
500 syscallarg(struct timeval *) tv;
501 } */ *uap;
502 register_t *retval;
503 {
504 fd_set ibits[3], obits[3];
505 struct timeval atv;
506 int s, ncoll, error, timo = 0;
507 u_int ni;
508
509 bzero((caddr_t)ibits, sizeof(ibits));
510 bzero((caddr_t)obits, sizeof(obits));
511 if (SCARG(uap, nd) > FD_SETSIZE)
512 return (EINVAL);
513 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
514 /* forgiving; slightly wrong */
515 SCARG(uap, nd) = p->p_fd->fd_nfiles;
516 }
517 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
518
519 #define getbits(name, x) \
520 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
521 (caddr_t)&ibits[x], ni))) \
522 goto done;
523 getbits(in, 0);
524 getbits(ou, 1);
525 getbits(ex, 2);
526 #undef getbits
527
528 if (SCARG(uap, tv)) {
529 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
530 sizeof (atv));
531 if (error)
532 goto done;
533 if (itimerfix(&atv)) {
534 error = EINVAL;
535 goto done;
536 }
537 s = splclock();
538 timevaladd(&atv, (struct timeval *)&time);
539 splx(s);
540 }
541 retry:
542 ncoll = nselcoll;
543 p->p_flag |= P_SELECT;
544 error = selscan(p, ibits, obits, SCARG(uap, nd), retval);
545 if (error || *retval)
546 goto done;
547 s = splhigh();
548 if (SCARG(uap, tv)) {
549 if (timercmp(&time, &atv, >=)) {
550 splx(s);
551 goto done;
552 }
553 /*
554 * If poll wait was tiny, this could be zero; we will
555 * have to round it up to avoid sleeping forever. If
556 * we retry below, the timercmp above will get us out.
557 * Note that if wait was 0, the timercmp will prevent
558 * us from getting here the first time.
559 */
560 timo = hzto(&atv);
561 if (timo == 0)
562 timo = 1;
563 }
564 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
565 splx(s);
566 goto retry;
567 }
568 p->p_flag &= ~P_SELECT;
569 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
570 splx(s);
571 if (error == 0)
572 goto retry;
573 done:
574 p->p_flag &= ~P_SELECT;
575 /* select is not restarted after signals... */
576 if (error == ERESTART)
577 error = EINTR;
578 if (error == EWOULDBLOCK)
579 error = 0;
580 #define putbits(name, x) \
581 if (SCARG(uap, name) && (error2 = copyout((caddr_t)&obits[x], \
582 (caddr_t)SCARG(uap, name), ni))) \
583 error = error2;
584 if (error == 0) {
585 int error2;
586
587 putbits(in, 0);
588 putbits(ou, 1);
589 putbits(ex, 2);
590 #undef putbits
591 }
592 return (error);
593 }
594
595 int
596 selscan(p, ibits, obits, nfd, retval)
597 struct proc *p;
598 fd_set *ibits, *obits;
599 int nfd;
600 register_t *retval;
601 {
602 register struct filedesc *fdp = p->p_fd;
603 register int msk, i, j, fd;
604 register fd_mask bits;
605 struct file *fp;
606 int n = 0;
607 static int flag[3] = { FREAD, FWRITE, 0 };
608
609 for (msk = 0; msk < 3; msk++) {
610 for (i = 0; i < nfd; i += NFDBITS) {
611 bits = ibits[msk].fds_bits[i/NFDBITS];
612 while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
613 bits &= ~(1 << j);
614 fp = fdp->fd_ofiles[fd];
615 if (fp == NULL)
616 return (EBADF);
617 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
618 FD_SET(fd, &obits[msk]);
619 n++;
620 }
621 }
622 }
623 }
624 *retval = n;
625 return (0);
626 }
627
/*
 * Select routine that unconditionally reports "ready": returns 1
 * whatever device, flag or process it is given.
 */
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{
	const int ready = 1;

	return (ready);
}
638
639 /*
640 * Record a select request.
641 */
642 void
643 selrecord(selector, sip)
644 struct proc *selector;
645 struct selinfo *sip;
646 {
647 struct proc *p;
648 pid_t mypid;
649
650 mypid = selector->p_pid;
651 if (sip->si_pid == mypid)
652 return;
653 if (sip->si_pid && (p = pfind(sip->si_pid)) &&
654 p->p_wchan == (caddr_t)&selwait)
655 sip->si_flags |= SI_COLL;
656 else
657 sip->si_pid = mypid;
658 }
659
660 /*
661 * Do a wakeup when a selectable event occurs.
662 */
663 void
664 selwakeup(sip)
665 register struct selinfo *sip;
666 {
667 register struct proc *p;
668 int s;
669
670 if (sip->si_pid == 0)
671 return;
672 if (sip->si_flags & SI_COLL) {
673 nselcoll++;
674 sip->si_flags &= ~SI_COLL;
675 wakeup((caddr_t)&selwait);
676 }
677 p = pfind(sip->si_pid);
678 sip->si_pid = 0;
679 if (p != NULL) {
680 s = splhigh();
681 if (p->p_wchan == (caddr_t)&selwait) {
682 if (p->p_stat == SSLEEP)
683 setrunnable(p);
684 else
685 unsleep(p);
686 } else if (p->p_flag & P_SELECT)
687 p->p_flag &= ~P_SELECT;
688 splx(s);
689 }
690 }
691