kern_descrip.c revision 1.67.4.8 1 /* $NetBSD: kern_descrip.c,v 1.67.4.8 2002/04/27 15:52:56 he Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/proc.h>
49 #include <sys/file.h>
50 #include <sys/namei.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/stat.h>
54 #include <sys/ioctl.h>
55 #include <sys/fcntl.h>
56 #include <sys/malloc.h>
57 #include <sys/pool.h>
58 #include <sys/syslog.h>
59 #include <sys/unistd.h>
60 #include <sys/resourcevar.h>
61 #include <sys/conf.h>
62
63 #include <sys/mount.h>
64 #include <sys/syscallargs.h>
65
66 #include <vm/vm.h>
67
68 /*
69 * Descriptor management.
70 */
71 struct filelist filehead; /* head of list of open files */
72 int nfiles; /* actual number of open files */
73 struct pool file_pool; /* memory pool for file structures */
74 struct pool cwdi_pool; /* memory pool for cwdinfo structures */
75 struct pool filedesc0_pool; /* memory pool for filedesc0 structures */
76
77 static __inline void fd_used __P((struct filedesc *, int));
78 static __inline void fd_unused __P((struct filedesc *, int));
79 int finishdup __P((struct proc *, int, int, register_t *));
80
81 static __inline void
82 fd_used(fdp, fd)
83 struct filedesc *fdp;
84 int fd;
85 {
86
87 if (fd > fdp->fd_lastfile)
88 fdp->fd_lastfile = fd;
89 }
90
91 static __inline void
92 fd_unused(fdp, fd)
93 struct filedesc *fdp;
94 int fd;
95 {
96
97 if (fd < fdp->fd_freefile)
98 fdp->fd_freefile = fd;
99 #ifdef DIAGNOSTIC
100 if (fd > fdp->fd_lastfile)
101 panic("fd_unused: fd_lastfile inconsistent");
102 #endif
103 if (fd == fdp->fd_lastfile) {
104 do {
105 fd--;
106 } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
107 fdp->fd_lastfile = fd;
108 }
109 }
110
111 /*
112 * System calls on descriptors.
113 */
114
115 /*
116 * Duplicate a file descriptor.
117 */
118 /* ARGSUSED */
119 int
120 sys_dup(p, v, retval)
121 struct proc *p;
122 void *v;
123 register_t *retval;
124 {
125 struct sys_dup_args /* {
126 syscallarg(int) fd;
127 } */ *uap = v;
128 struct file *fp;
129 struct filedesc *fdp = p->p_fd;
130 int old = SCARG(uap, fd);
131 int new;
132 int error;
133
134 restart:
135 if ((u_int)old >= fdp->fd_nfiles ||
136 (fp = fdp->fd_ofiles[old]) == NULL ||
137 (fp->f_iflags & FIF_WANTCLOSE) != 0)
138 return (EBADF);
139
140 FILE_USE(fp);
141
142 if ((error = fdalloc(p, 0, &new)) != 0) {
143 FILE_UNUSE(fp, p);
144 if (error == ERESTART)
145 goto restart;
146 return (error);
147 }
148
149 /* finishdup() will unuse the descriptors for us */
150 return (finishdup(p, old, new, retval));
151 }
152
153 /*
154 * Duplicate a file descriptor to a particular value.
155 */
156 /* ARGSUSED */
157 int
158 sys_dup2(p, v, retval)
159 struct proc *p;
160 void *v;
161 register_t *retval;
162 {
163 struct sys_dup2_args /* {
164 syscallarg(int) from;
165 syscallarg(int) to;
166 } */ *uap = v;
167 struct file *fp;
168 struct filedesc *fdp = p->p_fd;
169 int old = SCARG(uap, from), new = SCARG(uap, to);
170 int i, error;
171
172 restart:
173 if ((u_int)old >= fdp->fd_nfiles ||
174 (fp = fdp->fd_ofiles[old]) == NULL ||
175 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
176 (u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
177 (u_int)new >= maxfiles)
178 return (EBADF);
179 if (old == new) {
180 *retval = new;
181 return (0);
182 }
183
184 FILE_USE(fp);
185
186 if (new >= fdp->fd_nfiles) {
187 if ((error = fdalloc(p, new, &i)) != 0) {
188 FILE_UNUSE(fp, p);
189 if (error == ERESTART)
190 goto restart;
191 return (error);
192 }
193 if (new != i)
194 panic("dup2: fdalloc");
195 }
196
197 /*
198 * finishdup() will close the file that's in the `new'
199 * slot, if there's one there.
200 */
201
202 /* finishdup() will unuse the descriptors for us */
203 return (finishdup(p, old, new, retval));
204 }
205
206 int fcntl_forfs __P((int, struct proc *, int, void *));
207
208 /*
209 * The file control system call.
210 */
211 /* ARGSUSED */
212 int
213 sys_fcntl(p, v, retval)
214 struct proc *p;
215 void *v;
216 register_t *retval;
217 {
218 struct sys_fcntl_args /* {
219 syscallarg(int) fd;
220 syscallarg(int) cmd;
221 syscallarg(void *) arg;
222 } */ *uap = v;
223 int fd = SCARG(uap, fd);
224 struct filedesc *fdp = p->p_fd;
225 struct file *fp;
226 struct vnode *vp;
227 int i, tmp, error = 0, flg = F_POSIX, cmd;
228 struct flock fl;
229 int newmin;
230
231 restart:
232 if ((u_int)fd >= fdp->fd_nfiles ||
233 (fp = fdp->fd_ofiles[fd]) == NULL ||
234 (fp->f_iflags & FIF_WANTCLOSE) != 0)
235 return (EBADF);
236
237 FILE_USE(fp);
238
239 cmd = SCARG(uap, cmd);
240 if ((cmd & F_FSCTL)) {
241 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg));
242 goto out;
243 }
244
245 switch (cmd) {
246
247 case F_DUPFD:
248 newmin = (long)SCARG(uap, arg);
249 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
250 (u_int)newmin >= maxfiles) {
251 error = EINVAL;
252 goto out;
253 }
254 if ((error = fdalloc(p, newmin, &i)) != 0) {
255 if (error == ERESTART) {
256 FILE_UNUSE(fp, p);
257 goto restart;
258 }
259 goto out;
260 }
261
262 /* finishdup() will unuse the descriptors for us */
263 return (finishdup(p, fd, i, retval));
264
265 case F_GETFD:
266 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
267 break;
268
269 case F_SETFD:
270 if ((long)SCARG(uap, arg) & 1)
271 fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
272 else
273 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
274 break;
275
276 case F_GETFL:
277 *retval = OFLAGS(fp->f_flag);
278 break;
279
280 case F_SETFL:
281 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
282 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, (caddr_t)&tmp, p);
283 if (error)
284 goto out;
285 fp->f_flag &= ~FCNTLFLAGS;
286 fp->f_flag |= tmp;
287 tmp = fp->f_flag & FNONBLOCK;
288 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
289 if (error)
290 goto out;
291 tmp = fp->f_flag & FASYNC;
292 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
293 if (error == 0)
294 goto out;
295 fp->f_flag &= ~FNONBLOCK;
296 tmp = 0;
297 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
298 break;
299
300 case F_GETOWN:
301 if (fp->f_type == DTYPE_SOCKET) {
302 *retval = ((struct socket *)fp->f_data)->so_pgid;
303 goto out;
304 }
305 error = (*fp->f_ops->fo_ioctl)
306 (fp, TIOCGPGRP, (caddr_t)&tmp, p);
307 *retval = -tmp;
308 break;
309
310 case F_SETOWN:
311 if (fp->f_type == DTYPE_SOCKET) {
312 ((struct socket *)fp->f_data)->so_pgid =
313 (long)SCARG(uap, arg);
314 goto out;
315 }
316 if ((long)SCARG(uap, arg) <= 0) {
317 tmp = (-(long)SCARG(uap, arg));
318 } else {
319 struct proc *p1 = pfind((long)SCARG(uap, arg));
320 if (p1 == 0) {
321 error = ESRCH;
322 goto out;
323 }
324 tmp = (long)p1->p_pgrp->pg_id;
325 }
326 error = (*fp->f_ops->fo_ioctl)
327 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
328 break;
329
330 case F_SETLKW:
331 flg |= F_WAIT;
332 /* Fall into F_SETLK */
333
334 case F_SETLK:
335 if (fp->f_type != DTYPE_VNODE) {
336 error = EINVAL;
337 goto out;
338 }
339 vp = (struct vnode *)fp->f_data;
340 /* Copy in the lock structure */
341 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
342 sizeof(fl));
343 if (error)
344 goto out;
345 if (fl.l_whence == SEEK_CUR)
346 fl.l_start += fp->f_offset;
347 switch (fl.l_type) {
348 case F_RDLCK:
349 if ((fp->f_flag & FREAD) == 0) {
350 error = EBADF;
351 goto out;
352 }
353 p->p_flag |= P_ADVLOCK;
354 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
355 goto out;
356
357 case F_WRLCK:
358 if ((fp->f_flag & FWRITE) == 0) {
359 error = EBADF;
360 goto out;
361 }
362 p->p_flag |= P_ADVLOCK;
363 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
364 goto out;
365
366 case F_UNLCK:
367 error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
368 F_POSIX);
369 goto out;
370
371 default:
372 error = EINVAL;
373 goto out;
374 }
375
376 case F_GETLK:
377 if (fp->f_type != DTYPE_VNODE) {
378 error = EINVAL;
379 goto out;
380 }
381 vp = (struct vnode *)fp->f_data;
382 /* Copy in the lock structure */
383 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
384 sizeof(fl));
385 if (error)
386 goto out;
387 if (fl.l_whence == SEEK_CUR)
388 fl.l_start += fp->f_offset;
389 if (fl.l_type != F_RDLCK &&
390 fl.l_type != F_WRLCK &&
391 fl.l_type != F_UNLCK) {
392 error = EINVAL;
393 goto out;
394 }
395 error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
396 if (error)
397 goto out;
398 error = copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
399 sizeof(fl));
400 break;
401
402 default:
403 error = EINVAL;
404 }
405
406 out:
407 FILE_UNUSE(fp, p);
408 return (error);
409 }
410
411 /*
412 * Common code for dup, dup2, and fcntl(F_DUPFD).
413 */
414 int
415 finishdup(p, old, new, retval)
416 struct proc *p;
417 int old, new;
418 register_t *retval;
419 {
420 struct filedesc *fdp = p->p_fd;
421 struct file *fp, *delfp;
422
423 /*
424 * If there is a file in the new slot, remember it so we
425 * can close it after we've finished the dup. We need
426 * to do it after the dup is finished, since closing
427 * the file may block.
428 *
429 * Note: `old' is already used for us.
430 */
431 delfp = fdp->fd_ofiles[new];
432
433 fp = fdp->fd_ofiles[old];
434 fdp->fd_ofiles[new] = fp;
435 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
436 fp->f_count++;
437 /*
438 * Note, don't have to mark it "used" in the table if there
439 * was already a file in the `new' slot.
440 */
441 if (delfp == NULL)
442 fd_used(fdp, new);
443 *retval = new;
444 FILE_UNUSE(fp, p);
445 if (delfp != NULL) {
446 FILE_USE(delfp);
447 (void) closef(delfp, p);
448 }
449 return (0);
450 }
451
452 void
453 fdremove(fdp, fd)
454 struct filedesc *fdp;
455 int fd;
456 {
457
458 fdp->fd_ofiles[fd] = NULL;
459 fd_unused(fdp, fd);
460 }
461
462 int
463 fdrelease(p, fd)
464 struct proc *p;
465 int fd;
466 {
467 struct filedesc *fdp = p->p_fd;
468 struct file **fpp, *fp;
469
470 fpp = &fdp->fd_ofiles[fd];
471 fp = *fpp;
472 if (fp == NULL)
473 return (EBADF);
474
475 FILE_USE(fp);
476
477 *fpp = NULL;
478 fdp->fd_ofileflags[fd] = 0;
479 fd_unused(fdp, fd);
480 return (closef(fp, p));
481 }
482
483 /*
484 * Close a file descriptor.
485 */
486 /* ARGSUSED */
487 int
488 sys_close(p, v, retval)
489 struct proc *p;
490 void *v;
491 register_t *retval;
492 {
493 struct sys_close_args /* {
494 syscallarg(int) fd;
495 } */ *uap = v;
496 int fd = SCARG(uap, fd);
497 struct filedesc *fdp = p->p_fd;
498
499 if ((u_int)fd >= fdp->fd_nfiles)
500 return (EBADF);
501 return (fdrelease(p, fd));
502 }
503
504 /*
505 * Return status information about a file descriptor.
506 */
507 /* ARGSUSED */
508 int
509 sys___fstat13(p, v, retval)
510 struct proc *p;
511 void *v;
512 register_t *retval;
513 {
514 struct sys___fstat13_args /* {
515 syscallarg(int) fd;
516 syscallarg(struct stat *) sb;
517 } */ *uap = v;
518 int fd = SCARG(uap, fd);
519 struct filedesc *fdp = p->p_fd;
520 struct file *fp;
521 struct stat ub;
522 int error;
523
524 if ((u_int)fd >= fdp->fd_nfiles ||
525 (fp = fdp->fd_ofiles[fd]) == NULL ||
526 (fp->f_iflags & FIF_WANTCLOSE) != 0)
527 return (EBADF);
528
529 FILE_USE(fp);
530
531 switch (fp->f_type) {
532
533 case DTYPE_VNODE:
534 error = vn_stat((struct vnode *)fp->f_data, &ub, p);
535 break;
536
537 case DTYPE_SOCKET:
538 error = soo_stat((struct socket *)fp->f_data, &ub);
539 break;
540
541 default:
542 panic("fstat");
543 /*NOTREACHED*/
544 }
545 if (error == 0)
546 error = copyout(&ub, SCARG(uap, sb), sizeof(ub));
547 FILE_UNUSE(fp, p);
548 return (error);
549 }
550
551 /*
552 * Return pathconf information about a file descriptor.
553 */
554 /* ARGSUSED */
555 int
556 sys_fpathconf(p, v, retval)
557 struct proc *p;
558 void *v;
559 register_t *retval;
560 {
561 struct sys_fpathconf_args /* {
562 syscallarg(int) fd;
563 syscallarg(int) name;
564 } */ *uap = v;
565 int fd = SCARG(uap, fd);
566 struct filedesc *fdp = p->p_fd;
567 struct file *fp;
568 struct vnode *vp;
569 int error = 0;
570
571 if ((u_int)fd >= fdp->fd_nfiles ||
572 (fp = fdp->fd_ofiles[fd]) == NULL ||
573 (fp->f_iflags & FIF_WANTCLOSE) != 0)
574 return (EBADF);
575
576 FILE_USE(fp);
577
578 switch (fp->f_type) {
579
580 case DTYPE_SOCKET:
581 if (SCARG(uap, name) != _PC_PIPE_BUF)
582 error = EINVAL;
583 else
584 *retval = PIPE_BUF;
585 break;
586
587 case DTYPE_VNODE:
588 vp = (struct vnode *)fp->f_data;
589 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
590 break;
591
592 default:
593 panic("fpathconf");
594 }
595
596 FILE_UNUSE(fp, p);
597 return (error);
598 }
599
600 /*
601 * Allocate a file descriptor for the process.
602 */
603 int fdexpand;
604
605 int
606 fdalloc(p, want, result)
607 struct proc *p;
608 int want;
609 int *result;
610 {
611 struct filedesc *fdp = p->p_fd;
612 int i;
613 int lim, last, nfiles, rv = 0;
614 struct file **newofile;
615 char *newofileflags;
616
617 /*
618 * Search for a free descriptor starting at the higher
619 * of want or fd_freefile. If that fails, consider
620 * expanding the ofile array.
621 */
622 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
623 for (;;) {
624 last = min(fdp->fd_nfiles, lim);
625 if ((i = want) < fdp->fd_freefile)
626 i = fdp->fd_freefile;
627 for (; i < last; i++) {
628 if (fdp->fd_ofiles[i] == NULL) {
629 fd_used(fdp, i);
630 if (want <= fdp->fd_freefile)
631 fdp->fd_freefile = i;
632 *result = i;
633 return (rv);
634 }
635 }
636
637 /*
638 * No space in current array. Expand?
639 */
640 if (fdp->fd_nfiles >= lim)
641 return (EMFILE);
642 if (fdp->fd_nfiles < NDEXTENT)
643 nfiles = NDEXTENT;
644 else
645 nfiles = 2 * fdp->fd_nfiles;
646 rv = ERESTART;
647 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
648 newofileflags = (char *) &newofile[nfiles];
649 /*
650 * Copy the existing ofile and ofileflags arrays
651 * and zero the new portion of each array.
652 */
653 memcpy(newofile, fdp->fd_ofiles,
654 (i = sizeof(struct file *) * fdp->fd_nfiles));
655 memset((char *)newofile + i, 0, nfiles * sizeof(struct file *) - i);
656 memcpy(newofileflags, fdp->fd_ofileflags,
657 (i = sizeof(char) * fdp->fd_nfiles));
658 memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
659 if (fdp->fd_nfiles > NDFILE)
660 free(fdp->fd_ofiles, M_FILEDESC);
661 fdp->fd_ofiles = newofile;
662 fdp->fd_ofileflags = newofileflags;
663 fdp->fd_nfiles = nfiles;
664 fdexpand++;
665 }
666 }
667
668 /*
669 * Check to see whether n user file descriptors
670 * are available to the process p.
671 */
672 int
673 fdavail(p, n)
674 struct proc *p;
675 int n;
676 {
677 struct filedesc *fdp = p->p_fd;
678 struct file **fpp;
679 int i, lim;
680
681 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
682 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
683 return (1);
684 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
685 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
686 if (*fpp == NULL && --n <= 0)
687 return (1);
688 return (0);
689 }
690
691 /*
692 * Initialize the data structures necessary for managing files.
693 */
694 void
695 finit()
696 {
697
698 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
699 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILE);
700 pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl",
701 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
702 pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
703 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
704 }
705
706 /*
707 * Create a new open file structure and allocate
708 * a file decriptor for the process that refers to it.
709 */
710 int
711 falloc(p, resultfp, resultfd)
712 struct proc *p;
713 struct file **resultfp;
714 int *resultfd;
715 {
716 struct file *fp, *fq;
717 int error, i;
718
719 restart:
720 if ((error = fdalloc(p, 0, &i)) != 0) {
721 if (error == ERESTART)
722 goto restart;
723 return (error);
724 }
725 if (nfiles >= maxfiles) {
726 tablefull("file", "increase kern.maxfiles or MAXFILES");
727 return (ENFILE);
728 }
729 /*
730 * Allocate a new file descriptor.
731 * If the process has file descriptor zero open, add to the list
732 * of open files at that point, otherwise put it at the front of
733 * the list of open files.
734 */
735 nfiles++;
736 fp = pool_get(&file_pool, PR_WAITOK);
737 memset(fp, 0, sizeof(struct file));
738 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
739 LIST_INSERT_AFTER(fq, fp, f_list);
740 } else {
741 LIST_INSERT_HEAD(&filehead, fp, f_list);
742 }
743 p->p_fd->fd_ofiles[i] = fp;
744 fp->f_count = 1;
745 fp->f_cred = p->p_ucred;
746 crhold(fp->f_cred);
747 if (resultfp) {
748 FILE_USE(fp);
749 *resultfp = fp;
750 }
751 if (resultfd)
752 *resultfd = i;
753 return (0);
754 }
755
756 /*
757 * Free a file descriptor.
758 */
759 void
760 ffree(fp)
761 struct file *fp;
762 {
763
764 #ifdef DIAGNOSTIC
765 if (fp->f_usecount)
766 panic("ffree");
767 #endif
768
769 LIST_REMOVE(fp, f_list);
770 crfree(fp->f_cred);
771 #ifdef DIAGNOSTIC
772 fp->f_count = 0;
773 #endif
774 nfiles--;
775 pool_put(&file_pool, fp);
776 }
777
778 /*
779 * Create an initial cwdinfo structure, using the same current and root
780 * directories as p.
781 */
782 struct cwdinfo *
783 cwdinit(p)
784 struct proc *p;
785 {
786 struct cwdinfo *cwdi;
787
788 cwdi = pool_get(&cwdi_pool, PR_WAITOK);
789
790 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
791 if (cwdi->cwdi_cdir)
792 VREF(cwdi->cwdi_cdir);
793 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
794 if (cwdi->cwdi_rdir)
795 VREF(cwdi->cwdi_rdir);
796 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask;
797 cwdi->cwdi_refcnt = 1;
798
799 return (cwdi);
800 }
801
802 /*
803 * Make p2 share p1's cwdinfo.
804 */
805 void
806 cwdshare(p1, p2)
807 struct proc *p1, *p2;
808 {
809
810 p2->p_cwdi = p1->p_cwdi;
811 p1->p_cwdi->cwdi_refcnt++;
812 }
813
814 /*
815 * Make this process not share its cwdinfo structure, maintaining
816 * all cwdinfo state.
817 */
818 void
819 cwdunshare(p)
820 struct proc *p;
821 {
822 struct cwdinfo *newcwdi;
823
824 if (p->p_cwdi->cwdi_refcnt == 1)
825 return;
826
827 newcwdi = cwdinit(p);
828 cwdfree(p);
829 p->p_cwdi = newcwdi;
830 }
831
832 /*
833 * Release a cwdinfo structure.
834 */
835 void
836 cwdfree(p)
837 struct proc *p;
838 {
839 struct cwdinfo *cwdi = p->p_cwdi;
840
841 if (--cwdi->cwdi_refcnt > 0)
842 return;
843
844 p->p_cwdi = NULL;
845
846 vrele(cwdi->cwdi_cdir);
847 if (cwdi->cwdi_rdir)
848 vrele(cwdi->cwdi_rdir);
849 pool_put(&cwdi_pool, cwdi);
850 }
851
852 /*
853 * Create an initial filedesc structure, using the same current and root
854 * directories as p.
855 */
856 struct filedesc *
857 fdinit(p)
858 struct proc *p;
859 {
860 struct filedesc0 *newfdp;
861
862 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
863 memset(newfdp, 0, sizeof(struct filedesc0));
864
865 fdinit1(newfdp);
866
867 return (&newfdp->fd_fd);
868 }
869
870 /*
871 * Initialize a file descriptor table.
872 */
873 void
874 fdinit1(newfdp)
875 struct filedesc0 *newfdp;
876 {
877
878 newfdp->fd_fd.fd_refcnt = 1;
879 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
880 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
881 newfdp->fd_fd.fd_nfiles = NDFILE;
882 }
883
884 /*
885 * Make p2 share p1's filedesc structure.
886 */
887 void
888 fdshare(p1, p2)
889 struct proc *p1, *p2;
890 {
891
892 p2->p_fd = p1->p_fd;
893 p1->p_fd->fd_refcnt++;
894 }
895
896 /*
897 * Make this process not share its filedesc structure, maintaining
898 * all file descriptor state.
899 */
900 void
901 fdunshare(p)
902 struct proc *p;
903 {
904 struct filedesc *newfd;
905
906 if (p->p_fd->fd_refcnt == 1)
907 return;
908
909 newfd = fdcopy(p);
910 fdfree(p);
911 p->p_fd = newfd;
912 }
913
914 /*
915 * Clear a process's fd table.
916 */
917 void
918 fdclear(p)
919 struct proc *p;
920 {
921 struct filedesc *newfd;
922
923 newfd = fdinit(p);
924 fdfree(p);
925 p->p_fd = newfd;
926 }
927
928 /*
929 * Copy a filedesc structure.
930 */
931 struct filedesc *
932 fdcopy(p)
933 struct proc *p;
934 {
935 struct filedesc *newfdp, *fdp = p->p_fd;
936 struct file **fpp;
937 int i;
938
939 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
940 memcpy(newfdp, fdp, sizeof(struct filedesc));
941 newfdp->fd_refcnt = 1;
942
943 /*
944 * If the number of open files fits in the internal arrays
945 * of the open file structure, use them, otherwise allocate
946 * additional memory for the number of descriptors currently
947 * in use.
948 */
949 if (newfdp->fd_lastfile < NDFILE) {
950 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
951 newfdp->fd_ofileflags =
952 ((struct filedesc0 *) newfdp)->fd_dfileflags;
953 i = NDFILE;
954 } else {
955 /*
956 * Compute the smallest multiple of NDEXTENT needed
957 * for the file descriptors currently in use,
958 * allowing the table to shrink.
959 */
960 i = newfdp->fd_nfiles;
961 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
962 i /= 2;
963 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
964 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
965 }
966 newfdp->fd_nfiles = i;
967 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
968 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
969 fpp = newfdp->fd_ofiles;
970 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++)
971 if (*fpp != NULL)
972 (*fpp)->f_count++;
973 return (newfdp);
974 }
975
976 /*
977 * Release a filedesc structure.
978 */
979 void
980 fdfree(p)
981 struct proc *p;
982 {
983 struct filedesc *fdp = p->p_fd;
984 struct file **fpp, *fp;
985 int i;
986
987 if (--fdp->fd_refcnt > 0)
988 return;
989 fpp = fdp->fd_ofiles;
990 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
991 fp = *fpp;
992 if (fp != NULL) {
993 *fpp = NULL;
994 FILE_USE(fp);
995 (void) closef(fp, p);
996 }
997 }
998 p->p_fd = NULL;
999 if (fdp->fd_nfiles > NDFILE)
1000 free(fdp->fd_ofiles, M_FILEDESC);
1001 pool_put(&filedesc0_pool, fdp);
1002 }
1003
1004 /*
1005 * Internal form of close.
1006 * Decrement reference count on file structure.
1007 * Note: p may be NULL when closing a file
1008 * that was being passed in a message.
1009 *
1010 * Note: we expect the caller is holding a usecount, and expects us
1011 * to drop it (the caller thinks the file is going away forever).
1012 */
1013 int
1014 closef(fp, p)
1015 struct file *fp;
1016 struct proc *p;
1017 {
1018 struct vnode *vp;
1019 struct flock lf;
1020 int error;
1021
1022 if (fp == NULL)
1023 return (0);
1024
1025 /*
1026 * POSIX record locking dictates that any close releases ALL
1027 * locks owned by this process. This is handled by setting
1028 * a flag in the unlock to free ONLY locks obeying POSIX
1029 * semantics, and not to free BSD-style file locks.
1030 * If the descriptor was in a message, POSIX-style locks
1031 * aren't passed with the descriptor.
1032 */
1033 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1034 lf.l_whence = SEEK_SET;
1035 lf.l_start = 0;
1036 lf.l_len = 0;
1037 lf.l_type = F_UNLCK;
1038 vp = (struct vnode *)fp->f_data;
1039 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1040 }
1041
1042 /*
1043 * If WANTCLOSE is set, then the reference count on the file
1044 * is 0, but there were multiple users of the file. This can
1045 * happen if a filedesc structure is shared by multiple
1046 * processes.
1047 */
1048 if (fp->f_iflags & FIF_WANTCLOSE) {
1049 /*
1050 * Another user of the file is already closing, and is
1051 * simply waiting for other users of the file to drain.
1052 * Release our usecount, and wake up the closer if it
1053 * is the only remaining use.
1054 */
1055 #ifdef DIAGNOSTIC
1056 if (fp->f_count != 0)
1057 panic("closef: wantclose and count != 0");
1058 if (fp->f_usecount < 2)
1059 panic("closef: wantclose and usecount < 2");
1060 #endif
1061 if (--fp->f_usecount == 1)
1062 wakeup(&fp->f_usecount);
1063 return (0);
1064 } else {
1065 /*
1066 * Decrement the reference count. If we were not the
1067 * last reference, then release our use and just
1068 * return.
1069 */
1070 if (--fp->f_count > 0) {
1071 #ifdef DIAGNOSTIC
1072 if (fp->f_usecount < 1)
1073 panic("closef: no wantclose and usecount < 1");
1074 #endif
1075 fp->f_usecount--;
1076 return (0);
1077 }
1078 if (fp->f_count < 0)
1079 panic("closef: count < 0");
1080 }
1081
1082 /*
1083 * The reference count is now 0. However, there may be
1084 * multiple potential users of this file. This can happen
1085 * if multiple processes shared a single filedesc structure.
1086 *
1087 * Notify these potential users that the file is closing.
1088 * This will prevent them from adding additional uses to
1089 * the file.
1090 */
1091 fp->f_iflags |= FIF_WANTCLOSE;
1092
1093 /*
1094 * We expect the caller to add a use to the file. So, if we
1095 * are the last user, usecount will be 1. If it is not, we
1096 * must wait for the usecount to drain. When it drains back
1097 * to 1, we will be awakened so that we may proceed with the
1098 * close.
1099 */
1100 #ifdef DIAGNOSTIC
1101 if (fp->f_usecount < 1)
1102 panic("closef: usecount < 1");
1103 #endif
1104 while (fp->f_usecount > 1)
1105 (void) tsleep(&fp->f_usecount, PRIBIO, "closef", 0);
1106 #ifdef DIAGNOSTIC
1107 if (fp->f_usecount != 1)
1108 panic("closef: usecount != 1");
1109 #endif
1110
1111 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1112 lf.l_whence = SEEK_SET;
1113 lf.l_start = 0;
1114 lf.l_len = 0;
1115 lf.l_type = F_UNLCK;
1116 vp = (struct vnode *)fp->f_data;
1117 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1118 }
1119 if (fp->f_ops)
1120 error = (*fp->f_ops->fo_close)(fp, p);
1121 else
1122 error = 0;
1123
1124 /* Nothing references the file now, drop the final use (us). */
1125 fp->f_usecount--;
1126
1127 ffree(fp);
1128 return (error);
1129 }
1130
1131 /*
1132 * Apply an advisory lock on a file descriptor.
1133 *
1134 * Just attempt to get a record lock of the requested type on
1135 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1136 */
1137 /* ARGSUSED */
1138 int
1139 sys_flock(p, v, retval)
1140 struct proc *p;
1141 void *v;
1142 register_t *retval;
1143 {
1144 struct sys_flock_args /* {
1145 syscallarg(int) fd;
1146 syscallarg(int) how;
1147 } */ *uap = v;
1148 int fd = SCARG(uap, fd);
1149 int how = SCARG(uap, how);
1150 struct filedesc *fdp = p->p_fd;
1151 struct file *fp;
1152 struct vnode *vp;
1153 struct flock lf;
1154 int error = 0;
1155
1156 if ((u_int)fd >= fdp->fd_nfiles ||
1157 (fp = fdp->fd_ofiles[fd]) == NULL ||
1158 (fp->f_iflags & FIF_WANTCLOSE) != 0)
1159 return (EBADF);
1160
1161 FILE_USE(fp);
1162
1163 if (fp->f_type != DTYPE_VNODE) {
1164 error = EOPNOTSUPP;
1165 goto out;
1166 }
1167
1168 vp = (struct vnode *)fp->f_data;
1169 lf.l_whence = SEEK_SET;
1170 lf.l_start = 0;
1171 lf.l_len = 0;
1172 if (how & LOCK_UN) {
1173 lf.l_type = F_UNLCK;
1174 fp->f_flag &= ~FHASLOCK;
1175 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1176 goto out;
1177 }
1178 if (how & LOCK_EX)
1179 lf.l_type = F_WRLCK;
1180 else if (how & LOCK_SH)
1181 lf.l_type = F_RDLCK;
1182 else {
1183 error = EINVAL;
1184 goto out;
1185 }
1186 fp->f_flag |= FHASLOCK;
1187 if (how & LOCK_NB)
1188 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1189 else
1190 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1191 F_FLOCK|F_WAIT);
1192 out:
1193 FILE_UNUSE(fp, p);
1194 return (error);
1195 }
1196
1197 /*
1198 * File Descriptor pseudo-device driver (/dev/fd/).
1199 *
1200 * Opening minor device N dup()s the file (if any) connected to file
1201 * descriptor N belonging to the calling process. Note that this driver
1202 * consists of only the ``open()'' routine, because all subsequent
1203 * references to this file will be direct to the other driver.
1204 */
1205 /* ARGSUSED */
1206 int
1207 filedescopen(dev, mode, type, p)
1208 dev_t dev;
1209 int mode, type;
1210 struct proc *p;
1211 {
1212
1213 /*
1214 * XXX Kludge: set p->p_dupfd to contain the value of the
1215 * the file descriptor being sought for duplication. The error
1216 * return ensures that the vnode for this device will be released
1217 * by vn_open. Open will detect this special error and take the
1218 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1219 * will simply report the error.
1220 */
1221 p->p_dupfd = minor(dev);
1222 return (ENODEV);
1223 }
1224
1225 /*
1226 * Duplicate the specified descriptor to a free descriptor.
1227 */
1228 int
1229 dupfdopen(p, indx, dfd, mode, error)
1230 struct proc *p;
1231 int indx, dfd, mode, error;
1232 {
1233 struct filedesc *fdp = p->p_fd;
1234 struct file *wfp;
1235 struct file *fp;
1236
1237 /*
1238 * If the to-be-dup'd fd number is greater than the allowed number
1239 * of file descriptors, or the fd to be dup'd has already been
1240 * closed, reject. Note, check for new == old is necessary as
1241 * falloc could allocate an already closed to-be-dup'd descriptor
1242 * as the new descriptor.
1243 */
1244 fp = fdp->fd_ofiles[indx];
1245 if ((u_int)dfd >= fdp->fd_nfiles ||
1246 (wfp = fdp->fd_ofiles[dfd]) == NULL ||
1247 (wfp->f_iflags & FIF_WANTCLOSE) != 0 ||
1248 fp == wfp)
1249 return (EBADF);
1250
1251 FILE_USE(wfp);
1252
1253 /*
1254 * There are two cases of interest here.
1255 *
1256 * For ENODEV simply dup (dfd) to file descriptor
1257 * (indx) and return.
1258 *
1259 * For ENXIO steal away the file structure from (dfd) and
1260 * store it in (indx). (dfd) is effectively closed by
1261 * this operation.
1262 *
1263 * Any other error code is just returned.
1264 */
1265 switch (error) {
1266 case ENODEV:
1267 /*
1268 * Check that the mode the file is being opened for is a
1269 * subset of the mode of the existing descriptor.
1270 */
1271 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1272 FILE_UNUSE(wfp, p);
1273 return (EACCES);
1274 }
1275 fdp->fd_ofiles[indx] = wfp;
1276 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1277 wfp->f_count++;
1278 fd_used(fdp, indx);
1279 FILE_UNUSE(wfp, p);
1280 return (0);
1281
1282 case ENXIO:
1283 /*
1284 * Steal away the file pointer from dfd, and stuff it into indx.
1285 */
1286 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1287 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1288 fdp->fd_ofiles[dfd] = NULL;
1289 fdp->fd_ofileflags[dfd] = 0;
1290 /*
1291 * Complete the clean up of the filedesc structure by
1292 * recomputing the various hints.
1293 */
1294 fd_used(fdp, indx);
1295 fd_unused(fdp, dfd);
1296 FILE_UNUSE(wfp, p);
1297 return (0);
1298
1299 default:
1300 FILE_UNUSE(wfp, p);
1301 return (error);
1302 }
1303 /* NOTREACHED */
1304 }
1305
1306 /*
1307 * fcntl call which is being passed to the file's fs.
1308 */
1309 int
1310 fcntl_forfs(fd, p, cmd, arg)
1311 int fd, cmd;
1312 struct proc *p;
1313 void *arg;
1314 {
1315 struct file *fp;
1316 struct filedesc *fdp;
1317 int error;
1318 u_int size;
1319 caddr_t data, memp;
1320 #define STK_PARAMS 128
1321 char stkbuf[STK_PARAMS];
1322
1323 /* fd's value was validated in sys_fcntl before calling this routine */
1324 fdp = p->p_fd;
1325 fp = fdp->fd_ofiles[fd];
1326
1327 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
1328 return (EBADF);
1329
1330 /*
1331 * Interpret high order word to find amount of data to be
1332 * copied to/from the user's address space.
1333 */
1334 size = (size_t)F_PARAM_LEN(cmd);
1335 if (size > F_PARAM_MAX)
1336 return (EINVAL);
1337 memp = NULL;
1338 if (size > sizeof(stkbuf)) {
1339 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
1340 data = memp;
1341 } else
1342 data = stkbuf;
1343 if (cmd & F_FSIN) {
1344 if (size) {
1345 error = copyin(arg, data, size);
1346 if (error) {
1347 if (memp)
1348 free(memp, M_IOCTLOPS);
1349 return (error);
1350 }
1351 } else
1352 *(caddr_t *)data = arg;
1353 } else if ((cmd & F_FSOUT) && size)
1354 /*
1355 * Zero the buffer so the user always
1356 * gets back something deterministic.
1357 */
1358 memset(data, 0, size);
1359 else if (cmd & F_FSVOID)
1360 *(caddr_t *)data = arg;
1361
1362
1363 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p);
1364
1365 /*
1366 * Copy any data to user, size was
1367 * already set and checked above.
1368 */
1369 if (error == 0 && (cmd & F_FSOUT) && size)
1370 error = copyout(data, arg, size);
1371 if (memp)
1372 free(memp, M_IOCTLOPS);
1373 return (error);
1374 }
1375
1376 /*
1377 * Close any files on exec?
1378 */
1379 void
1380 fdcloseexec(p)
1381 struct proc *p;
1382 {
1383 struct filedesc *fdp;
1384 int fd;
1385
1386 fdunshare(p);
1387 cwdunshare(p);
1388
1389 fdp = p->p_fd;
1390 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1391 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1392 (void) fdrelease(p, fd);
1393 }
1394
1395 /*
1396 * It is unsafe for set[ug]id processes to be started with file
1397 * descriptors 0..2 closed, as these descriptors are given implicit
1398 * significance in the Standard C library. fdcheckstd() will create a
1399 * descriptor referencing /dev/null for each of stdin, stdout, and
1400 * stderr that is not already open.
1401 */
1402 int
1403 fdcheckstd(p)
1404 struct proc *p;
1405 {
1406 struct nameidata nd;
1407 struct filedesc *fdp;
1408 struct file *fp;
1409 struct file *devnullfp;
1410 register_t retval;
1411 int fd, i, error, flags = FREAD|FWRITE, devnull = -1, logged = 0;
1412
1413 if ((fdp = p->p_fd) == NULL)
1414 return 0;
1415 for (i = 0; i < 3; i++) {
1416 if (fdp->fd_ofiles[i] != NULL)
1417 continue;
1418 if (!logged) {
1419 log(LOG_WARNING, "set{u,g}id pid %d (%s) was invoked "
1420 "with fd 0, 1, or 2 closed\n", p->p_pid, p->p_comm);
1421 logged++;
1422 }
1423 if (devnull < 0) {
1424 if ((error = falloc(p, &fp, &fd)) != 0)
1425 return error;
1426 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1427 p);
1428 if ((error = vn_open(&nd, flags, 0)) != 0) {
1429 FILE_UNUSE(fp, p);
1430 ffree(fp);
1431 fdremove(p->p_fd, fd);
1432 return error;
1433 }
1434 fp->f_data = (caddr_t)nd.ni_vp;
1435 fp->f_flag = flags;
1436 fp->f_ops = &vnops;
1437 fp->f_type = DTYPE_VNODE;
1438 VOP_UNLOCK(nd.ni_vp, 0);
1439 devnull = fd;
1440 devnullfp = fp;
1441 FILE_UNUSE(fp, p);
1442 } else {
1443 restart:
1444 if ((error = fdalloc(p, 0, &fd)) != 0) {
1445 if (error == ERESTART)
1446 goto restart;
1447 return error;
1448 }
1449
1450 FILE_USE(devnullfp);
1451 /* finishdup() will unuse the descriptors for us */
1452 if ((error = finishdup(p, devnull, fd, &retval)) != 0)
1453 return error;
1454 }
1455 }
1456 return 0;
1457 }
1458