kern_descrip.c revision 1.67.4.6 1 /* $NetBSD: kern_descrip.c,v 1.67.4.6 2002/02/09 22:56:01 he Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/proc.h>
49 #include <sys/file.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/stat.h>
53 #include <sys/ioctl.h>
54 #include <sys/fcntl.h>
55 #include <sys/malloc.h>
56 #include <sys/pool.h>
57 #include <sys/syslog.h>
58 #include <sys/unistd.h>
59 #include <sys/resourcevar.h>
60 #include <sys/conf.h>
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 #include <vm/vm.h>
66
67 /*
68 * Descriptor management.
69 */
70 struct filelist filehead; /* head of list of open files */
71 int nfiles; /* actual number of open files */
72 struct pool file_pool; /* memory pool for file structures */
73 struct pool cwdi_pool; /* memory pool for cwdinfo structures */
74 struct pool filedesc0_pool; /* memory pool for filedesc0 structures */
75
76 static __inline void fd_used __P((struct filedesc *, int));
77 static __inline void fd_unused __P((struct filedesc *, int));
78 int finishdup __P((struct proc *, int, int, register_t *));
79
80 static __inline void
81 fd_used(fdp, fd)
82 struct filedesc *fdp;
83 int fd;
84 {
85
86 if (fd > fdp->fd_lastfile)
87 fdp->fd_lastfile = fd;
88 }
89
90 static __inline void
91 fd_unused(fdp, fd)
92 struct filedesc *fdp;
93 int fd;
94 {
95
96 if (fd < fdp->fd_freefile)
97 fdp->fd_freefile = fd;
98 #ifdef DIAGNOSTIC
99 if (fd > fdp->fd_lastfile)
100 panic("fd_unused: fd_lastfile inconsistent");
101 #endif
102 if (fd == fdp->fd_lastfile) {
103 do {
104 fd--;
105 } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
106 fdp->fd_lastfile = fd;
107 }
108 }
109
110 /*
111 * System calls on descriptors.
112 */
113
114 /*
115 * Duplicate a file descriptor.
116 */
117 /* ARGSUSED */
118 int
119 sys_dup(p, v, retval)
120 struct proc *p;
121 void *v;
122 register_t *retval;
123 {
124 struct sys_dup_args /* {
125 syscallarg(int) fd;
126 } */ *uap = v;
127 struct file *fp;
128 struct filedesc *fdp = p->p_fd;
129 int old = SCARG(uap, fd);
130 int new;
131 int error;
132
133 restart:
134 if ((u_int)old >= fdp->fd_nfiles ||
135 (fp = fdp->fd_ofiles[old]) == NULL ||
136 (fp->f_iflags & FIF_WANTCLOSE) != 0)
137 return (EBADF);
138
139 FILE_USE(fp);
140
141 if ((error = fdalloc(p, 0, &new)) != 0) {
142 FILE_UNUSE(fp, p);
143 if (error == ERESTART)
144 goto restart;
145 return (error);
146 }
147
148 /* finishdup() will unuse the descriptors for us */
149 return (finishdup(p, old, new, retval));
150 }
151
152 /*
153 * Duplicate a file descriptor to a particular value.
154 */
155 /* ARGSUSED */
156 int
157 sys_dup2(p, v, retval)
158 struct proc *p;
159 void *v;
160 register_t *retval;
161 {
162 struct sys_dup2_args /* {
163 syscallarg(int) from;
164 syscallarg(int) to;
165 } */ *uap = v;
166 struct file *fp;
167 struct filedesc *fdp = p->p_fd;
168 int old = SCARG(uap, from), new = SCARG(uap, to);
169 int i, error;
170
171 restart:
172 if ((u_int)old >= fdp->fd_nfiles ||
173 (fp = fdp->fd_ofiles[old]) == NULL ||
174 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
175 (u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
176 (u_int)new >= maxfiles)
177 return (EBADF);
178 if (old == new) {
179 *retval = new;
180 return (0);
181 }
182
183 FILE_USE(fp);
184
185 if (new >= fdp->fd_nfiles) {
186 if ((error = fdalloc(p, new, &i)) != 0) {
187 FILE_UNUSE(fp, p);
188 if (error == ERESTART)
189 goto restart;
190 return (error);
191 }
192 if (new != i)
193 panic("dup2: fdalloc");
194 }
195
196 /*
197 * finishdup() will close the file that's in the `new'
198 * slot, if there's one there.
199 */
200
201 /* finishdup() will unuse the descriptors for us */
202 return (finishdup(p, old, new, retval));
203 }
204
205 int fcntl_forfs __P((int, struct proc *, int, void *));
206
207 /*
208 * The file control system call.
209 */
210 /* ARGSUSED */
211 int
212 sys_fcntl(p, v, retval)
213 struct proc *p;
214 void *v;
215 register_t *retval;
216 {
217 struct sys_fcntl_args /* {
218 syscallarg(int) fd;
219 syscallarg(int) cmd;
220 syscallarg(void *) arg;
221 } */ *uap = v;
222 int fd = SCARG(uap, fd);
223 struct filedesc *fdp = p->p_fd;
224 struct file *fp;
225 struct vnode *vp;
226 int i, tmp, error = 0, flg = F_POSIX, cmd;
227 struct flock fl;
228 int newmin;
229
230 restart:
231 if ((u_int)fd >= fdp->fd_nfiles ||
232 (fp = fdp->fd_ofiles[fd]) == NULL ||
233 (fp->f_iflags & FIF_WANTCLOSE) != 0)
234 return (EBADF);
235
236 FILE_USE(fp);
237
238 cmd = SCARG(uap, cmd);
239 if ((cmd & F_FSCTL)) {
240 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg));
241 goto out;
242 }
243
244 switch (cmd) {
245
246 case F_DUPFD:
247 newmin = (long)SCARG(uap, arg);
248 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
249 (u_int)newmin >= maxfiles) {
250 error = EINVAL;
251 goto out;
252 }
253 if ((error = fdalloc(p, newmin, &i)) != 0) {
254 if (error == ERESTART) {
255 FILE_UNUSE(fp, p);
256 goto restart;
257 }
258 goto out;
259 }
260
261 /* finishdup() will unuse the descriptors for us */
262 return (finishdup(p, fd, i, retval));
263
264 case F_GETFD:
265 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
266 break;
267
268 case F_SETFD:
269 if ((long)SCARG(uap, arg) & 1)
270 fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
271 else
272 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
273 break;
274
275 case F_GETFL:
276 *retval = OFLAGS(fp->f_flag);
277 break;
278
279 case F_SETFL:
280 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
281 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, (caddr_t)&tmp, p);
282 if (error)
283 goto out;
284 fp->f_flag &= ~FCNTLFLAGS;
285 fp->f_flag |= tmp;
286 tmp = fp->f_flag & FNONBLOCK;
287 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
288 if (error)
289 goto out;
290 tmp = fp->f_flag & FASYNC;
291 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
292 if (error == 0)
293 goto out;
294 fp->f_flag &= ~FNONBLOCK;
295 tmp = 0;
296 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
297 break;
298
299 case F_GETOWN:
300 if (fp->f_type == DTYPE_SOCKET) {
301 *retval = ((struct socket *)fp->f_data)->so_pgid;
302 goto out;
303 }
304 error = (*fp->f_ops->fo_ioctl)
305 (fp, TIOCGPGRP, (caddr_t)&tmp, p);
306 *retval = -tmp;
307 break;
308
309 case F_SETOWN:
310 if (fp->f_type == DTYPE_SOCKET) {
311 ((struct socket *)fp->f_data)->so_pgid =
312 (long)SCARG(uap, arg);
313 goto out;
314 }
315 if ((long)SCARG(uap, arg) <= 0) {
316 tmp = (-(long)SCARG(uap, arg));
317 } else {
318 struct proc *p1 = pfind((long)SCARG(uap, arg));
319 if (p1 == 0) {
320 error = ESRCH;
321 goto out;
322 }
323 tmp = (long)p1->p_pgrp->pg_id;
324 }
325 error = (*fp->f_ops->fo_ioctl)
326 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
327 break;
328
329 case F_SETLKW:
330 flg |= F_WAIT;
331 /* Fall into F_SETLK */
332
333 case F_SETLK:
334 if (fp->f_type != DTYPE_VNODE) {
335 error = EINVAL;
336 goto out;
337 }
338 vp = (struct vnode *)fp->f_data;
339 /* Copy in the lock structure */
340 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
341 sizeof(fl));
342 if (error)
343 goto out;
344 if (fl.l_whence == SEEK_CUR)
345 fl.l_start += fp->f_offset;
346 switch (fl.l_type) {
347 case F_RDLCK:
348 if ((fp->f_flag & FREAD) == 0) {
349 error = EBADF;
350 goto out;
351 }
352 p->p_flag |= P_ADVLOCK;
353 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
354 goto out;
355
356 case F_WRLCK:
357 if ((fp->f_flag & FWRITE) == 0) {
358 error = EBADF;
359 goto out;
360 }
361 p->p_flag |= P_ADVLOCK;
362 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
363 goto out;
364
365 case F_UNLCK:
366 error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
367 F_POSIX);
368 goto out;
369
370 default:
371 error = EINVAL;
372 goto out;
373 }
374
375 case F_GETLK:
376 if (fp->f_type != DTYPE_VNODE) {
377 error = EINVAL;
378 goto out;
379 }
380 vp = (struct vnode *)fp->f_data;
381 /* Copy in the lock structure */
382 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
383 sizeof(fl));
384 if (error)
385 goto out;
386 if (fl.l_whence == SEEK_CUR)
387 fl.l_start += fp->f_offset;
388 if (fl.l_type != F_RDLCK &&
389 fl.l_type != F_WRLCK &&
390 fl.l_type != F_UNLCK) {
391 error = EINVAL;
392 goto out;
393 }
394 error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
395 if (error)
396 goto out;
397 error = copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
398 sizeof(fl));
399 break;
400
401 default:
402 error = EINVAL;
403 }
404
405 out:
406 FILE_UNUSE(fp, p);
407 return (error);
408 }
409
410 /*
411 * Common code for dup, dup2, and fcntl(F_DUPFD).
412 */
413 int
414 finishdup(p, old, new, retval)
415 struct proc *p;
416 int old, new;
417 register_t *retval;
418 {
419 struct filedesc *fdp = p->p_fd;
420 struct file *fp, *delfp;
421
422 /*
423 * If there is a file in the new slot, remember it so we
424 * can close it after we've finished the dup. We need
425 * to do it after the dup is finished, since closing
426 * the file may block.
427 *
428 * Note: `old' is already used for us.
429 */
430 delfp = fdp->fd_ofiles[new];
431
432 fp = fdp->fd_ofiles[old];
433 fdp->fd_ofiles[new] = fp;
434 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
435 fp->f_count++;
436 /*
437 * Note, don't have to mark it "used" in the table if there
438 * was already a file in the `new' slot.
439 */
440 if (delfp == NULL)
441 fd_used(fdp, new);
442 *retval = new;
443 FILE_UNUSE(fp, p);
444 if (delfp != NULL) {
445 FILE_USE(delfp);
446 (void) closef(delfp, p);
447 }
448 return (0);
449 }
450
451 void
452 fdremove(fdp, fd)
453 struct filedesc *fdp;
454 int fd;
455 {
456
457 fdp->fd_ofiles[fd] = NULL;
458 fd_unused(fdp, fd);
459 }
460
461 int
462 fdrelease(p, fd)
463 struct proc *p;
464 int fd;
465 {
466 struct filedesc *fdp = p->p_fd;
467 struct file **fpp, *fp;
468
469 fpp = &fdp->fd_ofiles[fd];
470 fp = *fpp;
471 if (fp == NULL)
472 return (EBADF);
473
474 FILE_USE(fp);
475
476 *fpp = NULL;
477 fdp->fd_ofileflags[fd] = 0;
478 fd_unused(fdp, fd);
479 return (closef(fp, p));
480 }
481
482 /*
483 * Close a file descriptor.
484 */
485 /* ARGSUSED */
486 int
487 sys_close(p, v, retval)
488 struct proc *p;
489 void *v;
490 register_t *retval;
491 {
492 struct sys_close_args /* {
493 syscallarg(int) fd;
494 } */ *uap = v;
495 int fd = SCARG(uap, fd);
496 struct filedesc *fdp = p->p_fd;
497
498 if ((u_int)fd >= fdp->fd_nfiles)
499 return (EBADF);
500 return (fdrelease(p, fd));
501 }
502
503 /*
504 * Return status information about a file descriptor.
505 */
506 /* ARGSUSED */
507 int
508 sys___fstat13(p, v, retval)
509 struct proc *p;
510 void *v;
511 register_t *retval;
512 {
513 struct sys___fstat13_args /* {
514 syscallarg(int) fd;
515 syscallarg(struct stat *) sb;
516 } */ *uap = v;
517 int fd = SCARG(uap, fd);
518 struct filedesc *fdp = p->p_fd;
519 struct file *fp;
520 struct stat ub;
521 int error;
522
523 if ((u_int)fd >= fdp->fd_nfiles ||
524 (fp = fdp->fd_ofiles[fd]) == NULL ||
525 (fp->f_iflags & FIF_WANTCLOSE) != 0)
526 return (EBADF);
527
528 FILE_USE(fp);
529
530 switch (fp->f_type) {
531
532 case DTYPE_VNODE:
533 error = vn_stat((struct vnode *)fp->f_data, &ub, p);
534 break;
535
536 case DTYPE_SOCKET:
537 error = soo_stat((struct socket *)fp->f_data, &ub);
538 break;
539
540 default:
541 panic("fstat");
542 /*NOTREACHED*/
543 }
544 if (error == 0)
545 error = copyout(&ub, SCARG(uap, sb), sizeof(ub));
546 FILE_UNUSE(fp, p);
547 return (error);
548 }
549
550 /*
551 * Return pathconf information about a file descriptor.
552 */
553 /* ARGSUSED */
554 int
555 sys_fpathconf(p, v, retval)
556 struct proc *p;
557 void *v;
558 register_t *retval;
559 {
560 struct sys_fpathconf_args /* {
561 syscallarg(int) fd;
562 syscallarg(int) name;
563 } */ *uap = v;
564 int fd = SCARG(uap, fd);
565 struct filedesc *fdp = p->p_fd;
566 struct file *fp;
567 struct vnode *vp;
568 int error = 0;
569
570 if ((u_int)fd >= fdp->fd_nfiles ||
571 (fp = fdp->fd_ofiles[fd]) == NULL ||
572 (fp->f_iflags & FIF_WANTCLOSE) != 0)
573 return (EBADF);
574
575 FILE_USE(fp);
576
577 switch (fp->f_type) {
578
579 case DTYPE_SOCKET:
580 if (SCARG(uap, name) != _PC_PIPE_BUF)
581 error = EINVAL;
582 else
583 *retval = PIPE_BUF;
584 break;
585
586 case DTYPE_VNODE:
587 vp = (struct vnode *)fp->f_data;
588 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
589 break;
590
591 default:
592 panic("fpathconf");
593 }
594
595 FILE_UNUSE(fp, p);
596 return (error);
597 }
598
599 /*
600 * Allocate a file descriptor for the process.
601 */
602 int fdexpand;
603
604 int
605 fdalloc(p, want, result)
606 struct proc *p;
607 int want;
608 int *result;
609 {
610 struct filedesc *fdp = p->p_fd;
611 int i;
612 int lim, last, nfiles, rv = 0;
613 struct file **newofile;
614 char *newofileflags;
615
616 /*
617 * Search for a free descriptor starting at the higher
618 * of want or fd_freefile. If that fails, consider
619 * expanding the ofile array.
620 */
621 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
622 for (;;) {
623 last = min(fdp->fd_nfiles, lim);
624 if ((i = want) < fdp->fd_freefile)
625 i = fdp->fd_freefile;
626 for (; i < last; i++) {
627 if (fdp->fd_ofiles[i] == NULL) {
628 fd_used(fdp, i);
629 if (want <= fdp->fd_freefile)
630 fdp->fd_freefile = i;
631 *result = i;
632 return (rv);
633 }
634 }
635
636 /*
637 * No space in current array. Expand?
638 */
639 if (fdp->fd_nfiles >= lim)
640 return (EMFILE);
641 if (fdp->fd_nfiles < NDEXTENT)
642 nfiles = NDEXTENT;
643 else
644 nfiles = 2 * fdp->fd_nfiles;
645 rv = ERESTART;
646 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
647 newofileflags = (char *) &newofile[nfiles];
648 /*
649 * Copy the existing ofile and ofileflags arrays
650 * and zero the new portion of each array.
651 */
652 memcpy(newofile, fdp->fd_ofiles,
653 (i = sizeof(struct file *) * fdp->fd_nfiles));
654 memset((char *)newofile + i, 0, nfiles * sizeof(struct file *) - i);
655 memcpy(newofileflags, fdp->fd_ofileflags,
656 (i = sizeof(char) * fdp->fd_nfiles));
657 memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
658 if (fdp->fd_nfiles > NDFILE)
659 free(fdp->fd_ofiles, M_FILEDESC);
660 fdp->fd_ofiles = newofile;
661 fdp->fd_ofileflags = newofileflags;
662 fdp->fd_nfiles = nfiles;
663 fdexpand++;
664 }
665 }
666
667 /*
668 * Check to see whether n user file descriptors
669 * are available to the process p.
670 */
671 int
672 fdavail(p, n)
673 struct proc *p;
674 int n;
675 {
676 struct filedesc *fdp = p->p_fd;
677 struct file **fpp;
678 int i, lim;
679
680 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
681 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
682 return (1);
683 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
684 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
685 if (*fpp == NULL && --n <= 0)
686 return (1);
687 return (0);
688 }
689
690 /*
691 * Initialize the data structures necessary for managing files.
692 */
693 void
694 finit()
695 {
696
697 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
698 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILE);
699 pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl",
700 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
701 pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
702 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
703 }
704
705 /*
706 * Create a new open file structure and allocate
707 * a file decriptor for the process that refers to it.
708 */
709 int
710 falloc(p, resultfp, resultfd)
711 struct proc *p;
712 struct file **resultfp;
713 int *resultfd;
714 {
715 struct file *fp, *fq;
716 int error, i;
717
718 restart:
719 if ((error = fdalloc(p, 0, &i)) != 0) {
720 if (error == ERESTART)
721 goto restart;
722 return (error);
723 }
724 if (nfiles >= maxfiles) {
725 tablefull("file", "increase kern.maxfiles or MAXFILES");
726 return (ENFILE);
727 }
728 /*
729 * Allocate a new file descriptor.
730 * If the process has file descriptor zero open, add to the list
731 * of open files at that point, otherwise put it at the front of
732 * the list of open files.
733 */
734 nfiles++;
735 fp = pool_get(&file_pool, PR_WAITOK);
736 memset(fp, 0, sizeof(struct file));
737 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
738 LIST_INSERT_AFTER(fq, fp, f_list);
739 } else {
740 LIST_INSERT_HEAD(&filehead, fp, f_list);
741 }
742 p->p_fd->fd_ofiles[i] = fp;
743 fp->f_count = 1;
744 fp->f_cred = p->p_ucred;
745 crhold(fp->f_cred);
746 if (resultfp) {
747 FILE_USE(fp);
748 *resultfp = fp;
749 }
750 if (resultfd)
751 *resultfd = i;
752 return (0);
753 }
754
755 /*
756 * Free a file descriptor.
757 */
758 void
759 ffree(fp)
760 struct file *fp;
761 {
762
763 #ifdef DIAGNOSTIC
764 if (fp->f_usecount)
765 panic("ffree");
766 #endif
767
768 LIST_REMOVE(fp, f_list);
769 crfree(fp->f_cred);
770 #ifdef DIAGNOSTIC
771 fp->f_count = 0;
772 #endif
773 nfiles--;
774 pool_put(&file_pool, fp);
775 }
776
777 /*
778 * Create an initial cwdinfo structure, using the same current and root
779 * directories as p.
780 */
781 struct cwdinfo *
782 cwdinit(p)
783 struct proc *p;
784 {
785 struct cwdinfo *cwdi;
786
787 cwdi = pool_get(&cwdi_pool, PR_WAITOK);
788
789 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
790 if (cwdi->cwdi_cdir)
791 VREF(cwdi->cwdi_cdir);
792 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
793 if (cwdi->cwdi_rdir)
794 VREF(cwdi->cwdi_rdir);
795 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask;
796 cwdi->cwdi_refcnt = 1;
797
798 return (cwdi);
799 }
800
801 /*
802 * Make p2 share p1's cwdinfo.
803 */
804 void
805 cwdshare(p1, p2)
806 struct proc *p1, *p2;
807 {
808
809 p2->p_cwdi = p1->p_cwdi;
810 p1->p_cwdi->cwdi_refcnt++;
811 }
812
813 /*
814 * Make this process not share its cwdinfo structure, maintaining
815 * all cwdinfo state.
816 */
817 void
818 cwdunshare(p)
819 struct proc *p;
820 {
821 struct cwdinfo *newcwdi;
822
823 if (p->p_cwdi->cwdi_refcnt == 1)
824 return;
825
826 newcwdi = cwdinit(p);
827 cwdfree(p);
828 p->p_cwdi = newcwdi;
829 }
830
831 /*
832 * Release a cwdinfo structure.
833 */
834 void
835 cwdfree(p)
836 struct proc *p;
837 {
838 struct cwdinfo *cwdi = p->p_cwdi;
839
840 if (--cwdi->cwdi_refcnt > 0)
841 return;
842
843 p->p_cwdi = NULL;
844
845 vrele(cwdi->cwdi_cdir);
846 if (cwdi->cwdi_rdir)
847 vrele(cwdi->cwdi_rdir);
848 pool_put(&cwdi_pool, cwdi);
849 }
850
851 /*
852 * Create an initial filedesc structure, using the same current and root
853 * directories as p.
854 */
855 struct filedesc *
856 fdinit(p)
857 struct proc *p;
858 {
859 struct filedesc0 *newfdp;
860
861 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
862 memset(newfdp, 0, sizeof(struct filedesc0));
863
864 fdinit1(newfdp);
865
866 return (&newfdp->fd_fd);
867 }
868
869 /*
870 * Initialize a file descriptor table.
871 */
872 void
873 fdinit1(newfdp)
874 struct filedesc0 *newfdp;
875 {
876
877 newfdp->fd_fd.fd_refcnt = 1;
878 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
879 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
880 newfdp->fd_fd.fd_nfiles = NDFILE;
881 }
882
883 /*
884 * Make p2 share p1's filedesc structure.
885 */
886 void
887 fdshare(p1, p2)
888 struct proc *p1, *p2;
889 {
890
891 p2->p_fd = p1->p_fd;
892 p1->p_fd->fd_refcnt++;
893 }
894
895 /*
896 * Make this process not share its filedesc structure, maintaining
897 * all file descriptor state.
898 */
899 void
900 fdunshare(p)
901 struct proc *p;
902 {
903 struct filedesc *newfd;
904
905 if (p->p_fd->fd_refcnt == 1)
906 return;
907
908 newfd = fdcopy(p);
909 fdfree(p);
910 p->p_fd = newfd;
911 }
912
913 /*
914 * Clear a process's fd table.
915 */
916 void
917 fdclear(p)
918 struct proc *p;
919 {
920 struct filedesc *newfd;
921
922 newfd = fdinit(p);
923 fdfree(p);
924 p->p_fd = newfd;
925 }
926
927 /*
928 * Copy a filedesc structure.
929 */
930 struct filedesc *
931 fdcopy(p)
932 struct proc *p;
933 {
934 struct filedesc *newfdp, *fdp = p->p_fd;
935 struct file **fpp;
936 int i;
937
938 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
939 memcpy(newfdp, fdp, sizeof(struct filedesc));
940 newfdp->fd_refcnt = 1;
941
942 /*
943 * If the number of open files fits in the internal arrays
944 * of the open file structure, use them, otherwise allocate
945 * additional memory for the number of descriptors currently
946 * in use.
947 */
948 if (newfdp->fd_lastfile < NDFILE) {
949 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
950 newfdp->fd_ofileflags =
951 ((struct filedesc0 *) newfdp)->fd_dfileflags;
952 i = NDFILE;
953 } else {
954 /*
955 * Compute the smallest multiple of NDEXTENT needed
956 * for the file descriptors currently in use,
957 * allowing the table to shrink.
958 */
959 i = newfdp->fd_nfiles;
960 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
961 i /= 2;
962 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
963 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
964 }
965 newfdp->fd_nfiles = i;
966 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
967 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
968 fpp = newfdp->fd_ofiles;
969 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++)
970 if (*fpp != NULL)
971 (*fpp)->f_count++;
972 return (newfdp);
973 }
974
975 /*
976 * Release a filedesc structure.
977 */
978 void
979 fdfree(p)
980 struct proc *p;
981 {
982 struct filedesc *fdp = p->p_fd;
983 struct file **fpp, *fp;
984 int i;
985
986 if (--fdp->fd_refcnt > 0)
987 return;
988 fpp = fdp->fd_ofiles;
989 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
990 fp = *fpp;
991 if (fp != NULL) {
992 *fpp = NULL;
993 FILE_USE(fp);
994 (void) closef(fp, p);
995 }
996 }
997 p->p_fd = NULL;
998 if (fdp->fd_nfiles > NDFILE)
999 free(fdp->fd_ofiles, M_FILEDESC);
1000 pool_put(&filedesc0_pool, fdp);
1001 }
1002
1003 /*
1004 * Internal form of close.
1005 * Decrement reference count on file structure.
1006 * Note: p may be NULL when closing a file
1007 * that was being passed in a message.
1008 *
1009 * Note: we expect the caller is holding a usecount, and expects us
1010 * to drop it (the caller thinks the file is going away forever).
1011 */
1012 int
1013 closef(fp, p)
1014 struct file *fp;
1015 struct proc *p;
1016 {
1017 struct vnode *vp;
1018 struct flock lf;
1019 int error;
1020
1021 if (fp == NULL)
1022 return (0);
1023
1024 /*
1025 * POSIX record locking dictates that any close releases ALL
1026 * locks owned by this process. This is handled by setting
1027 * a flag in the unlock to free ONLY locks obeying POSIX
1028 * semantics, and not to free BSD-style file locks.
1029 * If the descriptor was in a message, POSIX-style locks
1030 * aren't passed with the descriptor.
1031 */
1032 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1033 lf.l_whence = SEEK_SET;
1034 lf.l_start = 0;
1035 lf.l_len = 0;
1036 lf.l_type = F_UNLCK;
1037 vp = (struct vnode *)fp->f_data;
1038 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1039 }
1040
1041 /*
1042 * If WANTCLOSE is set, then the reference count on the file
1043 * is 0, but there were multiple users of the file. This can
1044 * happen if a filedesc structure is shared by multiple
1045 * processes.
1046 */
1047 if (fp->f_iflags & FIF_WANTCLOSE) {
1048 /*
1049 * Another user of the file is already closing, and is
1050 * simply waiting for other users of the file to drain.
1051 * Release our usecount, and wake up the closer if it
1052 * is the only remaining use.
1053 */
1054 #ifdef DIAGNOSTIC
1055 if (fp->f_count != 0)
1056 panic("closef: wantclose and count != 0");
1057 if (fp->f_usecount < 2)
1058 panic("closef: wantclose and usecount < 2");
1059 #endif
1060 if (--fp->f_usecount == 1)
1061 wakeup(&fp->f_usecount);
1062 return (0);
1063 } else {
1064 /*
1065 * Decrement the reference count. If we were not the
1066 * last reference, then release our use and just
1067 * return.
1068 */
1069 if (--fp->f_count > 0) {
1070 #ifdef DIAGNOSTIC
1071 if (fp->f_usecount < 1)
1072 panic("closef: no wantclose and usecount < 1");
1073 #endif
1074 fp->f_usecount--;
1075 return (0);
1076 }
1077 if (fp->f_count < 0)
1078 panic("closef: count < 0");
1079 }
1080
1081 /*
1082 * The reference count is now 0. However, there may be
1083 * multiple potential users of this file. This can happen
1084 * if multiple processes shared a single filedesc structure.
1085 *
1086 * Notify these potential users that the file is closing.
1087 * This will prevent them from adding additional uses to
1088 * the file.
1089 */
1090 fp->f_iflags |= FIF_WANTCLOSE;
1091
1092 /*
1093 * We expect the caller to add a use to the file. So, if we
1094 * are the last user, usecount will be 1. If it is not, we
1095 * must wait for the usecount to drain. When it drains back
1096 * to 1, we will be awakened so that we may proceed with the
1097 * close.
1098 */
1099 #ifdef DIAGNOSTIC
1100 if (fp->f_usecount < 1)
1101 panic("closef: usecount < 1");
1102 #endif
1103 while (fp->f_usecount > 1)
1104 (void) tsleep(&fp->f_usecount, PRIBIO, "closef", 0);
1105 #ifdef DIAGNOSTIC
1106 if (fp->f_usecount != 1)
1107 panic("closef: usecount != 1");
1108 #endif
1109
1110 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1111 lf.l_whence = SEEK_SET;
1112 lf.l_start = 0;
1113 lf.l_len = 0;
1114 lf.l_type = F_UNLCK;
1115 vp = (struct vnode *)fp->f_data;
1116 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1117 }
1118 if (fp->f_ops)
1119 error = (*fp->f_ops->fo_close)(fp, p);
1120 else
1121 error = 0;
1122
1123 /* Nothing references the file now, drop the final use (us). */
1124 fp->f_usecount--;
1125
1126 ffree(fp);
1127 return (error);
1128 }
1129
1130 /*
1131 * Apply an advisory lock on a file descriptor.
1132 *
1133 * Just attempt to get a record lock of the requested type on
1134 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1135 */
1136 /* ARGSUSED */
1137 int
1138 sys_flock(p, v, retval)
1139 struct proc *p;
1140 void *v;
1141 register_t *retval;
1142 {
1143 struct sys_flock_args /* {
1144 syscallarg(int) fd;
1145 syscallarg(int) how;
1146 } */ *uap = v;
1147 int fd = SCARG(uap, fd);
1148 int how = SCARG(uap, how);
1149 struct filedesc *fdp = p->p_fd;
1150 struct file *fp;
1151 struct vnode *vp;
1152 struct flock lf;
1153 int error = 0;
1154
1155 if ((u_int)fd >= fdp->fd_nfiles ||
1156 (fp = fdp->fd_ofiles[fd]) == NULL ||
1157 (fp->f_iflags & FIF_WANTCLOSE) != 0)
1158 return (EBADF);
1159
1160 FILE_USE(fp);
1161
1162 if (fp->f_type != DTYPE_VNODE) {
1163 error = EOPNOTSUPP;
1164 goto out;
1165 }
1166
1167 vp = (struct vnode *)fp->f_data;
1168 lf.l_whence = SEEK_SET;
1169 lf.l_start = 0;
1170 lf.l_len = 0;
1171 if (how & LOCK_UN) {
1172 lf.l_type = F_UNLCK;
1173 fp->f_flag &= ~FHASLOCK;
1174 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1175 goto out;
1176 }
1177 if (how & LOCK_EX)
1178 lf.l_type = F_WRLCK;
1179 else if (how & LOCK_SH)
1180 lf.l_type = F_RDLCK;
1181 else {
1182 error = EINVAL;
1183 goto out;
1184 }
1185 fp->f_flag |= FHASLOCK;
1186 if (how & LOCK_NB)
1187 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1188 else
1189 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1190 F_FLOCK|F_WAIT);
1191 out:
1192 FILE_UNUSE(fp, p);
1193 return (error);
1194 }
1195
1196 /*
1197 * File Descriptor pseudo-device driver (/dev/fd/).
1198 *
1199 * Opening minor device N dup()s the file (if any) connected to file
1200 * descriptor N belonging to the calling process. Note that this driver
1201 * consists of only the ``open()'' routine, because all subsequent
1202 * references to this file will be direct to the other driver.
1203 */
1204 /* ARGSUSED */
1205 int
1206 filedescopen(dev, mode, type, p)
1207 dev_t dev;
1208 int mode, type;
1209 struct proc *p;
1210 {
1211
1212 /*
1213 * XXX Kludge: set p->p_dupfd to contain the value of the
1214 * the file descriptor being sought for duplication. The error
1215 * return ensures that the vnode for this device will be released
1216 * by vn_open. Open will detect this special error and take the
1217 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1218 * will simply report the error.
1219 */
1220 p->p_dupfd = minor(dev);
1221 return (ENODEV);
1222 }
1223
1224 /*
1225 * Duplicate the specified descriptor to a free descriptor.
1226 */
1227 int
1228 dupfdopen(p, indx, dfd, mode, error)
1229 struct proc *p;
1230 int indx, dfd, mode, error;
1231 {
1232 struct filedesc *fdp = p->p_fd;
1233 struct file *wfp;
1234 struct file *fp;
1235
1236 /*
1237 * If the to-be-dup'd fd number is greater than the allowed number
1238 * of file descriptors, or the fd to be dup'd has already been
1239 * closed, reject. Note, check for new == old is necessary as
1240 * falloc could allocate an already closed to-be-dup'd descriptor
1241 * as the new descriptor.
1242 */
1243 fp = fdp->fd_ofiles[indx];
1244 if ((u_int)dfd >= fdp->fd_nfiles ||
1245 (wfp = fdp->fd_ofiles[dfd]) == NULL ||
1246 (wfp->f_iflags & FIF_WANTCLOSE) != 0 ||
1247 fp == wfp)
1248 return (EBADF);
1249
1250 FILE_USE(wfp);
1251
1252 /*
1253 * There are two cases of interest here.
1254 *
1255 * For ENODEV simply dup (dfd) to file descriptor
1256 * (indx) and return.
1257 *
1258 * For ENXIO steal away the file structure from (dfd) and
1259 * store it in (indx). (dfd) is effectively closed by
1260 * this operation.
1261 *
1262 * Any other error code is just returned.
1263 */
1264 switch (error) {
1265 case ENODEV:
1266 /*
1267 * Check that the mode the file is being opened for is a
1268 * subset of the mode of the existing descriptor.
1269 */
1270 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1271 FILE_UNUSE(wfp, p);
1272 return (EACCES);
1273 }
1274 fdp->fd_ofiles[indx] = wfp;
1275 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1276 wfp->f_count++;
1277 fd_used(fdp, indx);
1278 FILE_UNUSE(wfp, p);
1279 return (0);
1280
1281 case ENXIO:
1282 /*
1283 * Steal away the file pointer from dfd, and stuff it into indx.
1284 */
1285 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1286 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1287 fdp->fd_ofiles[dfd] = NULL;
1288 fdp->fd_ofileflags[dfd] = 0;
1289 /*
1290 * Complete the clean up of the filedesc structure by
1291 * recomputing the various hints.
1292 */
1293 fd_used(fdp, indx);
1294 fd_unused(fdp, dfd);
1295 FILE_UNUSE(wfp, p);
1296 return (0);
1297
1298 default:
1299 FILE_UNUSE(wfp, p);
1300 return (error);
1301 }
1302 /* NOTREACHED */
1303 }
1304
1305 /*
1306 * fcntl call which is being passed to the file's fs.
1307 */
1308 int
1309 fcntl_forfs(fd, p, cmd, arg)
1310 int fd, cmd;
1311 struct proc *p;
1312 void *arg;
1313 {
1314 struct file *fp;
1315 struct filedesc *fdp;
1316 int error;
1317 u_int size;
1318 caddr_t data, memp;
1319 #define STK_PARAMS 128
1320 char stkbuf[STK_PARAMS];
1321
1322 /* fd's value was validated in sys_fcntl before calling this routine */
1323 fdp = p->p_fd;
1324 fp = fdp->fd_ofiles[fd];
1325
1326 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
1327 return (EBADF);
1328
1329 /*
1330 * Interpret high order word to find amount of data to be
1331 * copied to/from the user's address space.
1332 */
1333 size = (size_t)F_PARAM_LEN(cmd);
1334 if (size > F_PARAM_MAX)
1335 return (EINVAL);
1336 memp = NULL;
1337 if (size > sizeof(stkbuf)) {
1338 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
1339 data = memp;
1340 } else
1341 data = stkbuf;
1342 if (cmd & F_FSIN) {
1343 if (size) {
1344 error = copyin(arg, data, size);
1345 if (error) {
1346 if (memp)
1347 free(memp, M_IOCTLOPS);
1348 return (error);
1349 }
1350 } else
1351 *(caddr_t *)data = arg;
1352 } else if ((cmd & F_FSOUT) && size)
1353 /*
1354 * Zero the buffer so the user always
1355 * gets back something deterministic.
1356 */
1357 memset(data, 0, size);
1358 else if (cmd & F_FSVOID)
1359 *(caddr_t *)data = arg;
1360
1361
1362 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p);
1363
1364 /*
1365 * Copy any data to user, size was
1366 * already set and checked above.
1367 */
1368 if (error == 0 && (cmd & F_FSOUT) && size)
1369 error = copyout(data, arg, size);
1370 if (memp)
1371 free(memp, M_IOCTLOPS);
1372 return (error);
1373 }
1374
1375 /*
1376 * Close any files on exec?
1377 */
1378 void
1379 fdcloseexec(p)
1380 struct proc *p;
1381 {
1382 struct filedesc *fdp;
1383 int fd;
1384
1385 fdunshare(p);
1386 cwdunshare(p);
1387
1388 fdp = p->p_fd;
1389 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1390 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1391 (void) fdrelease(p, fd);
1392 }
1393