kern_descrip.c revision 1.77 1 /* $NetBSD: kern_descrip.c,v 1.77 2001/06/14 20:32:47 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/proc.h>
49 #include <sys/file.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/stat.h>
53 #include <sys/ioctl.h>
54 #include <sys/fcntl.h>
55 #include <sys/malloc.h>
56 #include <sys/pool.h>
57 #include <sys/syslog.h>
58 #include <sys/unistd.h>
59 #include <sys/resourcevar.h>
60 #include <sys/conf.h>
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 /*
66 * Descriptor management.
67 */
68 struct filelist filehead; /* head of list of open files */
69 int nfiles; /* actual number of open files */
70 struct pool file_pool; /* memory pool for file structures */
71 struct pool cwdi_pool; /* memory pool for cwdinfo structures */
72 struct pool filedesc0_pool; /* memory pool for filedesc0 structures */
73
74 static __inline void fd_used(struct filedesc *, int);
75 static __inline void fd_unused(struct filedesc *, int);
76 int finishdup(struct proc *, int, int, register_t *);
77 int fcntl_forfs(int, struct proc *, int, void *);
78
79 static __inline void
80 fd_used(struct filedesc *fdp, int fd)
81 {
82
83 if (fd > fdp->fd_lastfile)
84 fdp->fd_lastfile = fd;
85 }
86
87 static __inline void
88 fd_unused(struct filedesc *fdp, int fd)
89 {
90
91 if (fd < fdp->fd_freefile)
92 fdp->fd_freefile = fd;
93 #ifdef DIAGNOSTIC
94 if (fd > fdp->fd_lastfile)
95 panic("fd_unused: fd_lastfile inconsistent");
96 #endif
97 if (fd == fdp->fd_lastfile) {
98 do {
99 fd--;
100 } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
101 fdp->fd_lastfile = fd;
102 }
103 }
104
105 struct file *
106 fd_getfile(struct filedesc *fdp, int fd)
107 {
108 struct file *fp;
109
110 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
111 return (NULL);
112
113 if (FILE_IS_USABLE(fp) == 0)
114 return (NULL);
115
116 return (fp);
117 }
118
119 /*
120 * System calls on descriptors.
121 */
122
123 /*
124 * Duplicate a file descriptor.
125 */
126 /* ARGSUSED */
127 int
128 sys_dup(struct proc *p, void *v, register_t *retval)
129 {
130 struct sys_dup_args /* {
131 syscallarg(int) fd;
132 } */ *uap = v;
133 struct file *fp;
134 struct filedesc *fdp;
135 int old, new, error;
136
137 fdp = p->p_fd;
138 old = SCARG(uap, fd);
139
140 restart:
141 if ((fp = fd_getfile(fdp, old)) == NULL)
142 return (EBADF);
143
144 FILE_USE(fp);
145
146 if ((error = fdalloc(p, 0, &new)) != 0) {
147 if (error == ENOSPC) {
148 fdexpand(p);
149 FILE_UNUSE(fp, p);
150 goto restart;
151 }
152 FILE_UNUSE(fp, p);
153 return (error);
154 }
155
156 /* finishdup() will unuse the descriptors for us */
157 return (finishdup(p, old, new, retval));
158 }
159
160 /*
161 * Duplicate a file descriptor to a particular value.
162 */
163 /* ARGSUSED */
164 int
165 sys_dup2(struct proc *p, void *v, register_t *retval)
166 {
167 struct sys_dup2_args /* {
168 syscallarg(int) from;
169 syscallarg(int) to;
170 } */ *uap = v;
171 struct file *fp;
172 struct filedesc *fdp;
173 int old, new, i, error;
174
175 fdp = p->p_fd;
176 old = SCARG(uap, from);
177 new = SCARG(uap, to);
178
179 restart:
180 if ((fp = fd_getfile(fdp, old)) == NULL)
181 return (EBADF);
182
183 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
184 (u_int)new >= maxfiles)
185 return (EBADF);
186
187 if (old == new) {
188 *retval = new;
189 return (0);
190 }
191
192 FILE_USE(fp);
193
194 if (new >= fdp->fd_nfiles) {
195 if ((error = fdalloc(p, new, &i)) != 0) {
196 if (error == ENOSPC) {
197 fdexpand(p);
198 FILE_UNUSE(fp, p);
199 goto restart;
200 }
201 FILE_UNUSE(fp, p);
202 return (error);
203 }
204 if (new != i)
205 panic("dup2: fdalloc");
206 }
207
208 /*
209 * finishdup() will close the file that's in the `new'
210 * slot, if there's one there.
211 */
212
213 /* finishdup() will unuse the descriptors for us */
214 return (finishdup(p, old, new, retval));
215 }
216
217 /*
218 * The file control system call.
219 */
220 /* ARGSUSED */
221 int
222 sys_fcntl(struct proc *p, void *v, register_t *retval)
223 {
224 struct sys_fcntl_args /* {
225 syscallarg(int) fd;
226 syscallarg(int) cmd;
227 syscallarg(void *) arg;
228 } */ *uap = v;
229 struct filedesc *fdp;
230 struct file *fp;
231 struct vnode *vp;
232 int fd, i, tmp, error, flg, cmd, newmin;
233 struct flock fl;
234
235 fd = SCARG(uap, fd);
236 fdp = p->p_fd;
237 error = 0;
238 flg = F_POSIX;
239
240 restart:
241 if ((fp = fd_getfile(fdp, fd)) == NULL)
242 return (EBADF);
243
244 FILE_USE(fp);
245
246 cmd = SCARG(uap, cmd);
247 if ((cmd & F_FSCTL)) {
248 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg));
249 goto out;
250 }
251
252 switch (cmd) {
253
254 case F_DUPFD:
255 newmin = (long)SCARG(uap, arg);
256 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
257 (u_int)newmin >= maxfiles) {
258 error = EINVAL;
259 goto out;
260 }
261 if ((error = fdalloc(p, newmin, &i)) != 0) {
262 if (error == ENOSPC) {
263 fdexpand(p);
264 FILE_UNUSE(fp, p);
265 goto restart;
266 }
267 goto out;
268 }
269
270 /* finishdup() will unuse the descriptors for us */
271 return (finishdup(p, fd, i, retval));
272
273 case F_GETFD:
274 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
275 break;
276
277 case F_SETFD:
278 if ((long)SCARG(uap, arg) & 1)
279 fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
280 else
281 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
282 break;
283
284 case F_GETFL:
285 *retval = OFLAGS(fp->f_flag);
286 break;
287
288 case F_SETFL:
289 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
290 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, (caddr_t)&tmp, p);
291 if (error)
292 goto out;
293 fp->f_flag &= ~FCNTLFLAGS;
294 fp->f_flag |= tmp;
295 tmp = fp->f_flag & FNONBLOCK;
296 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
297 if (error)
298 goto out;
299 tmp = fp->f_flag & FASYNC;
300 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
301 if (error == 0)
302 goto out;
303 fp->f_flag &= ~FNONBLOCK;
304 tmp = 0;
305 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
306 break;
307
308 case F_GETOWN:
309 if (fp->f_type == DTYPE_SOCKET) {
310 *retval = ((struct socket *)fp->f_data)->so_pgid;
311 goto out;
312 }
313 error = (*fp->f_ops->fo_ioctl)
314 (fp, TIOCGPGRP, (caddr_t)retval, p);
315 *retval = -*retval;
316 break;
317
318 case F_SETOWN:
319 if (fp->f_type == DTYPE_SOCKET) {
320 ((struct socket *)fp->f_data)->so_pgid =
321 (long)SCARG(uap, arg);
322 goto out;
323 }
324 if ((long)SCARG(uap, arg) <= 0) {
325 tmp = (-(long)SCARG(uap, arg));
326 } else {
327 struct proc *p1 = pfind((long)SCARG(uap, arg));
328 if (p1 == 0) {
329 error = ESRCH;
330 goto out;
331 }
332 tmp = (long)p1->p_pgrp->pg_id;
333 }
334 error = (*fp->f_ops->fo_ioctl)
335 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
336 break;
337
338 case F_SETLKW:
339 flg |= F_WAIT;
340 /* Fall into F_SETLK */
341
342 case F_SETLK:
343 if (fp->f_type != DTYPE_VNODE) {
344 error = EINVAL;
345 goto out;
346 }
347 vp = (struct vnode *)fp->f_data;
348 /* Copy in the lock structure */
349 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
350 sizeof(fl));
351 if (error)
352 goto out;
353 if (fl.l_whence == SEEK_CUR)
354 fl.l_start += fp->f_offset;
355 switch (fl.l_type) {
356 case F_RDLCK:
357 if ((fp->f_flag & FREAD) == 0) {
358 error = EBADF;
359 goto out;
360 }
361 p->p_flag |= P_ADVLOCK;
362 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
363 goto out;
364
365 case F_WRLCK:
366 if ((fp->f_flag & FWRITE) == 0) {
367 error = EBADF;
368 goto out;
369 }
370 p->p_flag |= P_ADVLOCK;
371 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
372 goto out;
373
374 case F_UNLCK:
375 error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
376 F_POSIX);
377 goto out;
378
379 default:
380 error = EINVAL;
381 goto out;
382 }
383
384 case F_GETLK:
385 if (fp->f_type != DTYPE_VNODE) {
386 error = EINVAL;
387 goto out;
388 }
389 vp = (struct vnode *)fp->f_data;
390 /* Copy in the lock structure */
391 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
392 sizeof(fl));
393 if (error)
394 goto out;
395 if (fl.l_whence == SEEK_CUR)
396 fl.l_start += fp->f_offset;
397 if (fl.l_type != F_RDLCK &&
398 fl.l_type != F_WRLCK &&
399 fl.l_type != F_UNLCK) {
400 error = EINVAL;
401 goto out;
402 }
403 error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
404 if (error)
405 goto out;
406 error = copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
407 sizeof(fl));
408 break;
409
410 default:
411 error = EINVAL;
412 }
413
414 out:
415 FILE_UNUSE(fp, p);
416 return (error);
417 }
418
419 /*
420 * Common code for dup, dup2, and fcntl(F_DUPFD).
421 */
422 int
423 finishdup(struct proc *p, int old, int new, register_t *retval)
424 {
425 struct filedesc *fdp;
426 struct file *fp, *delfp;
427
428 fdp = p->p_fd;
429
430 /*
431 * If there is a file in the new slot, remember it so we
432 * can close it after we've finished the dup. We need
433 * to do it after the dup is finished, since closing
434 * the file may block.
435 *
436 * Note: `old' is already used for us.
437 */
438 delfp = fdp->fd_ofiles[new];
439
440 fp = fdp->fd_ofiles[old];
441 fdp->fd_ofiles[new] = fp;
442 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
443 fp->f_count++;
444 /*
445 * Note, don't have to mark it "used" in the table if there
446 * was already a file in the `new' slot.
447 */
448 if (delfp == NULL)
449 fd_used(fdp, new);
450 *retval = new;
451 FILE_UNUSE(fp, p);
452
453 if (delfp != NULL) {
454 FILE_USE(delfp);
455 (void) closef(delfp, p);
456 }
457 return (0);
458 }
459
460 void
461 fdremove(struct filedesc *fdp, int fd)
462 {
463
464 fdp->fd_ofiles[fd] = NULL;
465 fd_unused(fdp, fd);
466 }
467
468 int
469 fdrelease(struct proc *p, int fd)
470 {
471 struct filedesc *fdp;
472 struct file **fpp, *fp;
473
474 fdp = p->p_fd;
475 fpp = &fdp->fd_ofiles[fd];
476 fp = *fpp;
477 if (fp == NULL)
478 return (EBADF);
479
480 FILE_USE(fp);
481
482 *fpp = NULL;
483 fdp->fd_ofileflags[fd] = 0;
484 fd_unused(fdp, fd);
485 return (closef(fp, p));
486 }
487
488 /*
489 * Close a file descriptor.
490 */
491 /* ARGSUSED */
492 int
493 sys_close(struct proc *p, void *v, register_t *retval)
494 {
495 struct sys_close_args /* {
496 syscallarg(int) fd;
497 } */ *uap = v;
498 int fd;
499 struct filedesc *fdp;
500
501 fd = SCARG(uap, fd);
502 fdp = p->p_fd;
503 if ((u_int)fd >= fdp->fd_nfiles)
504 return (EBADF);
505 return (fdrelease(p, fd));
506 }
507
508 /*
509 * Return status information about a file descriptor.
510 */
511 /* ARGSUSED */
512 int
513 sys___fstat13(struct proc *p, void *v, register_t *retval)
514 {
515 struct sys___fstat13_args /* {
516 syscallarg(int) fd;
517 syscallarg(struct stat *) sb;
518 } */ *uap = v;
519 int fd;
520 struct filedesc *fdp;
521 struct file *fp;
522 struct stat ub;
523 int error;
524
525 fd = SCARG(uap, fd);
526 fdp = p->p_fd;
527
528 if ((fp = fd_getfile(fdp, fd)) == NULL)
529 return (EBADF);
530
531 FILE_USE(fp);
532 error = (*fp->f_ops->fo_stat)(fp, &ub, p);
533 FILE_UNUSE(fp, p);
534
535 if (error == 0)
536 error = copyout(&ub, SCARG(uap, sb), sizeof(ub));
537
538 return (error);
539 }
540
541 /*
542 * Return pathconf information about a file descriptor.
543 */
544 /* ARGSUSED */
545 int
546 sys_fpathconf(struct proc *p, void *v, register_t *retval)
547 {
548 struct sys_fpathconf_args /* {
549 syscallarg(int) fd;
550 syscallarg(int) name;
551 } */ *uap = v;
552 int fd;
553 struct filedesc *fdp;
554 struct file *fp;
555 struct vnode *vp;
556 int error;
557
558 fd = SCARG(uap, fd);
559 fdp = p->p_fd;
560 error = 0;
561
562 if ((fp = fd_getfile(fdp, fd)) == NULL)
563 return (EBADF);
564
565 FILE_USE(fp);
566
567 switch (fp->f_type) {
568
569 case DTYPE_SOCKET:
570 if (SCARG(uap, name) != _PC_PIPE_BUF)
571 error = EINVAL;
572 else
573 *retval = PIPE_BUF;
574 break;
575
576 case DTYPE_VNODE:
577 vp = (struct vnode *)fp->f_data;
578 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
579 break;
580
581 default:
582 panic("fpathconf");
583 }
584
585 FILE_UNUSE(fp, p);
586 return (error);
587 }
588
589 /*
590 * Allocate a file descriptor for the process.
591 */
592 int fdexpanded; /* XXX: what else uses this? */
593
594 int
595 fdalloc(struct proc *p, int want, int *result)
596 {
597 struct filedesc *fdp;
598 int i, lim, last;
599
600 fdp = p->p_fd;
601
602 /*
603 * Search for a free descriptor starting at the higher
604 * of want or fd_freefile. If that fails, consider
605 * expanding the ofile array.
606 */
607 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
608 for (;;) {
609 last = min(fdp->fd_nfiles, lim);
610 if ((i = want) < fdp->fd_freefile)
611 i = fdp->fd_freefile;
612 for (; i < last; i++) {
613 if (fdp->fd_ofiles[i] == NULL) {
614 fd_used(fdp, i);
615 if (want <= fdp->fd_freefile)
616 fdp->fd_freefile = i;
617 *result = i;
618 return (0);
619 }
620 }
621
622 /* No space in current array. Expand? */
623 if (fdp->fd_nfiles >= lim)
624 return (EMFILE);
625
626 /* Let the caller do it. */
627 return (ENOSPC);
628 }
629 }
630
631 void
632 fdexpand(struct proc *p)
633 {
634 struct filedesc *fdp;
635 int i, nfiles;
636 struct file **newofile;
637 char *newofileflags;
638
639 fdp = p->p_fd;
640
641 if (fdp->fd_nfiles < NDEXTENT)
642 nfiles = NDEXTENT;
643 else
644 nfiles = 2 * fdp->fd_nfiles;
645 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
646 newofileflags = (char *) &newofile[nfiles];
647 /*
648 * Copy the existing ofile and ofileflags arrays
649 * and zero the new portion of each array.
650 */
651 memcpy(newofile, fdp->fd_ofiles,
652 (i = sizeof(struct file *) * fdp->fd_nfiles));
653 memset((char *)newofile + i, 0,
654 nfiles * sizeof(struct file *) - i);
655 memcpy(newofileflags, fdp->fd_ofileflags,
656 (i = sizeof(char) * fdp->fd_nfiles));
657 memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
658 if (fdp->fd_nfiles > NDFILE)
659 free(fdp->fd_ofiles, M_FILEDESC);
660 fdp->fd_ofiles = newofile;
661 fdp->fd_ofileflags = newofileflags;
662 fdp->fd_nfiles = nfiles;
663 fdexpanded++;
664 }
665
666 /*
667 * Check to see whether n user file descriptors
668 * are available to the process p.
669 */
670 int
671 fdavail(struct proc *p, int n)
672 {
673 struct filedesc *fdp;
674 struct file **fpp;
675 int i, lim;
676
677 fdp = p->p_fd;
678 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
679 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
680 return (1);
681 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
682 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
683 if (*fpp == NULL && --n <= 0)
684 return (1);
685 return (0);
686 }
687
688 /*
689 * Initialize the data structures necessary for managing files.
690 */
691 void
692 finit(void)
693 {
694
695 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
696 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILE);
697 pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl",
698 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
699 pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
700 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
701 }
702
703 /*
704 * Create a new open file structure and allocate
705 * a file decriptor for the process that refers to it.
706 */
707 int
708 falloc(struct proc *p, struct file **resultfp, int *resultfd)
709 {
710 struct file *fp, *fq;
711 int error, i;
712
713 restart:
714 if ((error = fdalloc(p, 0, &i)) != 0) {
715 if (error == ENOSPC) {
716 fdexpand(p);
717 goto restart;
718 }
719 return (error);
720 }
721 if (nfiles >= maxfiles) {
722 tablefull("file", "increase kern.maxfiles or MAXFILES");
723 return (ENFILE);
724 }
725 /*
726 * Allocate a new file descriptor.
727 * If the process has file descriptor zero open, add to the list
728 * of open files at that point, otherwise put it at the front of
729 * the list of open files.
730 */
731 nfiles++;
732 fp = pool_get(&file_pool, PR_WAITOK);
733 memset(fp, 0, sizeof(struct file));
734 fp->f_iflags = FIF_LARVAL;
735 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
736 LIST_INSERT_AFTER(fq, fp, f_list);
737 } else {
738 LIST_INSERT_HEAD(&filehead, fp, f_list);
739 }
740 p->p_fd->fd_ofiles[i] = fp;
741 fp->f_count = 1;
742 fp->f_cred = p->p_ucred;
743 crhold(fp->f_cred);
744 if (resultfp) {
745 FILE_USE(fp);
746 *resultfp = fp;
747 }
748 if (resultfd)
749 *resultfd = i;
750 return (0);
751 }
752
753 /*
754 * Free a file descriptor.
755 */
756 void
757 ffree(struct file *fp)
758 {
759
760 #ifdef DIAGNOSTIC
761 if (fp->f_usecount)
762 panic("ffree");
763 #endif
764
765 LIST_REMOVE(fp, f_list);
766 crfree(fp->f_cred);
767 #ifdef DIAGNOSTIC
768 fp->f_count = 0;
769 #endif
770 nfiles--;
771 pool_put(&file_pool, fp);
772 }
773
774 /*
775 * Create an initial cwdinfo structure, using the same current and root
776 * directories as p.
777 */
778 struct cwdinfo *
779 cwdinit(struct proc *p)
780 {
781 struct cwdinfo *cwdi;
782
783 cwdi = pool_get(&cwdi_pool, PR_WAITOK);
784
785 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
786 if (cwdi->cwdi_cdir)
787 VREF(cwdi->cwdi_cdir);
788 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
789 if (cwdi->cwdi_rdir)
790 VREF(cwdi->cwdi_rdir);
791 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask;
792 cwdi->cwdi_refcnt = 1;
793
794 return (cwdi);
795 }
796
797 /*
798 * Make p2 share p1's cwdinfo.
799 */
800 void
801 cwdshare(struct proc *p1, struct proc *p2)
802 {
803
804 p2->p_cwdi = p1->p_cwdi;
805 p1->p_cwdi->cwdi_refcnt++;
806 }
807
808 /*
809 * Make this process not share its cwdinfo structure, maintaining
810 * all cwdinfo state.
811 */
812 void
813 cwdunshare(struct proc *p)
814 {
815 struct cwdinfo *newcwdi;
816
817 if (p->p_cwdi->cwdi_refcnt == 1)
818 return;
819
820 newcwdi = cwdinit(p);
821 cwdfree(p);
822 p->p_cwdi = newcwdi;
823 }
824
825 /*
826 * Release a cwdinfo structure.
827 */
828 void
829 cwdfree(struct proc *p)
830 {
831 struct cwdinfo *cwdi;
832
833 cwdi = p->p_cwdi;
834 if (--cwdi->cwdi_refcnt > 0)
835 return;
836
837 p->p_cwdi = NULL;
838
839 vrele(cwdi->cwdi_cdir);
840 if (cwdi->cwdi_rdir)
841 vrele(cwdi->cwdi_rdir);
842 pool_put(&cwdi_pool, cwdi);
843 }
844
845 /*
846 * Create an initial filedesc structure, using the same current and root
847 * directories as p.
848 */
849 struct filedesc *
850 fdinit(struct proc *p)
851 {
852 struct filedesc0 *newfdp;
853
854 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
855 memset(newfdp, 0, sizeof(struct filedesc0));
856
857 fdinit1(newfdp);
858
859 return (&newfdp->fd_fd);
860 }
861
862 /*
863 * Initialize a file descriptor table.
864 */
865 void
866 fdinit1(struct filedesc0 *newfdp)
867 {
868
869 newfdp->fd_fd.fd_refcnt = 1;
870 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
871 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
872 newfdp->fd_fd.fd_nfiles = NDFILE;
873 }
874
875 /*
876 * Make p2 share p1's filedesc structure.
877 */
878 void
879 fdshare(struct proc *p1, struct proc *p2)
880 {
881
882 p2->p_fd = p1->p_fd;
883 p1->p_fd->fd_refcnt++;
884 }
885
886 /*
887 * Make this process not share its filedesc structure, maintaining
888 * all file descriptor state.
889 */
890 void
891 fdunshare(struct proc *p)
892 {
893 struct filedesc *newfd;
894
895 if (p->p_fd->fd_refcnt == 1)
896 return;
897
898 newfd = fdcopy(p);
899 fdfree(p);
900 p->p_fd = newfd;
901 }
902
903 /*
904 * Clear a process's fd table.
905 */
906 void
907 fdclear(struct proc *p)
908 {
909 struct filedesc *newfd;
910
911 newfd = fdinit(p);
912 fdfree(p);
913 p->p_fd = newfd;
914 }
915
916 /*
917 * Copy a filedesc structure.
918 */
919 struct filedesc *
920 fdcopy(struct proc *p)
921 {
922 struct filedesc *newfdp, *fdp;
923 struct file **fpp;
924 int i;
925
926 fdp = p->p_fd;
927 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
928 memcpy(newfdp, fdp, sizeof(struct filedesc));
929 newfdp->fd_refcnt = 1;
930
931 /*
932 * If the number of open files fits in the internal arrays
933 * of the open file structure, use them, otherwise allocate
934 * additional memory for the number of descriptors currently
935 * in use.
936 */
937 if (newfdp->fd_lastfile < NDFILE) {
938 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
939 newfdp->fd_ofileflags =
940 ((struct filedesc0 *) newfdp)->fd_dfileflags;
941 i = NDFILE;
942 } else {
943 /*
944 * Compute the smallest multiple of NDEXTENT needed
945 * for the file descriptors currently in use,
946 * allowing the table to shrink.
947 */
948 i = newfdp->fd_nfiles;
949 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
950 i /= 2;
951 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
952 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
953 }
954 newfdp->fd_nfiles = i;
955 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
956 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
957 fpp = newfdp->fd_ofiles;
958 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++)
959 if (*fpp != NULL)
960 (*fpp)->f_count++;
961 return (newfdp);
962 }
963
964 /*
965 * Release a filedesc structure.
966 */
967 void
968 fdfree(struct proc *p)
969 {
970 struct filedesc *fdp;
971 struct file **fpp, *fp;
972 int i;
973
974 fdp = p->p_fd;
975 if (--fdp->fd_refcnt > 0)
976 return;
977 fpp = fdp->fd_ofiles;
978 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
979 fp = *fpp;
980 if (fp != NULL) {
981 *fpp = NULL;
982 FILE_USE(fp);
983 (void) closef(fp, p);
984 }
985 }
986 p->p_fd = NULL;
987 if (fdp->fd_nfiles > NDFILE)
988 free(fdp->fd_ofiles, M_FILEDESC);
989 pool_put(&filedesc0_pool, fdp);
990 }
991
992 /*
993 * Internal form of close.
994 * Decrement reference count on file structure.
995 * Note: p may be NULL when closing a file
996 * that was being passed in a message.
997 *
998 * Note: we expect the caller is holding a usecount, and expects us
999 * to drop it (the caller thinks the file is going away forever).
1000 */
1001 int
1002 closef(struct file *fp, struct proc *p)
1003 {
1004 struct vnode *vp;
1005 struct flock lf;
1006 int error;
1007
1008 if (fp == NULL)
1009 return (0);
1010
1011 /*
1012 * POSIX record locking dictates that any close releases ALL
1013 * locks owned by this process. This is handled by setting
1014 * a flag in the unlock to free ONLY locks obeying POSIX
1015 * semantics, and not to free BSD-style file locks.
1016 * If the descriptor was in a message, POSIX-style locks
1017 * aren't passed with the descriptor.
1018 */
1019 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1020 lf.l_whence = SEEK_SET;
1021 lf.l_start = 0;
1022 lf.l_len = 0;
1023 lf.l_type = F_UNLCK;
1024 vp = (struct vnode *)fp->f_data;
1025 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1026 }
1027
1028 /*
1029 * If WANTCLOSE is set, then the reference count on the file
1030 * is 0, but there were multiple users of the file. This can
1031 * happen if a filedesc structure is shared by multiple
1032 * processes.
1033 */
1034 if (fp->f_iflags & FIF_WANTCLOSE) {
1035 /*
1036 * Another user of the file is already closing, and is
1037 * simply waiting for other users of the file to drain.
1038 * Release our usecount, and wake up the closer if it
1039 * is the only remaining use.
1040 */
1041 #ifdef DIAGNOSTIC
1042 if (fp->f_count != 0)
1043 panic("closef: wantclose and count != 0");
1044 if (fp->f_usecount < 2)
1045 panic("closef: wantclose and usecount < 2");
1046 #endif
1047 if (--fp->f_usecount == 1)
1048 wakeup(&fp->f_usecount);
1049 return (0);
1050 } else {
1051 /*
1052 * Decrement the reference count. If we were not the
1053 * last reference, then release our use and just
1054 * return.
1055 */
1056 if (--fp->f_count > 0) {
1057 #ifdef DIAGNOSTIC
1058 if (fp->f_usecount < 1)
1059 panic("closef: no wantclose and usecount < 1");
1060 #endif
1061 fp->f_usecount--;
1062 return (0);
1063 }
1064 if (fp->f_count < 0)
1065 panic("closef: count < 0");
1066 }
1067
1068 /*
1069 * The reference count is now 0. However, there may be
1070 * multiple potential users of this file. This can happen
1071 * if multiple processes shared a single filedesc structure.
1072 *
1073 * Notify these potential users that the file is closing.
1074 * This will prevent them from adding additional uses to
1075 * the file.
1076 */
1077 fp->f_iflags |= FIF_WANTCLOSE;
1078
1079 /*
1080 * We expect the caller to add a use to the file. So, if we
1081 * are the last user, usecount will be 1. If it is not, we
1082 * must wait for the usecount to drain. When it drains back
1083 * to 1, we will be awakened so that we may proceed with the
1084 * close.
1085 */
1086 #ifdef DIAGNOSTIC
1087 if (fp->f_usecount < 1)
1088 panic("closef: usecount < 1");
1089 #endif
1090 while (fp->f_usecount > 1)
1091 (void) tsleep(&fp->f_usecount, PRIBIO, "closef", 0);
1092 #ifdef DIAGNOSTIC
1093 if (fp->f_usecount != 1)
1094 panic("closef: usecount != 1");
1095 #endif
1096
1097 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1098 lf.l_whence = SEEK_SET;
1099 lf.l_start = 0;
1100 lf.l_len = 0;
1101 lf.l_type = F_UNLCK;
1102 vp = (struct vnode *)fp->f_data;
1103 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1104 }
1105 if (fp->f_ops)
1106 error = (*fp->f_ops->fo_close)(fp, p);
1107 else
1108 error = 0;
1109
1110 /* Nothing references the file now, drop the final use (us). */
1111 fp->f_usecount--;
1112
1113 ffree(fp);
1114 return (error);
1115 }
1116
1117 /*
1118 * Apply an advisory lock on a file descriptor.
1119 *
1120 * Just attempt to get a record lock of the requested type on
1121 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1122 */
1123 /* ARGSUSED */
1124 int
1125 sys_flock(struct proc *p, void *v, register_t *retval)
1126 {
1127 struct sys_flock_args /* {
1128 syscallarg(int) fd;
1129 syscallarg(int) how;
1130 } */ *uap = v;
1131 int fd, how, error;
1132 struct filedesc *fdp;
1133 struct file *fp;
1134 struct vnode *vp;
1135 struct flock lf;
1136
1137 fd = SCARG(uap, fd);
1138 how = SCARG(uap, how);
1139 fdp = p->p_fd;
1140 error = 0;
1141
1142 if ((fp = fd_getfile(fdp, fd)) == NULL)
1143 return (EBADF);
1144
1145 FILE_USE(fp);
1146
1147 if (fp->f_type != DTYPE_VNODE) {
1148 error = EOPNOTSUPP;
1149 goto out;
1150 }
1151
1152 vp = (struct vnode *)fp->f_data;
1153 lf.l_whence = SEEK_SET;
1154 lf.l_start = 0;
1155 lf.l_len = 0;
1156 if (how & LOCK_UN) {
1157 lf.l_type = F_UNLCK;
1158 fp->f_flag &= ~FHASLOCK;
1159 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1160 goto out;
1161 }
1162 if (how & LOCK_EX)
1163 lf.l_type = F_WRLCK;
1164 else if (how & LOCK_SH)
1165 lf.l_type = F_RDLCK;
1166 else {
1167 error = EINVAL;
1168 goto out;
1169 }
1170 fp->f_flag |= FHASLOCK;
1171 if (how & LOCK_NB)
1172 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1173 else
1174 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1175 F_FLOCK|F_WAIT);
1176 out:
1177 FILE_UNUSE(fp, p);
1178 return (error);
1179 }
1180
1181 /*
1182 * File Descriptor pseudo-device driver (/dev/fd/).
1183 *
1184 * Opening minor device N dup()s the file (if any) connected to file
1185 * descriptor N belonging to the calling process. Note that this driver
1186 * consists of only the ``open()'' routine, because all subsequent
1187 * references to this file will be direct to the other driver.
1188 */
1189 /* ARGSUSED */
1190 int
1191 filedescopen(dev_t dev, int mode, int type, struct proc *p)
1192 {
1193
1194 /*
1195 * XXX Kludge: set p->p_dupfd to contain the value of the
1196 * the file descriptor being sought for duplication. The error
1197 * return ensures that the vnode for this device will be released
1198 * by vn_open. Open will detect this special error and take the
1199 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1200 * will simply report the error.
1201 */
1202 p->p_dupfd = minor(dev);
1203 return (ENODEV);
1204 }
1205
1206 /*
1207 * Duplicate the specified descriptor to a free descriptor.
1208 */
1209 int
1210 dupfdopen(struct proc *p, int indx, int dfd, int mode, int error)
1211 {
1212 struct filedesc *fdp;
1213 struct file *wfp, *fp;
1214
1215 fdp = p->p_fd;
1216 /*
1217 * If the to-be-dup'd fd number is greater than the allowed number
1218 * of file descriptors, or the fd to be dup'd has already been
1219 * closed, reject. Note, check for new == old is necessary as
1220 * falloc could allocate an already closed to-be-dup'd descriptor
1221 * as the new descriptor.
1222 */
1223 fp = fdp->fd_ofiles[indx];
1224
1225 if ((wfp = fd_getfile(fdp, dfd)) == NULL)
1226 return (EBADF);
1227
1228 if (fp == wfp)
1229 return (EBADF);
1230
1231 FILE_USE(wfp);
1232
1233 /*
1234 * There are two cases of interest here.
1235 *
1236 * For ENODEV simply dup (dfd) to file descriptor
1237 * (indx) and return.
1238 *
1239 * For ENXIO steal away the file structure from (dfd) and
1240 * store it in (indx). (dfd) is effectively closed by
1241 * this operation.
1242 *
1243 * Any other error code is just returned.
1244 */
1245 switch (error) {
1246 case ENODEV:
1247 /*
1248 * Check that the mode the file is being opened for is a
1249 * subset of the mode of the existing descriptor.
1250 */
1251 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1252 FILE_UNUSE(wfp, p);
1253 return (EACCES);
1254 }
1255 fdp->fd_ofiles[indx] = wfp;
1256 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1257 wfp->f_count++;
1258 fd_used(fdp, indx);
1259 FILE_UNUSE(wfp, p);
1260 return (0);
1261
1262 case ENXIO:
1263 /*
1264 * Steal away the file pointer from dfd, and stuff it into indx.
1265 */
1266 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1267 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1268 fdp->fd_ofiles[dfd] = NULL;
1269 fdp->fd_ofileflags[dfd] = 0;
1270 /*
1271 * Complete the clean up of the filedesc structure by
1272 * recomputing the various hints.
1273 */
1274 fd_used(fdp, indx);
1275 fd_unused(fdp, dfd);
1276 FILE_UNUSE(wfp, p);
1277 return (0);
1278
1279 default:
1280 FILE_UNUSE(wfp, p);
1281 return (error);
1282 }
1283 /* NOTREACHED */
1284 }
1285
1286 /*
1287 * fcntl call which is being passed to the file's fs.
1288 */
1289 int
1290 fcntl_forfs(int fd, struct proc *p, int cmd, void *arg)
1291 {
1292 struct file *fp;
1293 struct filedesc *fdp;
1294 int error;
1295 u_int size;
1296 caddr_t data, memp;
1297 #define STK_PARAMS 128
1298 char stkbuf[STK_PARAMS];
1299
1300 /* fd's value was validated in sys_fcntl before calling this routine */
1301 fdp = p->p_fd;
1302 fp = fdp->fd_ofiles[fd];
1303
1304 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
1305 return (EBADF);
1306
1307 /*
1308 * Interpret high order word to find amount of data to be
1309 * copied to/from the user's address space.
1310 */
1311 size = (size_t)F_PARAM_LEN(cmd);
1312 if (size > F_PARAM_MAX)
1313 return (EINVAL);
1314 memp = NULL;
1315 if (size > sizeof(stkbuf)) {
1316 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
1317 data = memp;
1318 } else
1319 data = stkbuf;
1320 if (cmd & F_FSIN) {
1321 if (size) {
1322 error = copyin(arg, data, size);
1323 if (error) {
1324 if (memp)
1325 free(memp, M_IOCTLOPS);
1326 return (error);
1327 }
1328 } else
1329 *(caddr_t *)data = arg;
1330 } else if ((cmd & F_FSOUT) && size)
1331 /*
1332 * Zero the buffer so the user always
1333 * gets back something deterministic.
1334 */
1335 memset(data, 0, size);
1336 else if (cmd & F_FSVOID)
1337 *(caddr_t *)data = arg;
1338
1339
1340 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p);
1341
1342 /*
1343 * Copy any data to user, size was
1344 * already set and checked above.
1345 */
1346 if (error == 0 && (cmd & F_FSOUT) && size)
1347 error = copyout(data, arg, size);
1348 if (memp)
1349 free(memp, M_IOCTLOPS);
1350 return (error);
1351 }
1352
1353 /*
1354 * Close any files on exec?
1355 */
1356 void
1357 fdcloseexec(struct proc *p)
1358 {
1359 struct filedesc *fdp;
1360 int fd;
1361
1362 fdp = p->p_fd;
1363 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1364 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1365 (void) fdrelease(p, fd);
1366 }
1367