kern_descrip.c revision 1.76 1 /* $NetBSD: kern_descrip.c,v 1.76 2001/06/07 01:29:16 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
41 */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/filedesc.h>
46 #include <sys/kernel.h>
47 #include <sys/vnode.h>
48 #include <sys/proc.h>
49 #include <sys/file.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/stat.h>
53 #include <sys/ioctl.h>
54 #include <sys/fcntl.h>
55 #include <sys/malloc.h>
56 #include <sys/pool.h>
57 #include <sys/syslog.h>
58 #include <sys/unistd.h>
59 #include <sys/resourcevar.h>
60 #include <sys/conf.h>
61
62 #include <sys/mount.h>
63 #include <sys/syscallargs.h>
64
65 /*
66 * Descriptor management.
67 */
68 struct filelist filehead; /* head of list of open files */
69 int nfiles; /* actual number of open files */
70 struct pool file_pool; /* memory pool for file structures */
71 struct pool cwdi_pool; /* memory pool for cwdinfo structures */
72 struct pool filedesc0_pool; /* memory pool for filedesc0 structures */
73
74 static __inline void fd_used(struct filedesc *, int);
75 static __inline void fd_unused(struct filedesc *, int);
76 int finishdup(struct proc *, int, int, register_t *);
77 int fcntl_forfs(int, struct proc *, int, void *);
78
79 static __inline void
80 fd_used(struct filedesc *fdp, int fd)
81 {
82
83 if (fd > fdp->fd_lastfile)
84 fdp->fd_lastfile = fd;
85 }
86
87 static __inline void
88 fd_unused(struct filedesc *fdp, int fd)
89 {
90
91 if (fd < fdp->fd_freefile)
92 fdp->fd_freefile = fd;
93 #ifdef DIAGNOSTIC
94 if (fd > fdp->fd_lastfile)
95 panic("fd_unused: fd_lastfile inconsistent");
96 #endif
97 if (fd == fdp->fd_lastfile) {
98 do {
99 fd--;
100 } while (fd >= 0 && fdp->fd_ofiles[fd] == NULL);
101 fdp->fd_lastfile = fd;
102 }
103 }
104
105 /*
106 * System calls on descriptors.
107 */
108
109 /*
110 * Duplicate a file descriptor.
111 */
112 /* ARGSUSED */
113 int
114 sys_dup(struct proc *p, void *v, register_t *retval)
115 {
116 struct sys_dup_args /* {
117 syscallarg(int) fd;
118 } */ *uap = v;
119 struct file *fp;
120 struct filedesc *fdp;
121 int old, new, error;
122
123 fdp = p->p_fd;
124 old = SCARG(uap, fd);
125
126 restart:
127 if ((u_int)old >= fdp->fd_nfiles ||
128 (fp = fdp->fd_ofiles[old]) == NULL ||
129 (fp->f_iflags & FIF_WANTCLOSE) != 0)
130 return (EBADF);
131
132 FILE_USE(fp);
133
134 if ((error = fdalloc(p, 0, &new)) != 0) {
135 if (error == ENOSPC) {
136 fdexpand(p);
137 FILE_UNUSE(fp, p);
138 goto restart;
139 }
140 FILE_UNUSE(fp, p);
141 return (error);
142 }
143
144 /* finishdup() will unuse the descriptors for us */
145 return (finishdup(p, old, new, retval));
146 }
147
148 /*
149 * Duplicate a file descriptor to a particular value.
150 */
151 /* ARGSUSED */
152 int
153 sys_dup2(struct proc *p, void *v, register_t *retval)
154 {
155 struct sys_dup2_args /* {
156 syscallarg(int) from;
157 syscallarg(int) to;
158 } */ *uap = v;
159 struct file *fp;
160 struct filedesc *fdp;
161 int old, new, i, error;
162
163 fdp = p->p_fd;
164 old = SCARG(uap, from);
165 new = SCARG(uap, to);
166
167 restart:
168 if ((u_int)old >= fdp->fd_nfiles ||
169 (fp = fdp->fd_ofiles[old]) == NULL ||
170 (fp->f_iflags & FIF_WANTCLOSE) != 0 ||
171 (u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
172 (u_int)new >= maxfiles)
173 return (EBADF);
174 if (old == new) {
175 *retval = new;
176 return (0);
177 }
178
179 FILE_USE(fp);
180
181 if (new >= fdp->fd_nfiles) {
182 if ((error = fdalloc(p, new, &i)) != 0) {
183 if (error == ENOSPC) {
184 fdexpand(p);
185 FILE_UNUSE(fp, p);
186 goto restart;
187 }
188 FILE_UNUSE(fp, p);
189 return (error);
190 }
191 if (new != i)
192 panic("dup2: fdalloc");
193 }
194
195 /*
196 * finishdup() will close the file that's in the `new'
197 * slot, if there's one there.
198 */
199
200 /* finishdup() will unuse the descriptors for us */
201 return (finishdup(p, old, new, retval));
202 }
203
204 /*
205 * The file control system call.
206 */
207 /* ARGSUSED */
208 int
209 sys_fcntl(struct proc *p, void *v, register_t *retval)
210 {
211 struct sys_fcntl_args /* {
212 syscallarg(int) fd;
213 syscallarg(int) cmd;
214 syscallarg(void *) arg;
215 } */ *uap = v;
216 struct filedesc *fdp;
217 struct file *fp;
218 struct vnode *vp;
219 int fd, i, tmp, error, flg, cmd, newmin;
220 struct flock fl;
221
222 fd = SCARG(uap, fd);
223 fdp = p->p_fd;
224 error = 0;
225 flg = F_POSIX;
226
227 restart:
228 if ((u_int)fd >= fdp->fd_nfiles ||
229 (fp = fdp->fd_ofiles[fd]) == NULL ||
230 (fp->f_iflags & FIF_WANTCLOSE) != 0)
231 return (EBADF);
232
233 FILE_USE(fp);
234
235 cmd = SCARG(uap, cmd);
236 if ((cmd & F_FSCTL)) {
237 error = fcntl_forfs(fd, p, cmd, SCARG(uap, arg));
238 goto out;
239 }
240
241 switch (cmd) {
242
243 case F_DUPFD:
244 newmin = (long)SCARG(uap, arg);
245 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
246 (u_int)newmin >= maxfiles) {
247 error = EINVAL;
248 goto out;
249 }
250 if ((error = fdalloc(p, newmin, &i)) != 0) {
251 if (error == ENOSPC) {
252 fdexpand(p);
253 FILE_UNUSE(fp, p);
254 goto restart;
255 }
256 goto out;
257 }
258
259 /* finishdup() will unuse the descriptors for us */
260 return (finishdup(p, fd, i, retval));
261
262 case F_GETFD:
263 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
264 break;
265
266 case F_SETFD:
267 if ((long)SCARG(uap, arg) & 1)
268 fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
269 else
270 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
271 break;
272
273 case F_GETFL:
274 *retval = OFLAGS(fp->f_flag);
275 break;
276
277 case F_SETFL:
278 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
279 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, (caddr_t)&tmp, p);
280 if (error)
281 goto out;
282 fp->f_flag &= ~FCNTLFLAGS;
283 fp->f_flag |= tmp;
284 tmp = fp->f_flag & FNONBLOCK;
285 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
286 if (error)
287 goto out;
288 tmp = fp->f_flag & FASYNC;
289 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
290 if (error == 0)
291 goto out;
292 fp->f_flag &= ~FNONBLOCK;
293 tmp = 0;
294 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
295 break;
296
297 case F_GETOWN:
298 if (fp->f_type == DTYPE_SOCKET) {
299 *retval = ((struct socket *)fp->f_data)->so_pgid;
300 goto out;
301 }
302 error = (*fp->f_ops->fo_ioctl)
303 (fp, TIOCGPGRP, (caddr_t)retval, p);
304 *retval = -*retval;
305 break;
306
307 case F_SETOWN:
308 if (fp->f_type == DTYPE_SOCKET) {
309 ((struct socket *)fp->f_data)->so_pgid =
310 (long)SCARG(uap, arg);
311 goto out;
312 }
313 if ((long)SCARG(uap, arg) <= 0) {
314 tmp = (-(long)SCARG(uap, arg));
315 } else {
316 struct proc *p1 = pfind((long)SCARG(uap, arg));
317 if (p1 == 0) {
318 error = ESRCH;
319 goto out;
320 }
321 tmp = (long)p1->p_pgrp->pg_id;
322 }
323 error = (*fp->f_ops->fo_ioctl)
324 (fp, TIOCSPGRP, (caddr_t)&tmp, p);
325 break;
326
327 case F_SETLKW:
328 flg |= F_WAIT;
329 /* Fall into F_SETLK */
330
331 case F_SETLK:
332 if (fp->f_type != DTYPE_VNODE) {
333 error = EINVAL;
334 goto out;
335 }
336 vp = (struct vnode *)fp->f_data;
337 /* Copy in the lock structure */
338 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
339 sizeof(fl));
340 if (error)
341 goto out;
342 if (fl.l_whence == SEEK_CUR)
343 fl.l_start += fp->f_offset;
344 switch (fl.l_type) {
345 case F_RDLCK:
346 if ((fp->f_flag & FREAD) == 0) {
347 error = EBADF;
348 goto out;
349 }
350 p->p_flag |= P_ADVLOCK;
351 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
352 goto out;
353
354 case F_WRLCK:
355 if ((fp->f_flag & FWRITE) == 0) {
356 error = EBADF;
357 goto out;
358 }
359 p->p_flag |= P_ADVLOCK;
360 error = VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg);
361 goto out;
362
363 case F_UNLCK:
364 error = VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
365 F_POSIX);
366 goto out;
367
368 default:
369 error = EINVAL;
370 goto out;
371 }
372
373 case F_GETLK:
374 if (fp->f_type != DTYPE_VNODE) {
375 error = EINVAL;
376 goto out;
377 }
378 vp = (struct vnode *)fp->f_data;
379 /* Copy in the lock structure */
380 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl,
381 sizeof(fl));
382 if (error)
383 goto out;
384 if (fl.l_whence == SEEK_CUR)
385 fl.l_start += fp->f_offset;
386 if (fl.l_type != F_RDLCK &&
387 fl.l_type != F_WRLCK &&
388 fl.l_type != F_UNLCK) {
389 error = EINVAL;
390 goto out;
391 }
392 error = VOP_ADVLOCK(vp, (caddr_t)p, F_GETLK, &fl, F_POSIX);
393 if (error)
394 goto out;
395 error = copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg),
396 sizeof(fl));
397 break;
398
399 default:
400 error = EINVAL;
401 }
402
403 out:
404 FILE_UNUSE(fp, p);
405 return (error);
406 }
407
408 /*
409 * Common code for dup, dup2, and fcntl(F_DUPFD).
410 */
411 int
412 finishdup(struct proc *p, int old, int new, register_t *retval)
413 {
414 struct filedesc *fdp;
415 struct file *fp, *delfp;
416
417 fdp = p->p_fd;
418
419 /*
420 * If there is a file in the new slot, remember it so we
421 * can close it after we've finished the dup. We need
422 * to do it after the dup is finished, since closing
423 * the file may block.
424 *
425 * Note: `old' is already used for us.
426 */
427 delfp = fdp->fd_ofiles[new];
428
429 fp = fdp->fd_ofiles[old];
430 fdp->fd_ofiles[new] = fp;
431 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
432 fp->f_count++;
433 /*
434 * Note, don't have to mark it "used" in the table if there
435 * was already a file in the `new' slot.
436 */
437 if (delfp == NULL)
438 fd_used(fdp, new);
439 *retval = new;
440 FILE_UNUSE(fp, p);
441
442 if (delfp != NULL) {
443 FILE_USE(delfp);
444 (void) closef(delfp, p);
445 }
446 return (0);
447 }
448
449 void
450 fdremove(struct filedesc *fdp, int fd)
451 {
452
453 fdp->fd_ofiles[fd] = NULL;
454 fd_unused(fdp, fd);
455 }
456
457 int
458 fdrelease(struct proc *p, int fd)
459 {
460 struct filedesc *fdp;
461 struct file **fpp, *fp;
462
463 fdp = p->p_fd;
464 fpp = &fdp->fd_ofiles[fd];
465 fp = *fpp;
466 if (fp == NULL)
467 return (EBADF);
468
469 FILE_USE(fp);
470
471 *fpp = NULL;
472 fdp->fd_ofileflags[fd] = 0;
473 fd_unused(fdp, fd);
474 return (closef(fp, p));
475 }
476
477 /*
478 * Close a file descriptor.
479 */
480 /* ARGSUSED */
481 int
482 sys_close(struct proc *p, void *v, register_t *retval)
483 {
484 struct sys_close_args /* {
485 syscallarg(int) fd;
486 } */ *uap = v;
487 int fd;
488 struct filedesc *fdp;
489
490 fd = SCARG(uap, fd);
491 fdp = p->p_fd;
492 if ((u_int)fd >= fdp->fd_nfiles)
493 return (EBADF);
494 return (fdrelease(p, fd));
495 }
496
497 /*
498 * Return status information about a file descriptor.
499 */
500 /* ARGSUSED */
501 int
502 sys___fstat13(struct proc *p, void *v, register_t *retval)
503 {
504 struct sys___fstat13_args /* {
505 syscallarg(int) fd;
506 syscallarg(struct stat *) sb;
507 } */ *uap = v;
508 int fd;
509 struct filedesc *fdp;
510 struct file *fp;
511 struct stat ub;
512 int error;
513
514 fd = SCARG(uap, fd);
515 fdp = p->p_fd;
516 if ((u_int)fd >= fdp->fd_nfiles ||
517 (fp = fdp->fd_ofiles[fd]) == NULL ||
518 (fp->f_iflags & FIF_WANTCLOSE) != 0)
519 return (EBADF);
520
521 FILE_USE(fp);
522 error = (*fp->f_ops->fo_stat)(fp, &ub, p);
523 FILE_UNUSE(fp, p);
524
525 if (error == 0)
526 error = copyout(&ub, SCARG(uap, sb), sizeof(ub));
527
528 return (error);
529 }
530
531 /*
532 * Return pathconf information about a file descriptor.
533 */
534 /* ARGSUSED */
535 int
536 sys_fpathconf(struct proc *p, void *v, register_t *retval)
537 {
538 struct sys_fpathconf_args /* {
539 syscallarg(int) fd;
540 syscallarg(int) name;
541 } */ *uap = v;
542 int fd;
543 struct filedesc *fdp;
544 struct file *fp;
545 struct vnode *vp;
546 int error;
547
548 fd = SCARG(uap, fd);
549 fdp = p->p_fd;
550 error = 0;
551
552 if ((u_int)fd >= fdp->fd_nfiles ||
553 (fp = fdp->fd_ofiles[fd]) == NULL ||
554 (fp->f_iflags & FIF_WANTCLOSE) != 0)
555 return (EBADF);
556
557 FILE_USE(fp);
558
559 switch (fp->f_type) {
560
561 case DTYPE_SOCKET:
562 if (SCARG(uap, name) != _PC_PIPE_BUF)
563 error = EINVAL;
564 else
565 *retval = PIPE_BUF;
566 break;
567
568 case DTYPE_VNODE:
569 vp = (struct vnode *)fp->f_data;
570 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
571 break;
572
573 default:
574 panic("fpathconf");
575 }
576
577 FILE_UNUSE(fp, p);
578 return (error);
579 }
580
581 /*
582 * Allocate a file descriptor for the process.
583 */
584 int fdexpanded; /* XXX: what else uses this? */
585
586 int
587 fdalloc(struct proc *p, int want, int *result)
588 {
589 struct filedesc *fdp;
590 int i, lim, last;
591
592 fdp = p->p_fd;
593
594 /*
595 * Search for a free descriptor starting at the higher
596 * of want or fd_freefile. If that fails, consider
597 * expanding the ofile array.
598 */
599 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
600 for (;;) {
601 last = min(fdp->fd_nfiles, lim);
602 if ((i = want) < fdp->fd_freefile)
603 i = fdp->fd_freefile;
604 for (; i < last; i++) {
605 if (fdp->fd_ofiles[i] == NULL) {
606 fd_used(fdp, i);
607 if (want <= fdp->fd_freefile)
608 fdp->fd_freefile = i;
609 *result = i;
610 return (0);
611 }
612 }
613
614 /* No space in current array. Expand? */
615 if (fdp->fd_nfiles >= lim)
616 return (EMFILE);
617
618 /* Let the caller do it. */
619 return (ENOSPC);
620 }
621 }
622
623 void
624 fdexpand(struct proc *p)
625 {
626 struct filedesc *fdp;
627 int i, nfiles;
628 struct file **newofile;
629 char *newofileflags;
630
631 fdp = p->p_fd;
632
633 if (fdp->fd_nfiles < NDEXTENT)
634 nfiles = NDEXTENT;
635 else
636 nfiles = 2 * fdp->fd_nfiles;
637 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
638 newofileflags = (char *) &newofile[nfiles];
639 /*
640 * Copy the existing ofile and ofileflags arrays
641 * and zero the new portion of each array.
642 */
643 memcpy(newofile, fdp->fd_ofiles,
644 (i = sizeof(struct file *) * fdp->fd_nfiles));
645 memset((char *)newofile + i, 0,
646 nfiles * sizeof(struct file *) - i);
647 memcpy(newofileflags, fdp->fd_ofileflags,
648 (i = sizeof(char) * fdp->fd_nfiles));
649 memset(newofileflags + i, 0, nfiles * sizeof(char) - i);
650 if (fdp->fd_nfiles > NDFILE)
651 free(fdp->fd_ofiles, M_FILEDESC);
652 fdp->fd_ofiles = newofile;
653 fdp->fd_ofileflags = newofileflags;
654 fdp->fd_nfiles = nfiles;
655 fdexpanded++;
656 }
657
658 /*
659 * Check to see whether n user file descriptors
660 * are available to the process p.
661 */
662 int
663 fdavail(struct proc *p, int n)
664 {
665 struct filedesc *fdp;
666 struct file **fpp;
667 int i, lim;
668
669 fdp = p->p_fd;
670 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
671 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
672 return (1);
673 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
674 for (i = min(lim,fdp->fd_nfiles) - fdp->fd_freefile; --i >= 0; fpp++)
675 if (*fpp == NULL && --n <= 0)
676 return (1);
677 return (0);
678 }
679
680 /*
681 * Initialize the data structures necessary for managing files.
682 */
683 void
684 finit(void)
685 {
686
687 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl",
688 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILE);
689 pool_init(&cwdi_pool, sizeof(struct cwdinfo), 0, 0, 0, "cwdipl",
690 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
691 pool_init(&filedesc0_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl",
692 0, pool_page_alloc_nointr, pool_page_free_nointr, M_FILEDESC);
693 }
694
695 /*
696 * Create a new open file structure and allocate
697 * a file decriptor for the process that refers to it.
698 */
699 int
700 falloc(struct proc *p, struct file **resultfp, int *resultfd)
701 {
702 struct file *fp, *fq;
703 int error, i;
704
705 restart:
706 if ((error = fdalloc(p, 0, &i)) != 0) {
707 if (error == ENOSPC) {
708 fdexpand(p);
709 goto restart;
710 }
711 return (error);
712 }
713 if (nfiles >= maxfiles) {
714 tablefull("file", "increase kern.maxfiles or MAXFILES");
715 return (ENFILE);
716 }
717 /*
718 * Allocate a new file descriptor.
719 * If the process has file descriptor zero open, add to the list
720 * of open files at that point, otherwise put it at the front of
721 * the list of open files.
722 */
723 nfiles++;
724 fp = pool_get(&file_pool, PR_WAITOK);
725 memset(fp, 0, sizeof(struct file));
726 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) {
727 LIST_INSERT_AFTER(fq, fp, f_list);
728 } else {
729 LIST_INSERT_HEAD(&filehead, fp, f_list);
730 }
731 p->p_fd->fd_ofiles[i] = fp;
732 fp->f_count = 1;
733 fp->f_cred = p->p_ucred;
734 crhold(fp->f_cred);
735 if (resultfp) {
736 FILE_USE(fp);
737 *resultfp = fp;
738 }
739 if (resultfd)
740 *resultfd = i;
741 return (0);
742 }
743
744 /*
745 * Free a file descriptor.
746 */
747 void
748 ffree(struct file *fp)
749 {
750
751 #ifdef DIAGNOSTIC
752 if (fp->f_usecount)
753 panic("ffree");
754 #endif
755
756 LIST_REMOVE(fp, f_list);
757 crfree(fp->f_cred);
758 #ifdef DIAGNOSTIC
759 fp->f_count = 0;
760 #endif
761 nfiles--;
762 pool_put(&file_pool, fp);
763 }
764
765 /*
766 * Create an initial cwdinfo structure, using the same current and root
767 * directories as p.
768 */
769 struct cwdinfo *
770 cwdinit(struct proc *p)
771 {
772 struct cwdinfo *cwdi;
773
774 cwdi = pool_get(&cwdi_pool, PR_WAITOK);
775
776 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
777 if (cwdi->cwdi_cdir)
778 VREF(cwdi->cwdi_cdir);
779 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
780 if (cwdi->cwdi_rdir)
781 VREF(cwdi->cwdi_rdir);
782 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask;
783 cwdi->cwdi_refcnt = 1;
784
785 return (cwdi);
786 }
787
788 /*
789 * Make p2 share p1's cwdinfo.
790 */
791 void
792 cwdshare(struct proc *p1, struct proc *p2)
793 {
794
795 p2->p_cwdi = p1->p_cwdi;
796 p1->p_cwdi->cwdi_refcnt++;
797 }
798
799 /*
800 * Make this process not share its cwdinfo structure, maintaining
801 * all cwdinfo state.
802 */
803 void
804 cwdunshare(struct proc *p)
805 {
806 struct cwdinfo *newcwdi;
807
808 if (p->p_cwdi->cwdi_refcnt == 1)
809 return;
810
811 newcwdi = cwdinit(p);
812 cwdfree(p);
813 p->p_cwdi = newcwdi;
814 }
815
816 /*
817 * Release a cwdinfo structure.
818 */
819 void
820 cwdfree(struct proc *p)
821 {
822 struct cwdinfo *cwdi;
823
824 cwdi = p->p_cwdi;
825 if (--cwdi->cwdi_refcnt > 0)
826 return;
827
828 p->p_cwdi = NULL;
829
830 vrele(cwdi->cwdi_cdir);
831 if (cwdi->cwdi_rdir)
832 vrele(cwdi->cwdi_rdir);
833 pool_put(&cwdi_pool, cwdi);
834 }
835
836 /*
837 * Create an initial filedesc structure, using the same current and root
838 * directories as p.
839 */
840 struct filedesc *
841 fdinit(struct proc *p)
842 {
843 struct filedesc0 *newfdp;
844
845 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
846 memset(newfdp, 0, sizeof(struct filedesc0));
847
848 fdinit1(newfdp);
849
850 return (&newfdp->fd_fd);
851 }
852
853 /*
854 * Initialize a file descriptor table.
855 */
856 void
857 fdinit1(struct filedesc0 *newfdp)
858 {
859
860 newfdp->fd_fd.fd_refcnt = 1;
861 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
862 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
863 newfdp->fd_fd.fd_nfiles = NDFILE;
864 }
865
866 /*
867 * Make p2 share p1's filedesc structure.
868 */
869 void
870 fdshare(struct proc *p1, struct proc *p2)
871 {
872
873 p2->p_fd = p1->p_fd;
874 p1->p_fd->fd_refcnt++;
875 }
876
877 /*
878 * Make this process not share its filedesc structure, maintaining
879 * all file descriptor state.
880 */
881 void
882 fdunshare(struct proc *p)
883 {
884 struct filedesc *newfd;
885
886 if (p->p_fd->fd_refcnt == 1)
887 return;
888
889 newfd = fdcopy(p);
890 fdfree(p);
891 p->p_fd = newfd;
892 }
893
894 /*
895 * Clear a process's fd table.
896 */
897 void
898 fdclear(struct proc *p)
899 {
900 struct filedesc *newfd;
901
902 newfd = fdinit(p);
903 fdfree(p);
904 p->p_fd = newfd;
905 }
906
907 /*
908 * Copy a filedesc structure.
909 */
910 struct filedesc *
911 fdcopy(struct proc *p)
912 {
913 struct filedesc *newfdp, *fdp;
914 struct file **fpp;
915 int i;
916
917 fdp = p->p_fd;
918 newfdp = pool_get(&filedesc0_pool, PR_WAITOK);
919 memcpy(newfdp, fdp, sizeof(struct filedesc));
920 newfdp->fd_refcnt = 1;
921
922 /*
923 * If the number of open files fits in the internal arrays
924 * of the open file structure, use them, otherwise allocate
925 * additional memory for the number of descriptors currently
926 * in use.
927 */
928 if (newfdp->fd_lastfile < NDFILE) {
929 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
930 newfdp->fd_ofileflags =
931 ((struct filedesc0 *) newfdp)->fd_dfileflags;
932 i = NDFILE;
933 } else {
934 /*
935 * Compute the smallest multiple of NDEXTENT needed
936 * for the file descriptors currently in use,
937 * allowing the table to shrink.
938 */
939 i = newfdp->fd_nfiles;
940 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
941 i /= 2;
942 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
943 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
944 }
945 newfdp->fd_nfiles = i;
946 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file **));
947 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
948 fpp = newfdp->fd_ofiles;
949 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp++)
950 if (*fpp != NULL)
951 (*fpp)->f_count++;
952 return (newfdp);
953 }
954
955 /*
956 * Release a filedesc structure.
957 */
958 void
959 fdfree(struct proc *p)
960 {
961 struct filedesc *fdp;
962 struct file **fpp, *fp;
963 int i;
964
965 fdp = p->p_fd;
966 if (--fdp->fd_refcnt > 0)
967 return;
968 fpp = fdp->fd_ofiles;
969 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
970 fp = *fpp;
971 if (fp != NULL) {
972 *fpp = NULL;
973 FILE_USE(fp);
974 (void) closef(fp, p);
975 }
976 }
977 p->p_fd = NULL;
978 if (fdp->fd_nfiles > NDFILE)
979 free(fdp->fd_ofiles, M_FILEDESC);
980 pool_put(&filedesc0_pool, fdp);
981 }
982
983 /*
984 * Internal form of close.
985 * Decrement reference count on file structure.
986 * Note: p may be NULL when closing a file
987 * that was being passed in a message.
988 *
989 * Note: we expect the caller is holding a usecount, and expects us
990 * to drop it (the caller thinks the file is going away forever).
991 */
992 int
993 closef(struct file *fp, struct proc *p)
994 {
995 struct vnode *vp;
996 struct flock lf;
997 int error;
998
999 if (fp == NULL)
1000 return (0);
1001
1002 /*
1003 * POSIX record locking dictates that any close releases ALL
1004 * locks owned by this process. This is handled by setting
1005 * a flag in the unlock to free ONLY locks obeying POSIX
1006 * semantics, and not to free BSD-style file locks.
1007 * If the descriptor was in a message, POSIX-style locks
1008 * aren't passed with the descriptor.
1009 */
1010 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1011 lf.l_whence = SEEK_SET;
1012 lf.l_start = 0;
1013 lf.l_len = 0;
1014 lf.l_type = F_UNLCK;
1015 vp = (struct vnode *)fp->f_data;
1016 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1017 }
1018
1019 /*
1020 * If WANTCLOSE is set, then the reference count on the file
1021 * is 0, but there were multiple users of the file. This can
1022 * happen if a filedesc structure is shared by multiple
1023 * processes.
1024 */
1025 if (fp->f_iflags & FIF_WANTCLOSE) {
1026 /*
1027 * Another user of the file is already closing, and is
1028 * simply waiting for other users of the file to drain.
1029 * Release our usecount, and wake up the closer if it
1030 * is the only remaining use.
1031 */
1032 #ifdef DIAGNOSTIC
1033 if (fp->f_count != 0)
1034 panic("closef: wantclose and count != 0");
1035 if (fp->f_usecount < 2)
1036 panic("closef: wantclose and usecount < 2");
1037 #endif
1038 if (--fp->f_usecount == 1)
1039 wakeup(&fp->f_usecount);
1040 return (0);
1041 } else {
1042 /*
1043 * Decrement the reference count. If we were not the
1044 * last reference, then release our use and just
1045 * return.
1046 */
1047 if (--fp->f_count > 0) {
1048 #ifdef DIAGNOSTIC
1049 if (fp->f_usecount < 1)
1050 panic("closef: no wantclose and usecount < 1");
1051 #endif
1052 fp->f_usecount--;
1053 return (0);
1054 }
1055 if (fp->f_count < 0)
1056 panic("closef: count < 0");
1057 }
1058
1059 /*
1060 * The reference count is now 0. However, there may be
1061 * multiple potential users of this file. This can happen
1062 * if multiple processes shared a single filedesc structure.
1063 *
1064 * Notify these potential users that the file is closing.
1065 * This will prevent them from adding additional uses to
1066 * the file.
1067 */
1068 fp->f_iflags |= FIF_WANTCLOSE;
1069
1070 /*
1071 * We expect the caller to add a use to the file. So, if we
1072 * are the last user, usecount will be 1. If it is not, we
1073 * must wait for the usecount to drain. When it drains back
1074 * to 1, we will be awakened so that we may proceed with the
1075 * close.
1076 */
1077 #ifdef DIAGNOSTIC
1078 if (fp->f_usecount < 1)
1079 panic("closef: usecount < 1");
1080 #endif
1081 while (fp->f_usecount > 1)
1082 (void) tsleep(&fp->f_usecount, PRIBIO, "closef", 0);
1083 #ifdef DIAGNOSTIC
1084 if (fp->f_usecount != 1)
1085 panic("closef: usecount != 1");
1086 #endif
1087
1088 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1089 lf.l_whence = SEEK_SET;
1090 lf.l_start = 0;
1091 lf.l_len = 0;
1092 lf.l_type = F_UNLCK;
1093 vp = (struct vnode *)fp->f_data;
1094 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1095 }
1096 if (fp->f_ops)
1097 error = (*fp->f_ops->fo_close)(fp, p);
1098 else
1099 error = 0;
1100
1101 /* Nothing references the file now, drop the final use (us). */
1102 fp->f_usecount--;
1103
1104 ffree(fp);
1105 return (error);
1106 }
1107
1108 /*
1109 * Apply an advisory lock on a file descriptor.
1110 *
1111 * Just attempt to get a record lock of the requested type on
1112 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1113 */
1114 /* ARGSUSED */
1115 int
1116 sys_flock(struct proc *p, void *v, register_t *retval)
1117 {
1118 struct sys_flock_args /* {
1119 syscallarg(int) fd;
1120 syscallarg(int) how;
1121 } */ *uap = v;
1122 int fd, how, error;
1123 struct filedesc *fdp;
1124 struct file *fp;
1125 struct vnode *vp;
1126 struct flock lf;
1127
1128 fd = SCARG(uap, fd);
1129 how = SCARG(uap, how);
1130 fdp = p->p_fd;
1131 error = 0;
1132 if ((u_int)fd >= fdp->fd_nfiles ||
1133 (fp = fdp->fd_ofiles[fd]) == NULL ||
1134 (fp->f_iflags & FIF_WANTCLOSE) != 0)
1135 return (EBADF);
1136
1137 FILE_USE(fp);
1138
1139 if (fp->f_type != DTYPE_VNODE) {
1140 error = EOPNOTSUPP;
1141 goto out;
1142 }
1143
1144 vp = (struct vnode *)fp->f_data;
1145 lf.l_whence = SEEK_SET;
1146 lf.l_start = 0;
1147 lf.l_len = 0;
1148 if (how & LOCK_UN) {
1149 lf.l_type = F_UNLCK;
1150 fp->f_flag &= ~FHASLOCK;
1151 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1152 goto out;
1153 }
1154 if (how & LOCK_EX)
1155 lf.l_type = F_WRLCK;
1156 else if (how & LOCK_SH)
1157 lf.l_type = F_RDLCK;
1158 else {
1159 error = EINVAL;
1160 goto out;
1161 }
1162 fp->f_flag |= FHASLOCK;
1163 if (how & LOCK_NB)
1164 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1165 else
1166 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1167 F_FLOCK|F_WAIT);
1168 out:
1169 FILE_UNUSE(fp, p);
1170 return (error);
1171 }
1172
1173 /*
1174 * File Descriptor pseudo-device driver (/dev/fd/).
1175 *
1176 * Opening minor device N dup()s the file (if any) connected to file
1177 * descriptor N belonging to the calling process. Note that this driver
1178 * consists of only the ``open()'' routine, because all subsequent
1179 * references to this file will be direct to the other driver.
1180 */
1181 /* ARGSUSED */
1182 int
1183 filedescopen(dev_t dev, int mode, int type, struct proc *p)
1184 {
1185
1186 /*
1187 * XXX Kludge: set p->p_dupfd to contain the value of the
1188 * the file descriptor being sought for duplication. The error
1189 * return ensures that the vnode for this device will be released
1190 * by vn_open. Open will detect this special error and take the
1191 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1192 * will simply report the error.
1193 */
1194 p->p_dupfd = minor(dev);
1195 return (ENODEV);
1196 }
1197
1198 /*
1199 * Duplicate the specified descriptor to a free descriptor.
1200 */
1201 int
1202 dupfdopen(struct proc *p, int indx, int dfd, int mode, int error)
1203 {
1204 struct filedesc *fdp;
1205 struct file *wfp, *fp;
1206
1207 fdp = p->p_fd;
1208 /*
1209 * If the to-be-dup'd fd number is greater than the allowed number
1210 * of file descriptors, or the fd to be dup'd has already been
1211 * closed, reject. Note, check for new == old is necessary as
1212 * falloc could allocate an already closed to-be-dup'd descriptor
1213 * as the new descriptor.
1214 */
1215 fp = fdp->fd_ofiles[indx];
1216 if ((u_int)dfd >= fdp->fd_nfiles ||
1217 (wfp = fdp->fd_ofiles[dfd]) == NULL ||
1218 (wfp->f_iflags & FIF_WANTCLOSE) != 0 ||
1219 fp == wfp)
1220 return (EBADF);
1221
1222 FILE_USE(wfp);
1223
1224 /*
1225 * There are two cases of interest here.
1226 *
1227 * For ENODEV simply dup (dfd) to file descriptor
1228 * (indx) and return.
1229 *
1230 * For ENXIO steal away the file structure from (dfd) and
1231 * store it in (indx). (dfd) is effectively closed by
1232 * this operation.
1233 *
1234 * Any other error code is just returned.
1235 */
1236 switch (error) {
1237 case ENODEV:
1238 /*
1239 * Check that the mode the file is being opened for is a
1240 * subset of the mode of the existing descriptor.
1241 */
1242 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1243 FILE_UNUSE(wfp, p);
1244 return (EACCES);
1245 }
1246 fdp->fd_ofiles[indx] = wfp;
1247 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1248 wfp->f_count++;
1249 fd_used(fdp, indx);
1250 FILE_UNUSE(wfp, p);
1251 return (0);
1252
1253 case ENXIO:
1254 /*
1255 * Steal away the file pointer from dfd, and stuff it into indx.
1256 */
1257 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1258 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1259 fdp->fd_ofiles[dfd] = NULL;
1260 fdp->fd_ofileflags[dfd] = 0;
1261 /*
1262 * Complete the clean up of the filedesc structure by
1263 * recomputing the various hints.
1264 */
1265 fd_used(fdp, indx);
1266 fd_unused(fdp, dfd);
1267 FILE_UNUSE(wfp, p);
1268 return (0);
1269
1270 default:
1271 FILE_UNUSE(wfp, p);
1272 return (error);
1273 }
1274 /* NOTREACHED */
1275 }
1276
1277 /*
1278 * fcntl call which is being passed to the file's fs.
1279 */
1280 int
1281 fcntl_forfs(int fd, struct proc *p, int cmd, void *arg)
1282 {
1283 struct file *fp;
1284 struct filedesc *fdp;
1285 int error;
1286 u_int size;
1287 caddr_t data, memp;
1288 #define STK_PARAMS 128
1289 char stkbuf[STK_PARAMS];
1290
1291 /* fd's value was validated in sys_fcntl before calling this routine */
1292 fdp = p->p_fd;
1293 fp = fdp->fd_ofiles[fd];
1294
1295 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
1296 return (EBADF);
1297
1298 /*
1299 * Interpret high order word to find amount of data to be
1300 * copied to/from the user's address space.
1301 */
1302 size = (size_t)F_PARAM_LEN(cmd);
1303 if (size > F_PARAM_MAX)
1304 return (EINVAL);
1305 memp = NULL;
1306 if (size > sizeof(stkbuf)) {
1307 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
1308 data = memp;
1309 } else
1310 data = stkbuf;
1311 if (cmd & F_FSIN) {
1312 if (size) {
1313 error = copyin(arg, data, size);
1314 if (error) {
1315 if (memp)
1316 free(memp, M_IOCTLOPS);
1317 return (error);
1318 }
1319 } else
1320 *(caddr_t *)data = arg;
1321 } else if ((cmd & F_FSOUT) && size)
1322 /*
1323 * Zero the buffer so the user always
1324 * gets back something deterministic.
1325 */
1326 memset(data, 0, size);
1327 else if (cmd & F_FSVOID)
1328 *(caddr_t *)data = arg;
1329
1330
1331 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, p);
1332
1333 /*
1334 * Copy any data to user, size was
1335 * already set and checked above.
1336 */
1337 if (error == 0 && (cmd & F_FSOUT) && size)
1338 error = copyout(data, arg, size);
1339 if (memp)
1340 free(memp, M_IOCTLOPS);
1341 return (error);
1342 }
1343
1344 /*
1345 * Close any files on exec?
1346 */
1347 void
1348 fdcloseexec(struct proc *p)
1349 {
1350 struct filedesc *fdp;
1351 int fd;
1352
1353 fdp = p->p_fd;
1354 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1355 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1356 (void) fdrelease(p, fd);
1357 }
1358