1 /* $NetBSD: kern_descrip.c,v 1.170 2008/01/27 16:16:50 martin Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.170 2008/01/27 16:16:50 martin Exp $");
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/filedesc.h>
45 #include <sys/kernel.h>
46 #include <sys/vnode.h>
47 #include <sys/proc.h>
48 #include <sys/file.h>
49 #include <sys/namei.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/stat.h>
53 #include <sys/ioctl.h>
54 #include <sys/fcntl.h>
55 #include <sys/malloc.h>
56 #include <sys/pool.h>
57 #include <sys/syslog.h>
58 #include <sys/unistd.h>
59 #include <sys/resourcevar.h>
60 #include <sys/conf.h>
61 #include <sys/event.h>
62 #include <sys/kauth.h>
63 #include <sys/atomic.h>
#include <sys/kmem.h>	/* assumed needed here for kmem_alloc/kmem_free in fgetdummy/fputdummy */
64
65 #include <sys/mount.h>
66 #include <sys/syscallargs.h>
67
68 static int cwdi_ctor(void *, void *, int);
69 static void cwdi_dtor(void *, void *);
70 static int file_ctor(void *, void *, int);
71 static void file_dtor(void *, void *);
72 int do_posix_fadvise(struct lwp *l, int fd, off_t offset,
73 off_t len, int advice, register_t *retval);
74
75 /*
76 * Descriptor management.
77 */
78 struct filelist filehead; /* head of list of open files */
79 u_int nfiles; /* actual number of open files */
80
81 static pool_cache_t cwdi_cache;
82 static pool_cache_t filedesc0_cache;
83 static pool_cache_t file_cache;
84
85 /* Global file list lock */
86 kmutex_t filelist_lock;
87
88 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
89 MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
90 MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
91
92 static inline int
93 find_next_zero(uint32_t *bitmap, int want, u_int bits)
94 {
95 int i, off, maxoff;
96 uint32_t sub;
97
98 if (want > bits)
99 return -1;
100
101 off = want >> NDENTRYSHIFT;
102 i = want & NDENTRYMASK;
103 if (i) {
104 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i));
105 if (sub != ~0)
106 goto found;
107 off++;
108 }
109
110 maxoff = NDLOSLOTS(bits);
111 while (off < maxoff) {
112 if ((sub = bitmap[off]) != ~0)
113 goto found;
114 off++;
115 }
116
117 return (-1);
118
119 found:
120 return (off << NDENTRYSHIFT) + ffs(~sub) - 1;
121 }
122
123 static int
124 find_last_set(struct filedesc *fd, int last)
125 {
126 int off, i;
127 struct file **ofiles = fd->fd_ofiles;
128 uint32_t *bitmap = fd->fd_lomap;
129
130 off = (last - 1) >> NDENTRYSHIFT;
131
132 while (off >= 0 && !bitmap[off])
133 off--;
134
135 if (off < 0)
136 return (-1);
137
138 i = ((off + 1) << NDENTRYSHIFT) - 1;
139 if (i >= last)
140 i = last - 1;
141
142 while (i > 0 && ofiles[i] == NULL)
143 i--;
144
145 return (i);
146 }
147
148 static inline void
149 fd_used(struct filedesc *fdp, int fd)
150 {
151 u_int off = fd >> NDENTRYSHIFT;
152
153 KASSERT(rw_write_held(&fdp->fd_lock));
154 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0);
155
156 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK);
157 if (fdp->fd_lomap[off] == ~0) {
158 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
159 (1 << (off & NDENTRYMASK))) == 0);
160 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK);
161 }
162
163 if (fd > fdp->fd_lastfile)
164 fdp->fd_lastfile = fd;
165 }
166
167 static inline void
168 fd_unused(struct filedesc *fdp, int fd)
169 {
170 u_int off = fd >> NDENTRYSHIFT;
171
172 KASSERT(rw_write_held(&fdp->fd_lock));
173 if (fd < fdp->fd_freefile)
174 fdp->fd_freefile = fd;
175
176 if (fdp->fd_lomap[off] == ~0) {
177 KDASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] &
178 (1 << (off & NDENTRYMASK))) != 0);
179 fdp->fd_himap[off >> NDENTRYSHIFT] &=
180 ~(1 << (off & NDENTRYMASK));
181 }
182 KDASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0);
183 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK));
184
185 #ifdef DIAGNOSTIC
186 if (fd > fdp->fd_lastfile)
187 panic("fd_unused: fd_lastfile inconsistent");
188 #endif
189 if (fd == fdp->fd_lastfile)
190 fdp->fd_lastfile = find_last_set(fdp, fd);
191 }
192
193 /*
194 * Lookup the file structure corresponding to a file descriptor
195 * and return it locked.
196 * Note: typical usage is: `fp = fd_getfile(..); FILE_USE(fp);'
197 * The locking strategy has been optimised for this case, i.e.
198 * fd_getfile() returns the file locked while FILE_USE() will increment
199 * the file's use count and unlock.
200 */
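/*
 * Illustrative only -- a minimal sketch of the caller pattern described
 * above, modelled on do_sys_fstat() below; the variable names are
 * hypothetical:
 *
 *	struct file *fp;
 *	int error;
 *
 *	if ((fp = fd_getfile(fdp, fd)) == NULL)
 *		return EBADF;
 *	FILE_USE(fp);			(take a use count, drop f_lock)
 *	error = (*fp->f_ops->fo_stat)(fp, &sb, l);
 *	FILE_UNUSE(fp, l);		(release the use count)
 */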
201 struct file *
202 fd_getfile(struct filedesc *fdp, int fd)
203 {
204 struct file *fp;
205
206 rw_enter(&fdp->fd_lock, RW_READER);
207 if ((u_int) fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) {
208 rw_exit(&fdp->fd_lock);
209 return (NULL);
210 }
211
212 FILE_LOCK(fp);
213 if (FILE_IS_USABLE(fp) == 0) {
214 FILE_UNLOCK(fp);
215 rw_exit(&fdp->fd_lock);
216 return (NULL);
217 }
218 rw_exit(&fdp->fd_lock);
219
220 return (fp);
221 }
222
223 /*
224 * Common code for dup, dup2, and fcntl(F_DUPFD).
225 */
226 static int
227 finishdup(struct lwp *l, int old, int new, register_t *retval)
228 {
229 struct filedesc *fdp;
230 struct file *fp, *delfp;
231
232 fdp = l->l_proc->p_fd;
233
234 /*
235 * If there is a file in the new slot, remember it so we
236 * can close it after we've finished the dup. We need
237 * to do it after the dup is finished, since closing
238 * the file may block.
239 *
240 * Note: `old' is already used for us.
241 * Note: Caller already marked `new' slot "used".
242 */
243 rw_enter(&fdp->fd_lock, RW_WRITER);
244 delfp = fdp->fd_ofiles[new];
245
246 fp = fdp->fd_ofiles[old];
247 KDASSERT(fp != NULL);
248 fdp->fd_ofiles[new] = fp;
249 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
250 rw_exit(&fdp->fd_lock);
251
252 *retval = new;
253 FILE_LOCK(fp);
254 fp->f_count++;
255 FILE_UNUSE_HAVELOCK(fp, l);
256
257 if (delfp != NULL) {
258 FILE_LOCK(delfp);
259 FILE_USE(delfp);
260 if (new < fdp->fd_knlistsize)
261 knote_fdclose(l, new);
262 (void) closef(delfp, l);
263 }
264 return (0);
265 }
266
267 /*
268 * Initialize the descriptor system.
269 */
270 void
271 filedesc_init(void)
272 {
273
274 mutex_init(&filelist_lock, MUTEX_DEFAULT, IPL_NONE);
275
276 file_cache = pool_cache_init(sizeof(struct file), 0, 0, 0,
277 "filepl", NULL, IPL_NONE, file_ctor, file_dtor, NULL);
278 KASSERT(file_cache != NULL);
279
280 cwdi_cache = pool_cache_init(sizeof(struct cwdinfo), 0, 0, 0,
281 "cwdipl", NULL, IPL_NONE, cwdi_ctor, cwdi_dtor, NULL);
282 KASSERT(cwdi_cache != NULL);
283
284 filedesc0_cache = pool_cache_init(sizeof(struct filedesc0), 0, 0, 0,
285 "fdescpl", NULL, IPL_NONE, NULL, NULL, NULL);
286 KASSERT(filedesc0_cache != NULL);
287 }
288
289 /*
290 * System calls on descriptors.
291 */
292
293 /*
294 * Duplicate a file descriptor.
295 */
296 /* ARGSUSED */
297 int
298 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
299 {
300 /* {
301 syscallarg(int) fd;
302 } */
303 struct file *fp;
304 struct filedesc *fdp;
305 struct proc *p;
306 int old, new, error;
307
308 p = l->l_proc;
309 fdp = p->p_fd;
310 old = SCARG(uap, fd);
311
312 restart:
313 if ((fp = fd_getfile(fdp, old)) == NULL)
314 return (EBADF);
315
316 FILE_USE(fp);
317
318 if ((error = fdalloc(p, 0, &new)) != 0) {
319 if (error == ENOSPC) {
320 fdexpand(p);
321 FILE_UNUSE(fp, l);
322 goto restart;
323 }
324 FILE_UNUSE(fp, l);
325 return (error);
326 }
327
328 /* finishdup() will unuse the descriptors for us */
329 return (finishdup(l, old, new, retval));
330 }
331
332 /*
333 * Duplicate a file descriptor to a particular value.
334 */
335 /* ARGSUSED */
336 int
337 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
338 {
339 /* {
340 syscallarg(int) from;
341 syscallarg(int) to;
342 } */
343 struct file *fp;
344 struct filedesc *fdp;
345 struct proc *p;
346 int old, new, i, error;
347
348 p = l->l_proc;
349 fdp = p->p_fd;
350 old = SCARG(uap, from);
351 new = SCARG(uap, to);
352
353 restart:
354 if ((fp = fd_getfile(fdp, old)) == NULL)
355 return (EBADF);
356
357 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
358 (u_int)new >= maxfiles) {
359 FILE_UNLOCK(fp);
360 return (EBADF);
361 }
362
363 if (old == new) {
364 FILE_UNLOCK(fp);
365 *retval = new;
366 return (0);
367 }
368
369 FILE_USE(fp);
370
371 if (new >= fdp->fd_nfiles) {
372 if ((error = fdalloc(p, new, &i)) != 0) {
373 if (error == ENOSPC) {
374 fdexpand(p);
375 FILE_UNUSE(fp, l);
376 goto restart;
377 }
378 FILE_UNUSE(fp, l);
379 return (error);
380 }
381 if (new != i)
382 panic("dup2: fdalloc");
383 } else {
384 rw_enter(&fdp->fd_lock, RW_WRITER);
385 /*
386 * Mark `new' slot "used" only if it was empty.
387 */
388 if (fdp->fd_ofiles[new] == NULL)
389 fd_used(fdp, new);
390 rw_exit(&fdp->fd_lock);
391 }
392
393 /*
394 * finishdup() will close the file that's in the `new'
395 * slot, if there's one there.
396 */
397
398 /* finishdup() will unuse the descriptors for us */
399 return (finishdup(l, old, new, retval));
400 }
401
402 /*
403  * An fcntl call that is passed through to the file's filesystem.
404 */
405 static int
406 fcntl_forfs(int fd, struct lwp *l, int cmd, void *arg)
407 {
408 struct file *fp;
409 struct filedesc *fdp;
410 int error;
411 u_int size;
412 void *data, *memp;
413 #define STK_PARAMS 128
414 char stkbuf[STK_PARAMS];
415
416 /* fd's value was validated in sys_fcntl before calling this routine */
417 fdp = l->l_proc->p_fd;
418 fp = fdp->fd_ofiles[fd];
419
420 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
421 return (EBADF);
422
423 /*
424 * Interpret high order word to find amount of data to be
425 * copied to/from the user's address space.
426 */
427 size = (size_t)F_PARAM_LEN(cmd);
428 if (size > F_PARAM_MAX)
429 return (EINVAL);
430 memp = NULL;
431 if (size > sizeof(stkbuf)) {
432 memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
433 data = memp;
434 } else
435 data = stkbuf;
436 if (cmd & F_FSIN) {
437 if (size) {
438 error = copyin(arg, data, size);
439 if (error) {
440 if (memp)
441 free(memp, M_IOCTLOPS);
442 return (error);
443 }
444 } else
445 *(void **)data = arg;
446 } else if ((cmd & F_FSOUT) && size)
447 /*
448 * Zero the buffer so the user always
449 * gets back something deterministic.
450 */
451 memset(data, 0, size);
452 else if (cmd & F_FSVOID)
453 *(void **)data = arg;
454
455
456 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data, l);
457
458 /*
459 * Copy any data to user, size was
460 * already set and checked above.
461 */
462 if (error == 0 && (cmd & F_FSOUT) && size)
463 error = copyout(data, arg, size);
464 if (memp)
465 free(memp, M_IOCTLOPS);
466 return (error);
467 }
468
469 int
470 do_fcntl_lock(struct lwp *l, int fd, int cmd, struct flock *fl)
471 {
472 struct file *fp;
473 struct vnode *vp;
474 struct proc *p = l->l_proc;
475 int error, flg;
476
477 if ((fp = fd_getfile(p->p_fd, fd)) == NULL)
478 return (EBADF);
479
480 FILE_USE(fp);
481
482 if (fp->f_type != DTYPE_VNODE) {
483 error = EINVAL;
484 goto out;
485 }
486 vp = (struct vnode *)fp->f_data;
487 if (fl->l_whence == SEEK_CUR)
488 fl->l_start += fp->f_offset;
489
490 flg = F_POSIX;
491
492 switch (cmd) {
493
494 case F_SETLKW:
495 flg |= F_WAIT;
496 /* Fall into F_SETLK */
497
498 case F_SETLK:
499 switch (fl->l_type) {
500 case F_RDLCK:
501 if ((fp->f_flag & FREAD) == 0) {
502 error = EBADF;
503 goto out;
504 }
505 p->p_flag |= PK_ADVLOCK;
506 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
507 goto out;
508
509 case F_WRLCK:
510 if ((fp->f_flag & FWRITE) == 0) {
511 error = EBADF;
512 goto out;
513 }
514 p->p_flag |= PK_ADVLOCK;
515 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
516 goto out;
517
518 case F_UNLCK:
519 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
520 goto out;
521
522 default:
523 error = EINVAL;
524 goto out;
525 }
526
527 case F_GETLK:
528 if (fl->l_type != F_RDLCK &&
529 fl->l_type != F_WRLCK &&
530 fl->l_type != F_UNLCK) {
531 error = EINVAL;
532 goto out;
533 }
534 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
535 break;
536
537 default:
538 error = EINVAL;
539 break;
540 }
541
542 out:
543 FILE_UNUSE(fp, l);
544 return error;
545 }
546
547 /*
548 * The file control system call.
549 */
550 /* ARGSUSED */
551 int
552 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
553 {
554 /* {
555 syscallarg(int) fd;
556 syscallarg(int) cmd;
557 syscallarg(void *) arg;
558 } */
559 struct filedesc *fdp;
560 struct file *fp;
561 struct proc *p;
562 int fd, i, tmp, error, cmd, newmin;
563 struct flock fl;
564
565 p = l->l_proc;
566 fd = SCARG(uap, fd);
567 cmd = SCARG(uap, cmd);
568 fdp = p->p_fd;
569 error = 0;
570
571 switch (cmd) {
572 case F_CLOSEM:
573 if (fd < 0)
574 return EBADF;
575 while (fdp->fd_lastfile >= fd)
576 fdrelease(l, fdp->fd_lastfile);
577 return 0;
578
579 case F_MAXFD:
580 *retval = fdp->fd_lastfile;
581 return 0;
582
583 case F_SETLKW:
584 case F_SETLK:
585 case F_GETLK:
586 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
587 if (error)
588 return error;
589 error = do_fcntl_lock(l, fd, cmd, &fl);
590 if (cmd == F_GETLK && error == 0)
591 error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
592 return error;
593
594 default:
595 /* Handled below */
596 break;
597 }
598
599 restart:
600 if ((fp = fd_getfile(fdp, fd)) == NULL)
601 return (EBADF);
602
603 FILE_USE(fp);
604
605 if ((cmd & F_FSCTL)) {
606 error = fcntl_forfs(fd, l, cmd, SCARG(uap, arg));
607 goto out;
608 }
609
610 switch (cmd) {
611
612 case F_DUPFD:
613 newmin = (long)SCARG(uap, arg);
614 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
615 (u_int)newmin >= maxfiles) {
616 error = EINVAL;
617 goto out;
618 }
619 if ((error = fdalloc(p, newmin, &i)) != 0) {
620 if (error == ENOSPC) {
621 fdexpand(p);
622 FILE_UNUSE(fp, l);
623 goto restart;
624 }
625 goto out;
626 }
627
628 /* finishdup() will unuse the descriptors for us */
629 return (finishdup(l, fd, i, retval));
630
631 case F_GETFD:
632 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0;
633 break;
634
635 case F_SETFD:
636 if ((long)SCARG(uap, arg) & 1)
637 fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
638 else
639 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
640 break;
641
642 case F_GETFL:
643 *retval = OFLAGS(fp->f_flag);
644 break;
645
646 case F_SETFL:
647 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
648 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp, l);
649 if (error)
650 break;
651 i = tmp ^ fp->f_flag;
652 if (i & FNONBLOCK) {
653 int flgs = tmp & FNONBLOCK;
654 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs, l);
655 if (error)
656 goto reset_fcntl;
657 }
658 if (i & FASYNC) {
659 int flgs = tmp & FASYNC;
660 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs, l);
661 if (error) {
662 if (i & FNONBLOCK) {
663 tmp = fp->f_flag & FNONBLOCK;
664 (void)(*fp->f_ops->fo_ioctl)(fp,
665 FIONBIO, &tmp, l);
666 }
667 goto reset_fcntl;
668 }
669 }
670 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
671 break;
672 reset_fcntl:
673 (void)(*fp->f_ops->fo_fcntl)(fp, F_SETFL, &fp->f_flag, l);
674 break;
675
676 case F_GETOWN:
677 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp, l);
678 *retval = tmp;
679 break;
680
681 case F_SETOWN:
682 tmp = (int)(intptr_t) SCARG(uap, arg);
683 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp, l);
684 break;
685
686 default:
687 error = EINVAL;
688 }
689
690 out:
691 FILE_UNUSE(fp, l);
692 return (error);
693 }
694
695 void
696 fdremove(struct filedesc *fdp, int fd)
697 {
698
699 rw_enter(&fdp->fd_lock, RW_WRITER);
700 fdp->fd_ofiles[fd] = NULL;
701 fd_unused(fdp, fd);
702 rw_exit(&fdp->fd_lock);
703 }
704
705 int
706 fdrelease(struct lwp *l, int fd)
707 {
708 struct proc *p = l->l_proc;
709 struct filedesc *fdp;
710 struct file **fpp, *fp;
711
712 fdp = p->p_fd;
713 rw_enter(&fdp->fd_lock, RW_WRITER);
714 if (fd < 0 || fd > fdp->fd_lastfile)
715 goto badf;
716 fpp = &fdp->fd_ofiles[fd];
717 fp = *fpp;
718 if (fp == NULL)
719 goto badf;
720
721 FILE_LOCK(fp);
722 if (!FILE_IS_USABLE(fp)) {
723 FILE_UNLOCK(fp);
724 goto badf;
725 }
726
727 FILE_USE(fp);
728
729 *fpp = NULL;
730 fdp->fd_ofileflags[fd] = 0;
731 fd_unused(fdp, fd);
732 rw_exit(&fdp->fd_lock);
733 if (fd < fdp->fd_knlistsize)
734 knote_fdclose(l, fd);
735 return (closef(fp, l));
736
737 badf:
738 rw_exit(&fdp->fd_lock);
739 return (EBADF);
740 }
741
742 /*
743 * Close a file descriptor.
744 */
745 /* ARGSUSED */
746 int
747 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
748 {
749 /* {
750 syscallarg(int) fd;
751 } */
752 int fd;
753 struct filedesc *fdp;
754 struct proc *p;
755
756 p = l->l_proc;
757 fd = SCARG(uap, fd);
758 fdp = p->p_fd;
759
760 #if 0
761 if (fd_getfile(fdp, fd) == NULL)
762 return (EBADF);
763 #endif
764
765 return (fdrelease(l, fd));
766 }
767
768 /*
769 * Return status information about a file descriptor.
770 * Common function for compat code.
771 */
772 int
773 do_sys_fstat(struct lwp *l, int fd, struct stat *sb)
774 {
775 struct file *fp;
776 int error;
777
778 fp = fd_getfile(l->l_proc->p_fd, fd);
779 if (fp == NULL)
780 return EBADF;
781
782 FILE_USE(fp);
783 error = (*fp->f_ops->fo_stat)(fp, sb, l);
784 FILE_UNUSE(fp, l);
785
786 return error;
787 }
788
789 /*
790 * Return status information about a file descriptor.
791 */
792 /* ARGSUSED */
793 int
794 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap, register_t *retval)
795 {
796 /* {
797 syscallarg(int) fd;
798 syscallarg(struct stat *) sb;
799 } */
800 struct stat sb;
801 int error;
802
803 error = do_sys_fstat(l, SCARG(uap, fd), &sb);
804
805 if (error == 0)
806 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
807
808 return (error);
809 }
810
811 /*
812 * Return pathconf information about a file descriptor.
813 */
814 /* ARGSUSED */
815 int
816 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap, register_t *retval)
817 {
818 /* {
819 syscallarg(int) fd;
820 syscallarg(int) name;
821 } */
822 int fd;
823 struct filedesc *fdp;
824 struct file *fp;
825 struct proc *p;
826 struct vnode *vp;
827 int error;
828
829 p = l->l_proc;
830 fd = SCARG(uap, fd);
831 fdp = p->p_fd;
832 error = 0;
833
834 if ((fp = fd_getfile(fdp, fd)) == NULL)
835 return (EBADF);
836
837 FILE_USE(fp);
838
839 switch (fp->f_type) {
840
841 case DTYPE_SOCKET:
842 case DTYPE_PIPE:
843 if (SCARG(uap, name) != _PC_PIPE_BUF)
844 error = EINVAL;
845 else
846 *retval = PIPE_BUF;
847 break;
848
849 case DTYPE_VNODE:
850 vp = (struct vnode *)fp->f_data;
851 error = VOP_PATHCONF(vp, SCARG(uap, name), retval);
852 break;
853
854 case DTYPE_KQUEUE:
855 error = EINVAL;
856 break;
857
858 default:
859 error = EOPNOTSUPP;
860 break;
861 }
862
863 FILE_UNUSE(fp, l);
864 return (error);
865 }
866
867 /*
868 * Allocate a file descriptor for the process.
869 */
870 int fdexpanded; /* XXX: what else uses this? */
871
872 int
873 fdalloc(struct proc *p, int want, int *result)
874 {
875 struct filedesc *fdp;
876 int i, lim, last, error;
877 u_int off, new;
878
879 fdp = p->p_fd;
880 rw_enter(&fdp->fd_lock, RW_WRITER);
881
882 /*
883 * Search for a free descriptor starting at the higher
884 * of want or fd_freefile. If that fails, consider
885 * expanding the ofile array.
886 */
887 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles);
888 last = min(fdp->fd_nfiles, lim);
889 again:
890 if ((i = want) < fdp->fd_freefile)
891 i = fdp->fd_freefile;
892 off = i >> NDENTRYSHIFT;
893 new = find_next_zero(fdp->fd_himap, off,
894 (last + NDENTRIES - 1) >> NDENTRYSHIFT);
895 if (new != -1) {
896 i = find_next_zero(&fdp->fd_lomap[new],
897 new > off ? 0 : i & NDENTRYMASK, NDENTRIES);
898 if (i == -1) {
899 /*
900 * free file descriptor in this block was
901 * below want, try again with higher want.
902 */
903 want = (new + 1) << NDENTRYSHIFT;
904 goto again;
905 }
906 i += (new << NDENTRYSHIFT);
907 if (i < last) {
908 if (fdp->fd_ofiles[i] == NULL) {
909 fd_used(fdp, i);
910 if (want <= fdp->fd_freefile)
911 fdp->fd_freefile = i;
912 *result = i;
913 error = 0;
914 goto out;
915 }
916 }
917 }
918
919 /* No space in current array. Expand or let the caller do it. */
920 error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC;
921
922 out:
923 rw_exit(&fdp->fd_lock);
924 return (error);
925 }
926
927 void
928 fdexpand(struct proc *p)
929 {
930 struct filedesc *fdp;
931 int i, numfiles, oldnfiles;
932 struct file **newofile;
933 char *newofileflags;
934 uint32_t *newhimap = NULL, *newlomap = NULL;
935
936 fdp = p->p_fd;
937
938 restart:
939 oldnfiles = fdp->fd_nfiles;
940
941 if (oldnfiles < NDEXTENT)
942 numfiles = NDEXTENT;
943 else
944 numfiles = 2 * oldnfiles;
945
946 newofile = malloc(numfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
947 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
948 newhimap = malloc(NDHISLOTS(numfiles) * sizeof(uint32_t),
949 M_FILEDESC, M_WAITOK);
950 newlomap = malloc(NDLOSLOTS(numfiles) * sizeof(uint32_t),
951 M_FILEDESC, M_WAITOK);
952 }
953
954 rw_enter(&fdp->fd_lock, RW_WRITER);
955 /* lock fdp */
956 if (fdp->fd_nfiles != oldnfiles) {
957 /* fdp changed; retry */
958 rw_exit(&fdp->fd_lock);
959 free(newofile, M_FILEDESC);
960 if (newhimap != NULL) free(newhimap, M_FILEDESC);
961 if (newlomap != NULL) free(newlomap, M_FILEDESC);
962 goto restart;
963 }
964
965 newofileflags = (char *) &newofile[numfiles];
966 /*
967 * Copy the existing ofile and ofileflags arrays
968 * and zero the new portion of each array.
969 */
970 memcpy(newofile, fdp->fd_ofiles,
971 (i = sizeof(struct file *) * fdp->fd_nfiles));
972 memset((char *)newofile + i, 0,
973 numfiles * sizeof(struct file *) - i);
974 memcpy(newofileflags, fdp->fd_ofileflags,
975 (i = sizeof(char) * fdp->fd_nfiles));
976 memset(newofileflags + i, 0, numfiles * sizeof(char) - i);
977 if (oldnfiles > NDFILE)
978 free(fdp->fd_ofiles, M_FILEDESC);
979
980 if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) {
981 memcpy(newhimap, fdp->fd_himap,
982 (i = NDHISLOTS(oldnfiles) * sizeof(uint32_t)));
983 memset((char *)newhimap + i, 0,
984 NDHISLOTS(numfiles) * sizeof(uint32_t) - i);
985
986 memcpy(newlomap, fdp->fd_lomap,
987 (i = NDLOSLOTS(oldnfiles) * sizeof(uint32_t)));
988 memset((char *)newlomap + i, 0,
989 NDLOSLOTS(numfiles) * sizeof(uint32_t) - i);
990
991 if (NDHISLOTS(oldnfiles) > NDHISLOTS(NDFILE)) {
992 free(fdp->fd_himap, M_FILEDESC);
993 free(fdp->fd_lomap, M_FILEDESC);
994 }
995 fdp->fd_himap = newhimap;
996 fdp->fd_lomap = newlomap;
997 }
998
999 fdp->fd_ofiles = newofile;
1000 fdp->fd_ofileflags = newofileflags;
1001 fdp->fd_nfiles = numfiles;
1002
1003 rw_exit(&fdp->fd_lock);
1004
1005 fdexpanded++;
1006 }
1007
1008 /*
1009 * Create a new open file structure and allocate
1010 * a file descriptor for the process that refers to it.
1011 */
1012 int
1013 falloc(struct lwp *l, struct file **resultfp, int *resultfd)
1014 {
1015 struct filedesc *fdp;
1016 struct file *fp;
1017 struct proc *p;
1018 int error, i;
1019
1020 p = l->l_proc;
1021 fdp = p->p_fd;
1022
1023 restart:
1024 if ((error = fdalloc(p, 0, &i)) != 0) {
1025 if (error == ENOSPC) {
1026 fdexpand(p);
1027 goto restart;
1028 }
1029 return (error);
1030 }
1031
1032 fp = pool_cache_get(file_cache, PR_WAITOK);
1033
1034 if (atomic_inc_uint_nv(&nfiles) >= maxfiles) {
1035 atomic_dec_uint(&nfiles);
1036 tablefull("file", "increase kern.maxfiles or MAXFILES");
1037 rw_enter(&fdp->fd_lock, RW_WRITER);
1038 fd_unused(fdp, i);
1039 rw_exit(&fdp->fd_lock);
1040 pool_cache_put(file_cache, fp);
1041 return (ENFILE);
1042 }
1043
1044 fp->f_advice = 0;
1045 fp->f_msgcount = 0;
1046 fp->f_offset = 0;
1047
1048 /*
1049 * Initialize the new file.  It is created "larval" and cannot be
1050 * used until the caller matures it with FILE_SET_MATURE(); the
1051 * file is already on the global filehead list, courtesy of
1052 * file_ctor().  Take a reference to the opening credentials.
1053 */
1054 fp->f_iflags = FIF_LARVAL;
1055 fp->f_cred = l->l_cred;
1056 kauth_cred_hold(fp->f_cred);
1057
1058 FILE_LOCK(fp);
1059 fp->f_count = 1;
1060 FILE_UNLOCK(fp);
1061
1062 rw_enter(&fdp->fd_lock, RW_WRITER); /* XXXAD check order */
1063 KDASSERT(fdp->fd_ofiles[i] == NULL);
1064 fdp->fd_ofiles[i] = fp;
1065 rw_exit(&fdp->fd_lock);
1066
1067 if (resultfp) {
1068 fp->f_usecount = 1;
1069 *resultfp = fp;
1070 }
1071 if (resultfd)
1072 *resultfd = i;
1073
1074 return (0);
1075 }
1076
1077 /*
1078 * Free a file descriptor.
1079 */
1080 void
1081 ffree(struct file *fp)
1082 {
1083
1084 KASSERT(fp->f_usecount == 0);
1085
1086 atomic_dec_uint(&nfiles);
1087 kauth_cred_free(fp->f_cred);
1088 pool_cache_put(file_cache, fp);
1089 }
1090
1091 /*
1092 * Create an initial cwdinfo structure, using the same current and root
1093 * directories as p.
1094 */
1095 struct cwdinfo *
1096 cwdinit(struct proc *p)
1097 {
1098 struct cwdinfo *cwdi;
1099 struct cwdinfo *copy;
1100
1101 cwdi = pool_cache_get(cwdi_cache, PR_WAITOK);
1102 copy = p->p_cwdi;
1103
1104 rw_enter(&copy->cwdi_lock, RW_READER);
1105 cwdi->cwdi_cdir = p->p_cwdi->cwdi_cdir;
1106 if (cwdi->cwdi_cdir)
1107 VREF(cwdi->cwdi_cdir);
1108 cwdi->cwdi_rdir = p->p_cwdi->cwdi_rdir;
1109 if (cwdi->cwdi_rdir)
1110 VREF(cwdi->cwdi_rdir);
1111 cwdi->cwdi_edir = p->p_cwdi->cwdi_edir;
1112 if (cwdi->cwdi_edir)
1113 VREF(cwdi->cwdi_edir);
1114 cwdi->cwdi_cmask = p->p_cwdi->cwdi_cmask;
1115 cwdi->cwdi_refcnt = 1;
1116 rw_exit(&copy->cwdi_lock);
1117
1118 return (cwdi);
1119 }
1120
1121 static int
1122 cwdi_ctor(void *arg, void *obj, int flags)
1123 {
1124 struct cwdinfo *cwdi = obj;
1125
1126 rw_init(&cwdi->cwdi_lock);
1127
1128 return 0;
1129 }
1130
1131 static void
1132 cwdi_dtor(void *arg, void *obj)
1133 {
1134 struct cwdinfo *cwdi = obj;
1135
1136 rw_destroy(&cwdi->cwdi_lock);
1137 }
1138
1139 static int
1140 file_ctor(void *arg, void *obj, int flags)
1141 {
1142 struct file *fp = obj;
1143
1144 memset(fp, 0, sizeof(*fp));
1145 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE);
1146 cv_init(&fp->f_cv, "closef");
1147
1148 mutex_enter(&filelist_lock);
1149 LIST_INSERT_HEAD(&filehead, fp, f_list);
1150 mutex_exit(&filelist_lock);
1151
1152 return 0;
1153 }
1154
1155 static void
1156 file_dtor(void *arg, void *obj)
1157 {
1158 struct file *fp = obj;
1159
1160 mutex_enter(&filelist_lock);
1161 LIST_REMOVE(fp, f_list);
1162 mutex_exit(&filelist_lock);
1163
1164 mutex_destroy(&fp->f_lock);
1165 cv_destroy(&fp->f_cv);
1166 }
1167
1168 struct file *
1169 fgetdummy(void)
1170 {
1171 struct file *fp;
1172
1173 fp = kmem_alloc(sizeof(*fp), KM_SLEEP);
1174 if (fp != NULL) {
1175 memset(fp, 0, sizeof(*fp));
1176 mutex_init(&fp->f_lock, MUTEX_DEFAULT, IPL_NONE);
1177 }
1178 return fp;
1179 }
1180
1181 void
1182 fputdummy(struct file *fp)
1183 {
1184
1185 mutex_destroy(&fp->f_lock);
1186 kmem_free(fp, sizeof(*fp));
1187 }
1188
1189 /*
1190 * Make p2 share p1's cwdinfo.
1191 */
1192 void
1193 cwdshare(struct proc *p1, struct proc *p2)
1194 {
1195 struct cwdinfo *cwdi = p1->p_cwdi;
1196
1197 atomic_inc_uint(&cwdi->cwdi_refcnt);
1198 p2->p_cwdi = cwdi;
1199 }
1200
1201 /*
1202 * Make this process not share its cwdinfo structure, maintaining
1203 * all cwdinfo state.
1204 */
1205 void
1206 cwdunshare(struct proc *p)
1207 {
1208 struct cwdinfo *oldcwdi, *newcwdi;
1209
1210 if (p->p_cwdi->cwdi_refcnt == 1)
1211 return;
1212
1213 newcwdi = cwdinit(p);
1214 oldcwdi = p->p_cwdi;
1215 p->p_cwdi = newcwdi;
1216 cwdfree(oldcwdi);
1217 }
1218
1219 /*
1220 * Release a cwdinfo structure.
1221 */
1222 void
1223 cwdfree(struct cwdinfo *cwdi)
1224 {
1225
1226 if (atomic_dec_uint_nv(&cwdi->cwdi_refcnt) > 0)
1227 return;
1228
1229 vrele(cwdi->cwdi_cdir);
1230 if (cwdi->cwdi_rdir)
1231 vrele(cwdi->cwdi_rdir);
1232 if (cwdi->cwdi_edir)
1233 vrele(cwdi->cwdi_edir);
1234 pool_cache_put(cwdi_cache, cwdi);
1235 }
1236
1237 /*
1238  * Create an initial filedesc structure.  The current and root
1239  * directories are tracked separately, in the process's cwdinfo.
1240 */
1241 struct filedesc *
1242 fdinit(struct proc *p)
1243 {
1244 struct filedesc0 *newfdp;
1245
1246 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK);
1247 memset(newfdp, 0, sizeof(struct filedesc0));
1248
1249 fdinit1(newfdp);
1250
1251 return (&newfdp->fd_fd);
1252 }
1253
1254 /*
1255 * Initialize a file descriptor table.
1256 */
1257 void
1258 fdinit1(struct filedesc0 *newfdp)
1259 {
1260
1261 newfdp->fd_fd.fd_refcnt = 1;
1262 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1263 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1264 newfdp->fd_fd.fd_nfiles = NDFILE;
1265 newfdp->fd_fd.fd_knlistsize = -1;
1266 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
1267 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
1268 newfdp->fd_fd.fd_lastfile = -1;
1269 rw_init(&newfdp->fd_fd.fd_lock);
1270 }
1271
1272 /*
1273 * Make p2 share p1's filedesc structure.
1274 */
1275 void
1276 fdshare(struct proc *p1, struct proc *p2)
1277 {
1278 struct filedesc *fdp = p1->p_fd;
1279
1280 p2->p_fd = fdp;
1281 atomic_inc_uint(&fdp->fd_refcnt);
1282 }
1283
1284 /*
1285 * Make this process not share its filedesc structure, maintaining
1286 * all file descriptor state.
1287 */
1288 void
1289 fdunshare(struct lwp *l)
1290 {
1291 struct proc *p = l->l_proc;
1292 struct filedesc *newfd;
1293
1294 if (p->p_fd->fd_refcnt == 1)
1295 return;
1296
1297 newfd = fdcopy(p);
1298 fdfree(l);
1299 p->p_fd = newfd;
1300 }
1301
1302 /*
1303 * Clear a process's fd table.
1304 */
1305 void
1306 fdclear(struct lwp *l)
1307 {
1308 struct proc *p = l->l_proc;
1309 struct filedesc *newfd;
1310
1311 newfd = fdinit(p);
1312 fdfree(l);
1313 p->p_fd = newfd;
1314 }
1315
1316 /*
1317 * Copy a filedesc structure.
1318 */
1319 struct filedesc *
1320 fdcopy(struct proc *p)
1321 {
1322 struct filedesc *newfdp, *fdp;
1323 struct file **fpp, **nfpp;
1324 int i, numfiles, lastfile;
1325
1326 fdp = p->p_fd;
1327 newfdp = pool_cache_get(filedesc0_cache, PR_WAITOK);
1328 newfdp->fd_refcnt = 1;
1329 rw_init(&newfdp->fd_lock);
1330
1331 restart:
1332 numfiles = fdp->fd_nfiles;
1333 lastfile = fdp->fd_lastfile;
1334
1335 /*
1336 * If the number of open files fits in the internal arrays
1337 * of the open file structure, use them, otherwise allocate
1338 * additional memory for the number of descriptors currently
1339 * in use.
1340 */
1341 if (lastfile < NDFILE) {
1342 i = NDFILE;
1343 } else {
1344 /*
1345 * Compute the smallest multiple of NDEXTENT needed
1346 * for the file descriptors currently in use,
1347 * allowing the table to shrink.
1348 */
1349 i = numfiles;
1350 while (i >= 2 * NDEXTENT && i > lastfile * 2)
1351 i /= 2;
1352 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK);
1353 }
1354 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) {
1355 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(uint32_t),
1356 M_FILEDESC, M_WAITOK);
1357 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(uint32_t),
1358 M_FILEDESC, M_WAITOK);
1359 }
1360
1361 rw_enter(&fdp->fd_lock, RW_READER);
1362 if (numfiles != fdp->fd_nfiles || lastfile != fdp->fd_lastfile) {
1363 rw_exit(&fdp->fd_lock);
1364 if (i > NDFILE)
1365 free(newfdp->fd_ofiles, M_FILEDESC);
1366 if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) {
1367 free(newfdp->fd_himap, M_FILEDESC);
1368 free(newfdp->fd_lomap, M_FILEDESC);
1369 }
1370 goto restart;
1371 }
1372
1373 if (lastfile < NDFILE) {
1374 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1375 newfdp->fd_ofileflags =
1376 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1377 } else {
1378 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1379 }
1380 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) {
1381 newfdp->fd_himap =
1382 ((struct filedesc0 *) newfdp)->fd_dhimap;
1383 newfdp->fd_lomap =
1384 ((struct filedesc0 *) newfdp)->fd_dlomap;
1385 }
1386
1387 newfdp->fd_nfiles = i;
1388 newfdp->fd_lastfile = lastfile;
1389 newfdp->fd_freefile = fdp->fd_freefile;
1390
1391 /* Clear the entries that will not be copied over.
1392 * Avoid calling memset with 0 size (i.e. when
1393 * lastfile == i-1). */
1394 if (lastfile < (i-1))
1395 memset(newfdp->fd_ofiles + lastfile + 1, 0,
1396 (i - lastfile - 1) * sizeof(struct file **));
1397 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char));
1398 if (i < NDENTRIES * NDENTRIES)
1399 i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */
1400 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t));
1401 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t));
1402
1403 fpp = fdp->fd_ofiles;
1404 nfpp = newfdp->fd_ofiles;
1405 for (i = 0; i <= lastfile; i++, fpp++, nfpp++) {
1406 if ((*nfpp = *fpp) == NULL)
1407 continue;
1408
1409 if ((*fpp)->f_type == DTYPE_KQUEUE)
1410 /* kq descriptors cannot be copied. */
1411 fdremove(newfdp, i);
1412 else {
1413 FILE_LOCK(*fpp);
1414 (*fpp)->f_count++;
1415 FILE_UNLOCK(*fpp);
1416 }
1417 }
1418
1419 rw_exit(&fdp->fd_lock);
1420
1421 newfdp->fd_knlist = NULL;
1422 newfdp->fd_knlistsize = -1;
1423 newfdp->fd_knhash = NULL;
1424 newfdp->fd_knhashmask = 0;
1425
1426 return (newfdp);
1427 }
1428
1429 /*
1430 * Release a filedesc structure.
1431 */
1432 void
1433 fdfree(struct lwp *l)
1434 {
1435 struct proc *p = l->l_proc;
1436 struct filedesc *fdp;
1437 struct file **fpp, *fp;
1438 int i;
1439
1440 fdp = p->p_fd;
1441 if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0)
1442 return;
1443
1444 rw_destroy(&fdp->fd_lock);
1445 fpp = fdp->fd_ofiles;
1446 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) {
1447 fp = *fpp;
1448 if (fp != NULL) {
1449 *fpp = NULL;
1450 FILE_LOCK(fp);
1451 FILE_USE(fp);
1452 if ((fdp->fd_lastfile - i) < fdp->fd_knlistsize)
1453 knote_fdclose(l, fdp->fd_lastfile - i);
1454 (void) closef(fp, l);
1455 }
1456 }
1457 p->p_fd = NULL;
1458 if (fdp->fd_nfiles > NDFILE)
1459 free(fdp->fd_ofiles, M_FILEDESC);
1460 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) {
1461 free(fdp->fd_himap, M_FILEDESC);
1462 free(fdp->fd_lomap, M_FILEDESC);
1463 }
1464 if (fdp->fd_knlist)
1465 free(fdp->fd_knlist, M_KEVENT);
1466 if (fdp->fd_knhash)
1467 hashdone(fdp->fd_knhash, M_KEVENT);
1468 pool_cache_put(filedesc0_cache, fdp);
1469 }
1470
1471 /*
1472 * Internal form of close.
1473 * Decrement reference count on file structure.
1474 * Note: p may be NULL when closing a file
1475 * that was being passed in a message.
1476 *
1477 * Note: we expect the caller is holding a usecount, and expects us
1478 * to drop it (the caller thinks the file is going away forever).
1479 */
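/*
 * Illustrative only -- a sketch of the usual calling sequence, as seen
 * in fdrelease() above: the caller takes the use count that closef()
 * then drops.
 *
 *	FILE_LOCK(fp);
 *	FILE_USE(fp);
 *	error = closef(fp, l);
 */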
1480 int
1481 closef(struct file *fp, struct lwp *l)
1482 {
1483 struct proc *p = l ? l->l_proc : NULL;
1484 struct vnode *vp;
1485 struct flock lf;
1486 int error;
1487
1488 if (fp == NULL)
1489 return (0);
1490
1491 /*
1492 * POSIX record locking dictates that any close releases ALL
1493 * locks owned by this process. This is handled by setting
1494 * a flag in the unlock to free ONLY locks obeying POSIX
1495 * semantics, and not to free BSD-style file locks.
1496 * If the descriptor was in a message, POSIX-style locks
1497 * aren't passed with the descriptor.
1498 */
1499 if (p && (p->p_flag & PK_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1500 lf.l_whence = SEEK_SET;
1501 lf.l_start = 0;
1502 lf.l_len = 0;
1503 lf.l_type = F_UNLCK;
1504 vp = (struct vnode *)fp->f_data;
1505 (void) VOP_ADVLOCK(vp, p, F_UNLCK, &lf, F_POSIX);
1506 }
1507
1508 /*
1509 * If WANTCLOSE is set, then the reference count on the file
1510 * is 0, but there were multiple users of the file. This can
1511 * happen if a filedesc structure is shared by multiple
1512 * processes.
1513 */
1514 FILE_LOCK(fp);
1515 if (fp->f_iflags & FIF_WANTCLOSE) {
1516 /*
1517 * Another user of the file is already closing, and is
1518 * simply waiting for other users of the file to drain.
1519 * Release our usecount, and wake up the closer if it
1520 * is the only remaining use.
1521 */
1522 #ifdef DIAGNOSTIC
1523 if (fp->f_count != 0)
1524 panic("closef: wantclose and count != 0");
1525 if (fp->f_usecount < 2)
1526 panic("closef: wantclose and usecount < 2");
1527 #endif
1528 if (--fp->f_usecount == 1)
1529 cv_broadcast(&fp->f_cv);
1530 FILE_UNLOCK(fp);
1531 return (0);
1532 } else {
1533 /*
1534 * Decrement the reference count. If we were not the
1535 * last reference, then release our use and just
1536 * return.
1537 */
1538 if (--fp->f_count > 0) {
1539 #ifdef DIAGNOSTIC
1540 if (fp->f_usecount < 1)
1541 panic("closef: no wantclose and usecount < 1");
1542 #endif
1543 fp->f_usecount--;
1544 FILE_UNLOCK(fp);
1545 return (0);
1546 }
1547 }
1548
1549 /*
1550 * The reference count is now 0. However, there may be
1551 * multiple potential users of this file. This can happen
1552 * if multiple processes shared a single filedesc structure.
1553 *
1554 * Notify these potential users that the file is closing.
1555 * This will prevent them from adding additional uses to
1556 * the file.
1557 */
1558 fp->f_iflags |= FIF_WANTCLOSE;
1559
1560 /*
1561 * We expect the caller to add a use to the file. So, if we
1562 * are the last user, usecount will be 1. If it is not, we
1563 * must wait for the usecount to drain. When it drains back
1564 * to 1, we will be awakened so that we may proceed with the
1565 * close.
1566 */
1567 #ifdef DIAGNOSTIC
1568 if (fp->f_usecount < 1)
1569 panic("closef: usecount < 1");
1570 #endif
1571 while (fp->f_usecount > 1)
1572 cv_wait(&fp->f_cv, &fp->f_lock);
1573 #ifdef DIAGNOSTIC
1574 if (fp->f_usecount != 1)
1575 panic("closef: usecount != 1");
1576 #endif
1577
1578 FILE_UNLOCK(fp);
1579 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1580 lf.l_whence = SEEK_SET;
1581 lf.l_start = 0;
1582 lf.l_len = 0;
1583 lf.l_type = F_UNLCK;
1584 vp = (struct vnode *)fp->f_data;
1585 (void) VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
1586 }
1587 if (fp->f_ops)
1588 error = (*fp->f_ops->fo_close)(fp, l);
1589 else
1590 error = 0;
1591
1592 /* Nothing references the file now, drop the final use (us). */
1593 fp->f_usecount--;
1594
1595 ffree(fp);
1596 return (error);
1597 }
1598
1599 /*
1600 * Apply an advisory lock on a file descriptor.
1601 *
1602 * Just attempt to get a record lock of the requested type on
1603 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1604 */
1605 /* ARGSUSED */
1606 int
1607 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
1608 {
1609 /* {
1610 syscallarg(int) fd;
1611 syscallarg(int) how;
1612 } */
1613 int fd, how, error;
1614 struct proc *p;
1615 struct filedesc *fdp;
1616 struct file *fp;
1617 struct vnode *vp;
1618 struct flock lf;
1619
1620 p = l->l_proc;
1621 fd = SCARG(uap, fd);
1622 how = SCARG(uap, how);
1623 fdp = p->p_fd;
1624 error = 0;
1625
1626 if ((fp = fd_getfile(fdp, fd)) == NULL)
1627 return (EBADF);
1628
1629 FILE_USE(fp);
1630
1631 if (fp->f_type != DTYPE_VNODE) {
1632 error = EOPNOTSUPP;
1633 goto out;
1634 }
1635
1636 vp = (struct vnode *)fp->f_data;
1637 lf.l_whence = SEEK_SET;
1638 lf.l_start = 0;
1639 lf.l_len = 0;
1640 if (how & LOCK_UN) {
1641 lf.l_type = F_UNLCK;
1642 fp->f_flag &= ~FHASLOCK;
1643 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
1644 goto out;
1645 }
1646 if (how & LOCK_EX)
1647 lf.l_type = F_WRLCK;
1648 else if (how & LOCK_SH)
1649 lf.l_type = F_RDLCK;
1650 else {
1651 error = EINVAL;
1652 goto out;
1653 }
1654 fp->f_flag |= FHASLOCK;
1655 if (how & LOCK_NB)
1656 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
1657 else
1658 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf,
1659 F_FLOCK|F_WAIT);
1660 out:
1661 FILE_UNUSE(fp, l);
1662 return (error);
1663 }
1664
1665 int
1666 do_posix_fadvise(struct lwp *l, int fd, off_t offset, off_t len, int advice,
1667 register_t *retval)
1668 {
1669 struct proc *p = l->l_proc;
1670 struct file *fp;
1671 int error = 0;
1672
1673 fp = fd_getfile(p->p_fd, fd);
1674 if (fp == NULL) {
1675 error = EBADF;
1676 goto out;
1677 }
1678 FILE_USE(fp);
1679
1680 if (fp->f_type != DTYPE_VNODE) {
1681 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
1682 error = ESPIPE;
1683 } else {
1684 error = EOPNOTSUPP;
1685 }
1686 goto out;
1687 }
1688
1689 switch (advice) {
1690 case POSIX_FADV_NORMAL:
1691 case POSIX_FADV_RANDOM:
1692 case POSIX_FADV_SEQUENTIAL:
1693 KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
1694 KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
1695 KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
1696
1697 /*
1698 * we ignore offset and size.
1699 */
1700
1701 fp->f_advice = advice;
1702 break;
1703
1704 case POSIX_FADV_WILLNEED:
1705 case POSIX_FADV_DONTNEED:
1706 case POSIX_FADV_NOREUSE:
1707
1708 /*
1709 * not implemented yet.
1710 */
1711
1712 break;
1713 default:
1714 error = EINVAL;
1715 break;
1716 }
1717 out:
1718 if (fp != NULL) {
1719 FILE_UNUSE(fp, l);
1720 }
1721 *retval = error;
1722 return 0;
1723 }
1724
1725 /* ARGSUSED */
1726 int
1727 sys___posix_fadvise50(struct lwp *l,
1728 const struct sys___posix_fadvise50_args *uap, register_t *retval)
1729 {
1730 /* {
1731 syscallarg(int) fd;
1732 syscallarg(int) pad;
1733 syscallarg(off_t) offset;
1734 syscallarg(off_t) len;
1735 syscallarg(int) advice;
1736 } */
1737
1738 return do_posix_fadvise(l, SCARG(uap, fd), SCARG(uap, offset),
1739 SCARG(uap, len), SCARG(uap, advice), retval);
1740 }
1741
1742 /*
1743 * File Descriptor pseudo-device driver (/dev/fd/).
1744 *
1745 * Opening minor device N dup()s the file (if any) connected to file
1746 * descriptor N belonging to the calling process. Note that this driver
1747 * consists of only the ``open()'' routine, because all subsequent
1748 * references to this file will be direct to the other driver.
1749 */
1750 /* ARGSUSED */
1751 static int
1752 filedescopen(dev_t dev, int mode, int type, struct lwp *l)
1753 {
1754
1755 /*
1756 	 * XXX Kludge: set dupfd to contain the value of the
1757 	 * file descriptor being sought for duplication. The error
1758 * return ensures that the vnode for this device will be released
1759 * by vn_open. Open will detect this special error and take the
1760 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1761 * will simply report the error.
1762 */
1763 l->l_dupfd = minor(dev); /* XXX */
1764 return EDUPFD;
1765 }
1766
1767 const struct cdevsw filedesc_cdevsw = {
1768 filedescopen, noclose, noread, nowrite, noioctl,
1769 nostop, notty, nopoll, nommap, nokqfilter, D_OTHER,
1770 };
1771
1772 /*
1773 * Duplicate the specified descriptor to a free descriptor.
1774 *
1775 * 'indx' has been fdalloc'ed (and will be fdremove'ed on error) by the caller.
1776 */
1777 int
1778 dupfdopen(struct lwp *l, int indx, int dfd, int mode, int error)
1779 {
1780 struct proc *p = l->l_proc;
1781 struct filedesc *fdp;
1782 struct file *wfp;
1783
1784 fdp = p->p_fd;
1785
1786 /* should be cleared by the caller */
1787 KASSERT(fdp->fd_ofiles[indx] == NULL);
1788
1789 /*
1790 * If the to-be-dup'd fd number is greater than the allowed number
1791 * of file descriptors, or the fd to be dup'd has already been
1792 * closed, reject.
1793 */
1794
1795 /*
1796 * Note, in the case of indx == dfd, fd_getfile below returns NULL.
1797 */
1798 if ((wfp = fd_getfile(fdp, dfd)) == NULL)
1799 return (EBADF);
1800
1801 FILE_USE(wfp);
1802
1803 /*
1804 * There are two cases of interest here.
1805 *
1806 * For EDUPFD simply dup (dfd) to file descriptor
1807 * (indx) and return.
1808 *
1809 * For EMOVEFD steal away the file structure from (dfd) and
1810 * store it in (indx). (dfd) is effectively closed by
1811 * this operation.
1812 *
1813 * Any other error code is just returned.
1814 */
1815 switch (error) {
1816 case EDUPFD:
1817 /*
1818 * Check that the mode the file is being opened for is a
1819 * subset of the mode of the existing descriptor.
1820 */
1821 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1822 FILE_UNUSE(wfp, l);
1823 return (EACCES);
1824 }
1825 rw_enter(&fdp->fd_lock, RW_WRITER);
1826 fdp->fd_ofiles[indx] = wfp;
1827 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1828 rw_exit(&fdp->fd_lock);
1829 FILE_LOCK(wfp);
1830 wfp->f_count++;
1831 /* 'indx' has been fd_used'ed by caller */
1832 FILE_UNUSE_HAVELOCK(wfp, l);
1833 return (0);
1834
1835 case EMOVEFD:
1836 /*
1837 * Steal away the file pointer from dfd, and stuff it into indx.
1838 */
1839 rw_enter(&fdp->fd_lock, RW_WRITER);
1840 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1841 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1842 fdp->fd_ofiles[dfd] = NULL;
1843 fdp->fd_ofileflags[dfd] = 0;
1844 /*
1845 * Complete the clean up of the filedesc structure by
1846 * recomputing the various hints.
1847 */
1848 /* 'indx' has been fd_used'ed by caller */
1849 fd_unused(fdp, dfd);
1850 rw_exit(&fdp->fd_lock);
1851 FILE_UNUSE(wfp, l);
1852 return (0);
1853
1854 default:
1855 FILE_UNUSE(wfp, l);
1856 return (error);
1857 }
1858 /* NOTREACHED */
1859 }
1860
1861 /*
1862 * Close any files on exec?
1863 */
1864 void
1865 fdcloseexec(struct lwp *l)
1866 {
1867 struct proc *p = l->l_proc;
1868 struct filedesc *fdp;
1869 int fd;
1870
1871 fdunshare(l);
1872 cwdunshare(p);
1873
1874 if (p->p_cwdi->cwdi_edir)
1875 vrele(p->p_cwdi->cwdi_edir);
1876
1877 fdp = p->p_fd;
1878 for (fd = 0; fd <= fdp->fd_lastfile; fd++)
1879 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE)
1880 (void) fdrelease(l, fd);
1881 }
1882
1883 /*
1884 * It is unsafe for set[ug]id processes to be started with file
1885 * descriptors 0..2 closed, as these descriptors are given implicit
1886 * significance in the Standard C library. fdcheckstd() will create a
1887 * descriptor referencing /dev/null for each of stdin, stdout, and
1888 * stderr that is not already open.
1889 */
1890 #define CHECK_UPTO 3
1891 int
1892 fdcheckstd(struct lwp *l)
1893 {
1894 struct proc *p;
1895 struct nameidata nd;
1896 struct filedesc *fdp;
1897 struct file *fp;
1898 struct file *devnullfp = NULL; /* Quell compiler warning */
1899 struct proc *pp;
1900 register_t retval;
1901 int fd, i, error, flags = FREAD|FWRITE, devnull = -1;
1902 char closed[CHECK_UPTO * 3 + 1], which[3 + 1];
1903
1904 p = l->l_proc;
1905 closed[0] = '\0';
1906 if ((fdp = p->p_fd) == NULL)
1907 return (0);
1908 for (i = 0; i < CHECK_UPTO; i++) {
1909 if (fdp->fd_ofiles[i] != NULL)
1910 continue;
1911 snprintf(which, sizeof(which), ",%d", i);
1912 strlcat(closed, which, sizeof(closed));
1913 if (devnullfp == NULL) {
1914 if ((error = falloc(l, &fp, &fd)) != 0)
1915 return (error);
1916 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null");
1917 if ((error = vn_open(&nd, flags, 0)) != 0) {
1918 FILE_UNUSE(fp, l);
1919 ffree(fp);
1920 fdremove(p->p_fd, fd);
1921 return (error);
1922 }
1923 fp->f_data = nd.ni_vp;
1924 fp->f_flag = flags;
1925 fp->f_ops = &vnops;
1926 fp->f_type = DTYPE_VNODE;
1927 VOP_UNLOCK(nd.ni_vp, 0);
1928 devnull = fd;
1929 devnullfp = fp;
1930 FILE_SET_MATURE(fp);
1931 } else {
1932 restart:
1933 if ((error = fdalloc(p, 0, &fd)) != 0) {
1934 if (error == ENOSPC) {
1935 fdexpand(p);
1936 goto restart;
1937 }
1938 return (error);
1939 }
1940
1941 FILE_LOCK(devnullfp);
1942 FILE_USE(devnullfp);
1943 /* finishdup() will unuse the descriptors for us */
1944 if ((error = finishdup(l, devnull, fd, &retval)) != 0)
1945 return (error);
1946 }
1947 }
1948 if (devnullfp)
1949 FILE_UNUSE(devnullfp, l);
1950 if (closed[0] != '\0') {
1951 mutex_enter(&proclist_lock);
1952 pp = p->p_pptr;
1953 mutex_enter(&pp->p_mutex);
1954 log(LOG_WARNING, "set{u,g}id pid %d (%s) "
1955 "was invoked by uid %d ppid %d (%s) "
1956 "with fd %s closed\n",
1957 p->p_pid, p->p_comm, kauth_cred_geteuid(pp->p_cred),
1958 pp->p_pid, pp->p_comm, &closed[1]);
1959 mutex_exit(&pp->p_mutex);
1960 mutex_exit(&proclist_lock);
1961 }
1962 return (0);
1963 }
1964 #undef CHECK_UPTO
1965
1966 /*
1967  * Set the descriptor owner.  If the owner is a process, 'pgid' is
1968  * set to the positive process ID.  If the owner is a process group,
1969  * 'pgid' is set to the negated process group ID (-pg_id).
1970 */
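/*
 * Illustrative only -- worked examples of the convention, read from the
 * code below: fsetown() called for F_SETOWN/FIOSETOWN with id 123 stores
 * pgid = 123 (owner is process 123), and with id -123 stores pgid = -123
 * (owner is process group 123); TIOCSPGRP with 123 likewise stores -123.
 */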
1971 int
1972 fsetown(struct proc *p, pid_t *pgid, int cmd, const void *data)
1973 {
1974 int id = *(const int *)data;
1975 int error;
1976
1977 switch (cmd) {
1978 case TIOCSPGRP:
1979 if (id < 0)
1980 return (EINVAL);
1981 id = -id;
1982 break;
1983 default:
1984 break;
1985 }
1986
1987 if (id > 0 && !pfind(id))
1988 return (ESRCH);
1989 else if (id < 0 && (error = pgid_in_session(p, -id)))
1990 return (error);
1991
1992 *pgid = id;
1993 return (0);
1994 }
1995
1996 /*
1997  * Return descriptor owner information.  If the value is positive, it
1998  * is a process ID.  If it is negative, it is a process group ID and
1999  * the sign must be removed before use.
2000 */
2001 int
2002 fgetown(struct proc *p, pid_t pgid, int cmd, void *data)
2003 {
2004 switch (cmd) {
2005 case TIOCGPGRP:
2006 *(int *)data = -pgid;
2007 break;
2008 default:
2009 *(int *)data = pgid;
2010 break;
2011 }
2012 return (0);
2013 }
2014
2015 /*
2016 * Send signal to descriptor owner, either process or process group.
2017 */
2018 void
2019 fownsignal(pid_t pgid, int signo, int code, int band, void *fdescdata)
2020 {
2021 struct proc *p1;
2022 struct pgrp *pgrp;
2023 ksiginfo_t ksi;
2024
2025 KSI_INIT(&ksi);
2026 ksi.ksi_signo = signo;
2027 ksi.ksi_code = code;
2028 ksi.ksi_band = band;
2029
2030 /*
2031 * Since we may be called from an interrupt context, we must use
2032 * the proclist_mutex.
2033 */
2034 mutex_enter(&proclist_mutex);
2035 if (pgid > 0 && (p1 = p_find(pgid, PFIND_LOCKED)))
2036 kpsignal(p1, &ksi, fdescdata);
2037 else if (pgid < 0 && (pgrp = pg_find(-pgid, PFIND_LOCKED)))
2038 kpgsignal(pgrp, &ksi, fdescdata, 0);
2039 mutex_exit(&proclist_mutex);
2040 }
2041
2042 int
2043 fdclone(struct lwp *l, struct file *fp, int fd, int flag,
2044 const struct fileops *fops, void *data)
2045 {
2046 fp->f_flag = flag;
2047 fp->f_type = DTYPE_MISC;
2048 fp->f_ops = fops;
2049 fp->f_data = data;
2050
2051 l->l_dupfd = fd;
2052
2053 FILE_SET_MATURE(fp);
2054 FILE_UNUSE(fp, l);
2055 return EMOVEFD;
2056 }
2057
2058 /* ARGSUSED */
2059 int
2060 fnullop_fcntl(struct file *fp, u_int cmd, void *data, struct lwp *l)
2061 {
2062
2063 if (cmd == F_SETFL)
2064 return 0;
2065
2066 return EOPNOTSUPP;
2067 }
2068
2069 /* ARGSUSED */
2070 int
2071 fnullop_poll(struct file *fp, int which, struct lwp *l)
2072 {
2073
2074 return 0;
2075 }
2076
2077
2078 /* ARGSUSED */
2079 int
2080 fnullop_kqfilter(struct file *fp, struct knote *kn)
2081 {
2082
2083 return 0;
2084 }
2085
2086 /* ARGSUSED */
2087 int
2088 fbadop_read(struct file *fp, off_t *offset, struct uio *uio,
2089 kauth_cred_t cred, int flags)
2090 {
2091
2092 return EOPNOTSUPP;
2093 }
2094
2095 /* ARGSUSED */
2096 int
2097 fbadop_write(struct file *fp, off_t *offset, struct uio *uio,
2098 kauth_cred_t cred, int flags)
2099 {
2100
2101 return EOPNOTSUPP;
2102 }
2103
2104 /* ARGSUSED */
2105 int
2106 fbadop_ioctl(struct file *fp, u_long com, void *data, struct lwp *l)
2107 {
2108
2109 return EOPNOTSUPP;
2110 }
2111
2112 /* ARGSUSED */
2113 int
2114 fbadop_stat(struct file *fp, struct stat *sb, struct lwp *l)
2115 {
2116
2117 return EOPNOTSUPP;
2118 }
2119
2120 /* ARGSUSED */
2121 int
2122 fbadop_close(struct file *fp, struct lwp *l)
2123 {
2124
2125 return EOPNOTSUPP;
2126 }
2127