vfs_mount.c revision 1.32 1 /* $NetBSD: vfs_mount.c,v 1.32 2015/01/08 12:06:50 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.32 2015/01/08 12:06:50 hannken Exp $");
71
72 #define _VFS_VNODE_PRIVATE
73
74 #include <sys/param.h>
75 #include <sys/kernel.h>
76
77 #include <sys/atomic.h>
78 #include <sys/buf.h>
79 #include <sys/conf.h>
80 #include <sys/fcntl.h>
81 #include <sys/filedesc.h>
82 #include <sys/device.h>
83 #include <sys/kauth.h>
84 #include <sys/kmem.h>
85 #include <sys/module.h>
86 #include <sys/mount.h>
87 #include <sys/namei.h>
88 #include <sys/extattr.h>
89 #include <sys/syscallargs.h>
90 #include <sys/sysctl.h>
91 #include <sys/systm.h>
92 #include <sys/vfs_syscalls.h>
93 #include <sys/vnode.h>
94
95 #include <miscfs/genfs/genfs.h>
96 #include <miscfs/syncfs/syncfs.h>
97 #include <miscfs/specfs/specdev.h>
98
/*
 * Root filesystem and device.
 *
 * rootvnode is assigned by vfs_mountroot() and may be replaced by
 * mount_checkdirs() when a file system is mounted over it.
 * vfs_mountroot() panics if root_device is still NULL when it runs.
 */
vnode_t *			rootvnode;
device_t			root_device;

/*
 * Mounted filesystem list.  mountlist_lock is taken around list
 * insertion/removal and around the lookup in vfs_getvfs().
 */
struct mntlist			mountlist;
kmutex_t			mountlist_lock;

/* Held while a mount's vnode list (mnt_vnodelist) is walked or edited. */
kmutex_t			mntvnode_lock;
/* Held while vfs_list is walked and vfs_refcount is adjusted. */
kmutex_t			vfs_list_lock;

/* Domain for mount-specific data; created in vfs_mount_sysinit(). */
static specificdata_domain_t	mount_specificdata_domain;
/* Serializes fsid generation in vfs_getnewfsid(). */
static kmutex_t			mntid_lock;

/* mountgen is the next value handed out as mnt_gen; mountgen_lock guards it. */
static kmutex_t			mountgen_lock;
static uint64_t			mountgen;
115
116 void
117 vfs_mount_sysinit(void)
118 {
119
120 TAILQ_INIT(&mountlist);
121 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
122 mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
123 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
124
125 mount_specificdata_domain = specificdata_domain_create();
126 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
127 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
128 mountgen = 0;
129 }
130
131 struct mount *
132 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
133 {
134 struct mount *mp;
135 int error __diagused;
136
137 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
138 if (mp == NULL)
139 return NULL;
140
141 mp->mnt_op = vfsops;
142 mp->mnt_refcnt = 1;
143 TAILQ_INIT(&mp->mnt_vnodelist);
144 mutex_init(&mp->mnt_unmounting, MUTEX_DEFAULT, IPL_NONE);
145 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
146 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
147 error = vfs_busy(mp, NULL);
148 KASSERT(error == 0);
149 mp->mnt_vnodecovered = vp;
150 mount_initspecific(mp);
151
152 mutex_enter(&mountgen_lock);
153 mp->mnt_gen = mountgen++;
154 mutex_exit(&mountgen_lock);
155
156 return mp;
157 }
158
159 /*
160 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
161 * initialize a mount structure for it.
162 *
163 * Devname is usually updated by mount(8) after booting.
164 */
165 int
166 vfs_rootmountalloc(const char *fstypename, const char *devname,
167 struct mount **mpp)
168 {
169 struct vfsops *vfsp = NULL;
170 struct mount *mp;
171
172 mutex_enter(&vfs_list_lock);
173 LIST_FOREACH(vfsp, &vfs_list, vfs_list)
174 if (!strncmp(vfsp->vfs_name, fstypename,
175 sizeof(mp->mnt_stat.f_fstypename)))
176 break;
177 if (vfsp == NULL) {
178 mutex_exit(&vfs_list_lock);
179 return (ENODEV);
180 }
181 vfsp->vfs_refcount++;
182 mutex_exit(&vfs_list_lock);
183
184 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
185 return ENOMEM;
186 mp->mnt_flag = MNT_RDONLY;
187 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
188 sizeof(mp->mnt_stat.f_fstypename));
189 mp->mnt_stat.f_mntonname[0] = '/';
190 mp->mnt_stat.f_mntonname[1] = '\0';
191 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
192 '\0';
193 (void)copystr(devname, mp->mnt_stat.f_mntfromname,
194 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
195 *mpp = mp;
196 return 0;
197 }
198
199 /*
200 * vfs_getnewfsid: get a new unique fsid.
201 */
202 void
203 vfs_getnewfsid(struct mount *mp)
204 {
205 static u_short xxxfs_mntid;
206 fsid_t tfsid;
207 int mtype;
208
209 mutex_enter(&mntid_lock);
210 mtype = makefstype(mp->mnt_op->vfs_name);
211 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
212 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
213 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
214 if (xxxfs_mntid == 0)
215 ++xxxfs_mntid;
216 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
217 tfsid.__fsid_val[1] = mtype;
218 if (!TAILQ_EMPTY(&mountlist)) {
219 while (vfs_getvfs(&tfsid)) {
220 tfsid.__fsid_val[0]++;
221 xxxfs_mntid++;
222 }
223 }
224 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
225 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
226 mutex_exit(&mntid_lock);
227 }
228
229 /*
230 * Lookup a mount point by filesystem identifier.
231 *
232 * XXX Needs to add a reference to the mount point.
233 */
234 struct mount *
235 vfs_getvfs(fsid_t *fsid)
236 {
237 struct mount *mp;
238
239 mutex_enter(&mountlist_lock);
240 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
241 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
242 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
243 mutex_exit(&mountlist_lock);
244 return (mp);
245 }
246 }
247 mutex_exit(&mountlist_lock);
248 return NULL;
249 }
250
251 /*
252 * Drop a reference to a mount structure, freeing if the last reference.
253 */
254 void
255 vfs_destroy(struct mount *mp)
256 {
257
258 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
259 return;
260 }
261
262 /*
263 * Nothing else has visibility of the mount: we can now
264 * free the data structures.
265 */
266 KASSERT(mp->mnt_refcnt == 0);
267 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
268 mutex_destroy(&mp->mnt_unmounting);
269 mutex_destroy(&mp->mnt_updating);
270 mutex_destroy(&mp->mnt_renamelock);
271 if (mp->mnt_op != NULL) {
272 vfs_delref(mp->mnt_op);
273 }
274 kmem_free(mp, sizeof(*mp));
275 }
276
277 /*
278 * Mark a mount point as busy, and gain a new reference to it. Used to
279 * prevent the file system from being unmounted during critical sections.
280 *
281 * vfs_busy can be called multiple times and by multiple threads
282 * and must be accompanied by the same number of vfs_unbusy calls.
283 *
284 * => The caller must hold a pre-existing reference to the mount.
285 * => Will fail if the file system is being unmounted, or is unmounted.
286 */
287 int
288 vfs_busy(struct mount *mp, struct mount **nextp)
289 {
290
291 KASSERT(mp->mnt_refcnt > 0);
292
293 mutex_enter(&mp->mnt_unmounting);
294 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
295 mutex_exit(&mp->mnt_unmounting);
296 if (nextp != NULL) {
297 KASSERT(mutex_owned(&mountlist_lock));
298 *nextp = TAILQ_NEXT(mp, mnt_list);
299 }
300 return ENOENT;
301 }
302 ++mp->mnt_busynest;
303 KASSERT(mp->mnt_busynest != 0);
304 mutex_exit(&mp->mnt_unmounting);
305 if (nextp != NULL) {
306 mutex_exit(&mountlist_lock);
307 }
308 atomic_inc_uint(&mp->mnt_refcnt);
309 return 0;
310 }
311
312 /*
313 * Unbusy a busy filesystem.
314 *
315 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
316 *
317 * => If keepref is true, preserve reference added by vfs_busy().
318 * => If nextp != NULL, acquire mountlist_lock.
319 */
320 void
321 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
322 {
323
324 KASSERT(mp->mnt_refcnt > 0);
325
326 if (nextp != NULL) {
327 mutex_enter(&mountlist_lock);
328 }
329 mutex_enter(&mp->mnt_unmounting);
330 KASSERT(mp->mnt_busynest != 0);
331 mp->mnt_busynest--;
332 mutex_exit(&mp->mnt_unmounting);
333 if (!keepref) {
334 vfs_destroy(mp);
335 }
336 if (nextp != NULL) {
337 KASSERT(mutex_owned(&mountlist_lock));
338 *nextp = TAILQ_NEXT(mp, mnt_list);
339 }
340 }
341
/*
 * Cursor for walking a mount's vnode list.  The iterator is a vnode
 * obtained from vnalloc() in vfs_vnode_iterator_init(), which casts the
 * vnode pointer to this type; vi_vnode must therefore stay the first
 * member.
 */
struct vnode_iterator {
	struct vnode vi_vnode;
};
345
346 void
347 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vip)
348 {
349 struct vnode *vp;
350
351 vp = vnalloc(mp);
352
353 mutex_enter(&mntvnode_lock);
354 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
355 vp->v_usecount = 1;
356 mutex_exit(&mntvnode_lock);
357
358 *vip = (struct vnode_iterator *)vp;
359 }
360
361 void
362 vfs_vnode_iterator_destroy(struct vnode_iterator *vi)
363 {
364 struct vnode *mvp = &vi->vi_vnode;
365
366 mutex_enter(&mntvnode_lock);
367 KASSERT(ISSET(mvp->v_iflag, VI_MARKER));
368 if (mvp->v_usecount != 0) {
369 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvp, v_mntvnodes);
370 mvp->v_usecount = 0;
371 }
372 mutex_exit(&mntvnode_lock);
373 vnfree(mvp);
374 }
375
376 struct vnode *
377 vfs_vnode_iterator_next(struct vnode_iterator *vi,
378 bool (*f)(void *, struct vnode *), void *cl)
379 {
380 struct vnode *mvp = &vi->vi_vnode;
381 struct mount *mp = mvp->v_mount;
382 struct vnode *vp;
383 int error;
384
385 KASSERT(ISSET(mvp->v_iflag, VI_MARKER));
386
387 do {
388 mutex_enter(&mntvnode_lock);
389 vp = TAILQ_NEXT(mvp, v_mntvnodes);
390 TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);
391 mvp->v_usecount = 0;
392 again:
393 if (vp == NULL) {
394 mutex_exit(&mntvnode_lock);
395 return NULL;
396 }
397 mutex_enter(vp->v_interlock);
398 if (ISSET(vp->v_iflag, VI_MARKER) ||
399 (f && !ISSET(vp->v_iflag, VI_XLOCK) && !(*f)(cl, vp))) {
400 mutex_exit(vp->v_interlock);
401 vp = TAILQ_NEXT(vp, v_mntvnodes);
402 goto again;
403 }
404
405 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
406 mvp->v_usecount = 1;
407 mutex_exit(&mntvnode_lock);
408 error = vget(vp, 0);
409 KASSERT(error == 0 || error == ENOENT);
410 } while (error != 0);
411
412 return vp;
413 }
414
415 /*
416 * Move a vnode from one mount queue to another.
417 */
418 void
419 vfs_insmntque(vnode_t *vp, struct mount *mp)
420 {
421 struct mount *omp;
422
423 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
424 vp->v_tag == VT_VFS);
425
426 mutex_enter(&mntvnode_lock);
427 /*
428 * Delete from old mount point vnode list, if on one.
429 */
430 if ((omp = vp->v_mount) != NULL)
431 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
432 /*
433 * Insert into list of vnodes for the new mount point, if
434 * available. The caller must take a reference on the mount
435 * structure and donate to the vnode.
436 */
437 if ((vp->v_mount = mp) != NULL)
438 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
439 mutex_exit(&mntvnode_lock);
440
441 if (omp != NULL) {
442 /* Release reference to old mount. */
443 vfs_destroy(omp);
444 }
445 }
446
447 /*
448 * Remove any vnodes in the vnode table belonging to mount point mp.
449 *
450 * If FORCECLOSE is not specified, there should not be any active ones,
451 * return error if any are found (nb: this is a user error, not a
452 * system error). If FORCECLOSE is specified, detach any active vnodes
453 * that are found.
454 *
455 * If WRITECLOSE is set, only flush out regular file vnodes open for
456 * writing.
457 *
458 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
459 */
#ifdef DEBUG
/* Debug knob: when non-zero, vflush() prints every vnode it finds busy. */
int busyprt = 0;	/* print out busy vnodes */
/* Associates the name "busyprt" with the variable above. */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
464
/*
 * Arguments vflush() passes to vflush_selector() through the vnode
 * iterator's opaque closure pointer.
 */
struct vflush_ctx {
	const struct vnode *skipvp;	/* vnode never to be selected */
	int flags;			/* SKIPSYSTEM / WRITECLOSE / ... */
};
469
470 static bool
471 vflush_selector(void *cl, struct vnode *vp)
472 {
473 struct vflush_ctx *c = cl;
474 /*
475 * Skip over a selected vnode.
476 */
477 if (vp == c->skipvp)
478 return false;
479 /*
480 * Skip over a vnodes marked VSYSTEM.
481 */
482 if ((c->flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))
483 return false;
484
485 /*
486 * If WRITECLOSE is set, only flush out regular file
487 * vnodes open for writing.
488 */
489 if ((c->flags & WRITECLOSE) && vp->v_type == VREG) {
490 if (vp->v_writecount == 0)
491 return false;
492 }
493 return true;
494 }
495
496 static vnode_t *
497 vflushnext(struct vnode_iterator *marker, void *ctx, int *when)
498 {
499 if (hardclock_ticks > *when) {
500 yield();
501 *when = hardclock_ticks + hz / 10;
502 }
503 return vfs_vnode_iterator_next(marker, vflush_selector, ctx);
504 }
505
506
507 int
508 vflush(struct mount *mp, vnode_t *skipvp, int flags)
509 {
510 vnode_t *vp;
511 struct vnode_iterator *marker;
512 int busy = 0, when = 0;
513 struct vflush_ctx ctx;
514
515 /* First, flush out any vnode references from vrele_list. */
516 vrele_flush();
517
518 vfs_vnode_iterator_init(mp, &marker);
519
520 ctx.skipvp = skipvp;
521 ctx.flags = flags;
522 while ((vp = vflushnext(marker, &ctx, &when)) != NULL) {
523 /*
524 * First try to recycle the vnode.
525 */
526 if (vrecycle(vp))
527 continue;
528 /*
529 * If FORCECLOSE is set, forcibly close the vnode.
530 */
531 if (flags & FORCECLOSE) {
532 vgone(vp);
533 continue;
534 }
535 #ifdef DEBUG
536 if (busyprt)
537 vprint("vflush: busy vnode", vp);
538 #endif
539 vrele(vp);
540 busy++;
541 }
542 vfs_vnode_iterator_destroy(marker);
543 if (busy)
544 return (EBUSY);
545 return (0);
546 }
547
548 /*
549 * Mount a file system.
550 */
551
552 /*
553 * Scan all active processes to see if any of them have a current or root
554 * directory onto which the new filesystem has just been mounted. If so,
555 * replace them with the new mount point.
556 */
557 static void
558 mount_checkdirs(vnode_t *olddp)
559 {
560 vnode_t *newdp, *rele1, *rele2;
561 struct cwdinfo *cwdi;
562 struct proc *p;
563 bool retry;
564
565 if (olddp->v_usecount == 1) {
566 return;
567 }
568 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
569 panic("mount: lost mount");
570
571 do {
572 retry = false;
573 mutex_enter(proc_lock);
574 PROCLIST_FOREACH(p, &allproc) {
575 if ((cwdi = p->p_cwdi) == NULL)
576 continue;
577 /*
578 * Cannot change to the old directory any more,
579 * so even if we see a stale value it is not a
580 * problem.
581 */
582 if (cwdi->cwdi_cdir != olddp &&
583 cwdi->cwdi_rdir != olddp)
584 continue;
585 retry = true;
586 rele1 = NULL;
587 rele2 = NULL;
588 atomic_inc_uint(&cwdi->cwdi_refcnt);
589 mutex_exit(proc_lock);
590 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
591 if (cwdi->cwdi_cdir == olddp) {
592 rele1 = cwdi->cwdi_cdir;
593 vref(newdp);
594 cwdi->cwdi_cdir = newdp;
595 }
596 if (cwdi->cwdi_rdir == olddp) {
597 rele2 = cwdi->cwdi_rdir;
598 vref(newdp);
599 cwdi->cwdi_rdir = newdp;
600 }
601 rw_exit(&cwdi->cwdi_lock);
602 cwdfree(cwdi);
603 if (rele1 != NULL)
604 vrele(rele1);
605 if (rele2 != NULL)
606 vrele(rele2);
607 mutex_enter(proc_lock);
608 break;
609 }
610 mutex_exit(proc_lock);
611 } while (retry);
612
613 if (rootvnode == olddp) {
614 vrele(rootvnode);
615 vref(newdp);
616 rootvnode = newdp;
617 }
618 vput(newdp);
619 }
620
621 /*
622 * Start extended attributes
623 */
624 static int
625 start_extattr(struct mount *mp)
626 {
627 int error;
628
629 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
630 if (error)
631 printf("%s: failed to start extattr: error = %d\n",
632 mp->mnt_stat.f_mntonname, error);
633
634 return error;
635 }
636
637 int
638 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
639 const char *path, int flags, void *data, size_t *data_len)
640 {
641 vnode_t *vp = *vpp;
642 struct mount *mp;
643 struct pathbuf *pb;
644 struct nameidata nd;
645 int error;
646
647 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
648 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
649 if (error) {
650 vfs_delref(vfsops);
651 return error;
652 }
653
654 /* Cannot make a non-dir a mount-point (from here anyway). */
655 if (vp->v_type != VDIR) {
656 vfs_delref(vfsops);
657 return ENOTDIR;
658 }
659
660 if (flags & MNT_EXPORTED) {
661 vfs_delref(vfsops);
662 return EINVAL;
663 }
664
665 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
666 vfs_delref(vfsops);
667 return ENOMEM;
668 }
669
670 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
671
672 /*
673 * The underlying file system may refuse the mount for
674 * various reasons. Allow the user to force it to happen.
675 *
676 * Set the mount level flags.
677 */
678 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);
679
680 mutex_enter(&mp->mnt_updating);
681 error = VFS_MOUNT(mp, path, data, data_len);
682 mp->mnt_flag &= ~MNT_OP_FLAGS;
683
684 if (error != 0)
685 goto err_unmounted;
686
687 /*
688 * Validate and prepare the mount point.
689 */
690 error = pathbuf_copyin(path, &pb);
691 if (error != 0) {
692 goto err_mounted;
693 }
694 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
695 error = namei(&nd);
696 pathbuf_destroy(pb);
697 if (error != 0) {
698 goto err_mounted;
699 }
700 if (nd.ni_vp != vp) {
701 vput(nd.ni_vp);
702 error = EINVAL;
703 goto err_mounted;
704 }
705 if (vp->v_mountedhere != NULL) {
706 vput(nd.ni_vp);
707 error = EBUSY;
708 goto err_mounted;
709 }
710 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
711 if (error != 0) {
712 vput(nd.ni_vp);
713 goto err_mounted;
714 }
715
716 /*
717 * Put the new filesystem on the mount list after root.
718 */
719 cache_purge(vp);
720 mp->mnt_iflag &= ~IMNT_WANTRDWR;
721
722 mutex_enter(&mountlist_lock);
723 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
724 mutex_exit(&mountlist_lock);
725 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
726 error = vfs_allocate_syncvnode(mp);
727 if (error == 0)
728 vp->v_mountedhere = mp;
729 vput(nd.ni_vp);
730 if (error != 0)
731 goto err_onmountlist;
732
733 mount_checkdirs(vp);
734 mutex_exit(&mp->mnt_updating);
735
736 /* Hold an additional reference to the mount across VFS_START(). */
737 vfs_unbusy(mp, true, NULL);
738 (void) VFS_STATVFS(mp, &mp->mnt_stat);
739 error = VFS_START(mp, 0);
740 if (error) {
741 vrele(vp);
742 } else if (flags & MNT_EXTATTR) {
743 (void)start_extattr(mp);
744 }
745 /* Drop reference held for VFS_START(). */
746 vfs_destroy(mp);
747 *vpp = NULL;
748 return error;
749
750 err_onmountlist:
751 mutex_enter(&mountlist_lock);
752 TAILQ_REMOVE(&mountlist, mp, mnt_list);
753 mp->mnt_iflag |= IMNT_GONE;
754 mutex_exit(&mountlist_lock);
755
756 err_mounted:
757 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
758 panic("Unmounting fresh file system failed");
759
760 err_unmounted:
761 vp->v_mountedhere = NULL;
762 mutex_exit(&mp->mnt_updating);
763 vfs_unbusy(mp, false, NULL);
764 vfs_destroy(mp);
765
766 return error;
767 }
768
769 /*
770 * Do the actual file system unmount. File system is assumed to have
771 * been locked by the caller.
772 *
773 * => Caller hold reference to the mount, explicitly for dounmount().
774 */
775 int
776 dounmount(struct mount *mp, int flags, struct lwp *l)
777 {
778 vnode_t *coveredvp;
779 int error, async, used_syncer, used_extattr;
780
781 #if NVERIEXEC > 0
782 error = veriexec_unmountchk(mp);
783 if (error)
784 return (error);
785 #endif /* NVERIEXEC > 0 */
786
787 /*
788 * XXX Freeze syncer. Must do this before locking the
789 * mount point. See dounmount() for details.
790 */
791 mutex_enter(&syncer_mutex);
792
793 /*
794 * Abort unmount attempt when the filesystem is in use
795 */
796 mutex_enter(&mp->mnt_unmounting);
797 if (mp->mnt_busynest != 0) {
798 mutex_exit(&mp->mnt_unmounting);
799 mutex_exit(&syncer_mutex);
800 return EBUSY;
801 }
802
803 /*
804 * Abort unmount attempt when the filesystem is not mounted
805 */
806 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
807 mutex_exit(&mp->mnt_unmounting);
808 mutex_exit(&syncer_mutex);
809 return ENOENT;
810 }
811
812 used_syncer = (mp->mnt_syncer != NULL);
813 used_extattr = mp->mnt_flag & MNT_EXTATTR;
814
815 /*
816 * XXX Syncer must be frozen when we get here. This should really
817 * be done on a per-mountpoint basis, but the syncer doesn't work
818 * like that.
819 *
820 * The caller of dounmount() must acquire syncer_mutex because
821 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
822 * order, and we must preserve that order to avoid deadlock.
823 *
824 * So, if the file system did not use the syncer, now is
825 * the time to release the syncer_mutex.
826 */
827 if (used_syncer == 0) {
828 mutex_exit(&syncer_mutex);
829 }
830 mp->mnt_iflag |= IMNT_UNMOUNT;
831 mutex_enter(&mp->mnt_updating);
832 async = mp->mnt_flag & MNT_ASYNC;
833 mp->mnt_flag &= ~MNT_ASYNC;
834 cache_purgevfs(mp); /* remove cache entries for this file sys */
835 if (mp->mnt_syncer != NULL)
836 vfs_deallocate_syncvnode(mp);
837 error = 0;
838 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
839 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
840 }
841 if (error == 0 || (flags & MNT_FORCE)) {
842 error = VFS_UNMOUNT(mp, flags);
843 }
844 if (error) {
845 mp->mnt_iflag &= ~IMNT_UNMOUNT;
846 mutex_exit(&mp->mnt_unmounting);
847 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
848 (void) vfs_allocate_syncvnode(mp);
849 mp->mnt_flag |= async;
850 mutex_exit(&mp->mnt_updating);
851 if (used_syncer)
852 mutex_exit(&syncer_mutex);
853 if (used_extattr) {
854 if (start_extattr(mp) != 0)
855 mp->mnt_flag &= ~MNT_EXTATTR;
856 else
857 mp->mnt_flag |= MNT_EXTATTR;
858 }
859 return (error);
860 }
861 mutex_exit(&mp->mnt_updating);
862
863 /*
864 * release mnt_umounting lock here, because other code calls
865 * vfs_busy() while holding the mountlist_lock.
866 *
867 * mark filesystem as gone to prevent further umounts
868 * after mnt_umounting lock is gone, this also prevents
869 * vfs_busy() from succeeding.
870 */
871 mp->mnt_iflag |= IMNT_GONE;
872 mutex_exit(&mp->mnt_unmounting);
873
874 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
875 vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
876 coveredvp->v_mountedhere = NULL;
877 VOP_UNLOCK(coveredvp);
878 }
879 mutex_enter(&mountlist_lock);
880 TAILQ_REMOVE(&mountlist, mp, mnt_list);
881 mutex_exit(&mountlist_lock);
882 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
883 panic("unmount: dangling vnode");
884 if (used_syncer)
885 mutex_exit(&syncer_mutex);
886 vfs_hooks_unmount(mp);
887
888 vfs_destroy(mp); /* reference from mount() */
889 if (coveredvp != NULLVP) {
890 vrele(coveredvp);
891 }
892 return (0);
893 }
894
895 /*
896 * Unmount all file systems.
897 * We traverse the list in reverse order under the assumption that doing so
898 * will avoid needing to worry about dependencies.
899 */
900 bool
901 vfs_unmountall(struct lwp *l)
902 {
903
904 printf("unmounting file systems...\n");
905 return vfs_unmountall1(l, true, true);
906 }
907
908 static void
909 vfs_unmount_print(struct mount *mp, const char *pfx)
910 {
911
912 aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
913 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
914 mp->mnt_stat.f_fstypename);
915 }
916
917 bool
918 vfs_unmount_forceone(struct lwp *l)
919 {
920 struct mount *mp, *nmp;
921 int error;
922
923 nmp = NULL;
924
925 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
926 if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen) {
927 nmp = mp;
928 }
929 }
930 if (nmp == NULL) {
931 return false;
932 }
933
934 #ifdef DEBUG
935 printf("forcefully unmounting %s (%s)...\n",
936 nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname);
937 #endif
938 atomic_inc_uint(&nmp->mnt_refcnt);
939 if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) {
940 vfs_unmount_print(nmp, "forcefully ");
941 return true;
942 } else {
943 vfs_destroy(nmp);
944 }
945
946 #ifdef DEBUG
947 printf("forceful unmount of %s failed with error %d\n",
948 nmp->mnt_stat.f_mntonname, error);
949 #endif
950
951 return false;
952 }
953
954 bool
955 vfs_unmountall1(struct lwp *l, bool force, bool verbose)
956 {
957 struct mount *mp, *nmp;
958 bool any_error = false, progress = false;
959 int error;
960
961 TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) {
962 #ifdef DEBUG
963 printf("unmounting %p %s (%s)...\n",
964 (void *)mp, mp->mnt_stat.f_mntonname,
965 mp->mnt_stat.f_mntfromname);
966 #endif
967 atomic_inc_uint(&mp->mnt_refcnt);
968 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
969 vfs_unmount_print(mp, "");
970 progress = true;
971 } else {
972 vfs_destroy(mp);
973 if (verbose) {
974 printf("unmount of %s failed with error %d\n",
975 mp->mnt_stat.f_mntonname, error);
976 }
977 any_error = true;
978 }
979 }
980 if (verbose) {
981 printf("unmounting done\n");
982 }
983 if (any_error && verbose) {
984 printf("WARNING: some file systems would not unmount\n");
985 }
986 return progress;
987 }
988
989 void
990 vfs_sync_all(struct lwp *l)
991 {
992 printf("syncing disks... ");
993
994 /* remove user processes from run queue */
995 suspendsched();
996 (void)spl0();
997
998 /* avoid coming back this way again if we panic. */
999 doing_shutdown = 1;
1000
1001 do_sys_sync(l);
1002
1003 /* Wait for sync to finish. */
1004 if (buf_syncwait() != 0) {
1005 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1006 Debugger();
1007 #endif
1008 printf("giving up\n");
1009 return;
1010 } else
1011 printf("done\n");
1012 }
1013
1014 /*
1015 * Sync and unmount file systems before shutting down.
1016 */
1017 void
1018 vfs_shutdown(void)
1019 {
1020 lwp_t *l = curlwp;
1021
1022 vfs_sync_all(l);
1023
1024 /*
1025 * If we have paniced - do not make the situation potentially
1026 * worse by unmounting the file systems.
1027 */
1028 if (panicstr != NULL) {
1029 return;
1030 }
1031
1032 /* Unmount file systems. */
1033 vfs_unmountall(l);
1034 }
1035
1036 /*
1037 * Print a list of supported file system types (used by vfs_mountroot)
1038 */
1039 static void
1040 vfs_print_fstypes(void)
1041 {
1042 struct vfsops *v;
1043 int cnt = 0;
1044
1045 mutex_enter(&vfs_list_lock);
1046 LIST_FOREACH(v, &vfs_list, vfs_list)
1047 ++cnt;
1048 mutex_exit(&vfs_list_lock);
1049
1050 if (cnt == 0) {
1051 printf("WARNING: No file system modules have been loaded.\n");
1052 return;
1053 }
1054
1055 printf("Supported file systems:");
1056 mutex_enter(&vfs_list_lock);
1057 LIST_FOREACH(v, &vfs_list, vfs_list) {
1058 printf(" %s", v->vfs_name);
1059 }
1060 mutex_exit(&vfs_list_lock);
1061 printf("\n");
1062 }
1063
1064 /*
1065 * Mount the root file system. If the operator didn't specify a
1066 * file system to use, try all possible file systems until one
1067 * succeeds.
1068 */
1069 int
1070 vfs_mountroot(void)
1071 {
1072 struct vfsops *v;
1073 int error = ENODEV;
1074
1075 if (root_device == NULL)
1076 panic("vfs_mountroot: root device unknown");
1077
1078 switch (device_class(root_device)) {
1079 case DV_IFNET:
1080 if (rootdev != NODEV)
1081 panic("vfs_mountroot: rootdev set for DV_IFNET "
1082 "(0x%llx -> %llu,%llu)",
1083 (unsigned long long)rootdev,
1084 (unsigned long long)major(rootdev),
1085 (unsigned long long)minor(rootdev));
1086 break;
1087
1088 case DV_DISK:
1089 if (rootdev == NODEV)
1090 panic("vfs_mountroot: rootdev not set for DV_DISK");
1091 if (bdevvp(rootdev, &rootvp))
1092 panic("vfs_mountroot: can't get vnode for rootdev");
1093 error = VOP_OPEN(rootvp, FREAD, FSCRED);
1094 if (error) {
1095 printf("vfs_mountroot: can't open root device\n");
1096 return (error);
1097 }
1098 break;
1099
1100 case DV_VIRTUAL:
1101 break;
1102
1103 default:
1104 printf("%s: inappropriate for root file system\n",
1105 device_xname(root_device));
1106 return (ENODEV);
1107 }
1108
1109 /*
1110 * If user specified a root fs type, use it. Make sure the
1111 * specified type exists and has a mount_root()
1112 */
1113 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
1114 v = vfs_getopsbyname(rootfstype);
1115 error = EFTYPE;
1116 if (v != NULL) {
1117 if (v->vfs_mountroot != NULL) {
1118 error = (v->vfs_mountroot)();
1119 }
1120 v->vfs_refcount--;
1121 }
1122 goto done;
1123 }
1124
1125 /*
1126 * Try each file system currently configured into the kernel.
1127 */
1128 mutex_enter(&vfs_list_lock);
1129 LIST_FOREACH(v, &vfs_list, vfs_list) {
1130 if (v->vfs_mountroot == NULL)
1131 continue;
1132 #ifdef DEBUG
1133 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1134 #endif
1135 v->vfs_refcount++;
1136 mutex_exit(&vfs_list_lock);
1137 error = (*v->vfs_mountroot)();
1138 mutex_enter(&vfs_list_lock);
1139 v->vfs_refcount--;
1140 if (!error) {
1141 aprint_normal("root file system type: %s\n",
1142 v->vfs_name);
1143 break;
1144 }
1145 }
1146 mutex_exit(&vfs_list_lock);
1147
1148 if (v == NULL) {
1149 vfs_print_fstypes();
1150 printf("no file system for %s", device_xname(root_device));
1151 if (device_class(root_device) == DV_DISK)
1152 printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1153 printf("\n");
1154 error = EFTYPE;
1155 }
1156
1157 done:
1158 if (error && device_class(root_device) == DV_DISK) {
1159 VOP_CLOSE(rootvp, FREAD, FSCRED);
1160 vrele(rootvp);
1161 }
1162 if (error == 0) {
1163 struct mount *mp;
1164 extern struct cwdinfo cwdi0;
1165
1166 mp = TAILQ_FIRST(&mountlist);
1167 mp->mnt_flag |= MNT_ROOTFS;
1168 mp->mnt_op->vfs_refcount++;
1169
1170 /*
1171 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
1172 * reference it.
1173 */
1174 error = VFS_ROOT(mp, &rootvnode);
1175 if (error)
1176 panic("cannot find root vnode, error=%d", error);
1177 cwdi0.cwdi_cdir = rootvnode;
1178 vref(cwdi0.cwdi_cdir);
1179 VOP_UNLOCK(rootvnode);
1180 cwdi0.cwdi_rdir = NULL;
1181
1182 /*
1183 * Now that root is mounted, we can fixup initproc's CWD
1184 * info. All other processes are kthreads, which merely
1185 * share proc0's CWD info.
1186 */
1187 initproc->p_cwdi->cwdi_cdir = rootvnode;
1188 vref(initproc->p_cwdi->cwdi_cdir);
1189 initproc->p_cwdi->cwdi_rdir = NULL;
1190 /*
1191 * Enable loading of modules from the filesystem
1192 */
1193 module_load_vfs_init();
1194
1195 }
1196 return (error);
1197 }
1198
1199 /*
1200 * mount_specific_key_create --
1201 * Create a key for subsystem mount-specific data.
1202 */
1203 int
1204 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1205 {
1206
1207 return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1208 }
1209
1210 /*
1211 * mount_specific_key_delete --
1212 * Delete a key for subsystem mount-specific data.
1213 */
1214 void
1215 mount_specific_key_delete(specificdata_key_t key)
1216 {
1217
1218 specificdata_key_delete(mount_specificdata_domain, key);
1219 }
1220
1221 /*
1222 * mount_initspecific --
1223 * Initialize a mount's specificdata container.
1224 */
1225 void
1226 mount_initspecific(struct mount *mp)
1227 {
1228 int error __diagused;
1229
1230 error = specificdata_init(mount_specificdata_domain,
1231 &mp->mnt_specdataref);
1232 KASSERT(error == 0);
1233 }
1234
1235 /*
1236 * mount_finispecific --
1237 * Finalize a mount's specificdata container.
1238 */
1239 void
1240 mount_finispecific(struct mount *mp)
1241 {
1242
1243 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1244 }
1245
1246 /*
1247 * mount_getspecific --
1248 * Return mount-specific data corresponding to the specified key.
1249 */
1250 void *
1251 mount_getspecific(struct mount *mp, specificdata_key_t key)
1252 {
1253
1254 return specificdata_getspecific(mount_specificdata_domain,
1255 &mp->mnt_specdataref, key);
1256 }
1257
1258 /*
1259 * mount_setspecific --
1260 * Set mount-specific data corresponding to the specified key.
1261 */
1262 void
1263 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1264 {
1265
1266 specificdata_setspecific(mount_specificdata_domain,
1267 &mp->mnt_specdataref, key, data);
1268 }
1269
1270 /*
1271 * Check to see if a filesystem is mounted on a block device.
1272 */
1273 int
1274 vfs_mountedon(vnode_t *vp)
1275 {
1276 vnode_t *vq;
1277 int error = 0;
1278
1279 if (vp->v_type != VBLK)
1280 return ENOTBLK;
1281 if (spec_node_getmountedfs(vp) != NULL)
1282 return EBUSY;
1283 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1284 if (spec_node_getmountedfs(vq) != NULL)
1285 error = EBUSY;
1286 vrele(vq);
1287 }
1288
1289 return error;
1290 }
1291
1292 /*
1293 * Check if a device pointed to by vp is mounted.
1294 *
1295 * Returns:
1296 * EINVAL if it's not a disk
1297 * EBUSY if it's a disk and mounted
1298 * 0 if it's a disk and not mounted
1299 */
1300 int
1301 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1302 {
1303 vnode_t *bvp;
1304 dev_t dev;
1305 int d_type;
1306
1307 bvp = NULL;
1308 d_type = D_OTHER;
1309
1310 if (iskmemvp(vp))
1311 return EINVAL;
1312
1313 switch (vp->v_type) {
1314 case VCHR: {
1315 const struct cdevsw *cdev;
1316
1317 dev = vp->v_rdev;
1318 cdev = cdevsw_lookup(dev);
1319 if (cdev != NULL) {
1320 dev_t blkdev;
1321
1322 blkdev = devsw_chr2blk(dev);
1323 if (blkdev != NODEV) {
1324 if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1325 d_type = (cdev->d_flag & D_TYPEMASK);
1326 /* XXX: what if bvp disappears? */
1327 vrele(bvp);
1328 }
1329 }
1330 }
1331
1332 break;
1333 }
1334
1335 case VBLK: {
1336 const struct bdevsw *bdev;
1337
1338 dev = vp->v_rdev;
1339 bdev = bdevsw_lookup(dev);
1340 if (bdev != NULL)
1341 d_type = (bdev->d_flag & D_TYPEMASK);
1342
1343 bvp = vp;
1344
1345 break;
1346 }
1347
1348 default:
1349 break;
1350 }
1351
1352 if (d_type != D_DISK)
1353 return EINVAL;
1354
1355 if (bvpp != NULL)
1356 *bvpp = bvp;
1357
1358 /*
1359 * XXX: This is bogus. We should be failing the request
1360 * XXX: not only if this specific slice is mounted, but
1361 * XXX: if it's on a disk with any other mounted slice.
1362 */
1363 if (vfs_mountedon(bvp))
1364 return EBUSY;
1365
1366 return 0;
1367 }
1368
/*
 * Make a 'unique' number from a mount type name.
 *
 * Each character of the NUL-terminated string type is folded into an
 * accumulator: shift the accumulator left by two bits, then XOR in the
 * character.  An empty string yields 0.
 *
 * The fold is done in unsigned arithmetic.  Left-shifting a signed long
 * is undefined once bits reach the sign bit (or the value is negative),
 * which long type names would trigger; the unsigned form produces the
 * same bit pattern without the undefined behaviour.
 */
long
makefstype(const char *type)
{
	unsigned long hash = 0;

	for (; *type != '\0'; type++) {
		/*
		 * Go through (long) first so a negative plain char is
		 * sign-extended exactly as in a signed XOR.
		 */
		hash = (hash << 2) ^ (unsigned long)(long)*type;
	}

	return (long)hash;
}
1383
1384 void
1385 mountlist_append(struct mount *mp)
1386 {
1387 mutex_enter(&mountlist_lock);
1388 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
1389 mutex_exit(&mountlist_lock);
1390 }
1391