vfs_mount.c revision 1.64 1 /* $NetBSD: vfs_mount.c,v 1.64 2017/05/24 09:53:55 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
67 */
68
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.64 2017/05/24 09:53:55 hannken Exp $");
71
72 #include <sys/param.h>
73 #include <sys/kernel.h>
74
75 #include <sys/atomic.h>
76 #include <sys/buf.h>
77 #include <sys/conf.h>
78 #include <sys/fcntl.h>
79 #include <sys/filedesc.h>
80 #include <sys/device.h>
81 #include <sys/kauth.h>
82 #include <sys/kmem.h>
83 #include <sys/module.h>
84 #include <sys/mount.h>
85 #include <sys/fstrans.h>
86 #include <sys/namei.h>
87 #include <sys/extattr.h>
88 #include <sys/syscallargs.h>
89 #include <sys/sysctl.h>
90 #include <sys/systm.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/vnode_impl.h>
93
94 #include <miscfs/genfs/genfs.h>
95 #include <miscfs/specfs/specdev.h>
96
/*
 * Mount list bookkeeping: the mount list is a tail queue whose entries
 * are either real mounts (ME_MOUNT) or iterator markers (ME_MARKER)
 * inserted on behalf of a mount_iterator.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
/* Opaque iterator handle: just a marker entry embedded in a struct. */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};
110
/* Common iterator core shared by vfs_vnode_iterator_next() and vflushnext(). */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t * rootvnode;

/* Mounted filesystem list and the lock protecting it. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t mountlist_lock;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

/* Protects the per-mount vnode lists (mnt_vnodelist). */
kmutex_t mntvnode_lock;
/* Protects the list of file system types (vfs_list). */
kmutex_t vfs_list_lock;

/* Specificdata domain for mount-specific data; see mount_specific_*(). */
static specificdata_domain_t mount_specificdata_domain;
/* Serializes fsid generation in vfs_getnewfsid(). */
static kmutex_t mntid_lock;

/* Monotonic mount generation counter and the lock guarding it. */
static kmutex_t mountgen_lock;
static uint64_t mountgen;
131
/*
 * vfs_mount_sysinit: one-time initialization of the mount subsystem,
 * called during kernel bootstrap before any mounts exist.
 */
void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}
146
/*
 * vfs_mountalloc: allocate and minimally initialize a struct mount.
 *
 * => vfsops is the file system operations vector for the new mount.
 * => vp is the covered vnode, or NULL for a root mount.
 * => Returns the mount with one reference held (donated to the caller)
 *    and a unique, monotonically increasing generation number.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;
	extern struct vfsops dead_vfsops;

	/* NOTE(review): with KM_SLEEP this allocation presumably cannot
	 * fail, making the NULL check below redundant -- confirm. */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return NULL;

	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;		/* reference donated to the caller */
	TAILQ_INIT(&mp->mnt_vnodelist);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);
	/* Dead mounts do not participate in file system transactions. */
	if (vfsops != &dead_vfsops) {
		error = fstrans_mount(mp);
		KASSERT(error == 0);
	}

	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}
176
/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * => Returns ENODEV if fstypename is unknown, ENOMEM on allocation
 *    failure; on success *mpp holds a busied, read-only mount.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;
	int error __diagused;

	/* Find the operations vector for the named file system type. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	/* Take a reference on the vfsops while still holding the lock. */
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	error = vfs_busy(mp);
	KASSERT(error == 0);
	mp->mnt_flag = MNT_RDONLY;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	/* The root file system is always mounted on "/". */
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return 0;
}
219
/*
 * vfs_getnewfsid: get a new unique fsid.
 *
 * The fsid is built from the file system type (major) and a rolling
 * per-boot counter (minor), probed against existing mounts until an
 * unused value is found.  All under mntid_lock.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;	/* rolling minor, under mntid_lock */
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	/* Provisional fsid: (fstype, 0). */
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;	/* minor 0 is reserved for provisional ids */
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	/* Probe until we find an fsid no other mount is using. */
	while (vfs_getvfs(&tfsid)) {
		tfsid.__fsid_val[0]++;
		xxxfs_mntid++;
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}
247
248 /*
249 * Lookup a mount point by filesystem identifier.
250 *
251 * XXX Needs to add a reference to the mount point.
252 */
253 struct mount *
254 vfs_getvfs(fsid_t *fsid)
255 {
256 mount_iterator_t *iter;
257 struct mount *mp;
258
259 mountlist_iterator_init(&iter);
260 while ((mp = mountlist_iterator_next(iter)) != NULL) {
261 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
262 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
263 mountlist_iterator_destroy(iter);
264 return mp;
265 }
266 }
267 mountlist_iterator_destroy(iter);
268 return NULL;
269 }
270
/*
 * Take a reference to a mount structure.
 *
 * => The caller must already hold a reference, or hold mountlist_lock
 *    (which keeps the mount alive while it is on the list).
 */
void
vfs_ref(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}
282
/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

	/* Fast path: somebody else still holds a reference. */
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_destroy(&mp->mnt_updating);
	mutex_destroy(&mp->mnt_renamelock);
	/* Drop the reference on the operations vector, if any. */
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	kmem_free(mp, sizeof(*mp));
}
307
/*
 * Mark a mount point as busy, and gain a new reference to it. Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * vfs_busy can be called multiple times and by multiple threads
 * and must be accompanied by the same number of vfs_unbusy calls.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
static inline int
_vfs_busy(struct mount *mp, bool wait)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* Enter a shared file system transaction, blocking iff "wait". */
	if (wait) {
		fstrans_start(mp, FSTRANS_SHARED);
	} else {
		if (fstrans_start_nowait(mp, FSTRANS_SHARED))
			return EBUSY;
	}
	/* Check after gaining the transaction: the mount may be gone. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		fstrans_done(mp);
		return ENOENT;
	}
	vfs_ref(mp);
	return 0;
}
337
/*
 * vfs_busy: busy the mount, waiting as needed.  See _vfs_busy().
 */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}
344
/*
 * vfs_trybusy: like vfs_busy() but fails with EBUSY instead of waiting.
 */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}
351
/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* End the shared transaction, then drop the vfs_busy() reference. */
	fstrans_done(mp);
	vfs_rele(mp);
}
366
/*
 * A vnode iterator is a marker vnode that is kept on the mount's vnode
 * list and advanced past each vnode handed out; see
 * vfs_vnode_iterator_next1().
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};
370
/*
 * vfs_vnode_iterator_init: begin iterating over the vnodes of mount mp.
 * Allocates a marker vnode and inserts it at the head of the mount's
 * vnode list.  The marker's v_usecount doubles as an "on list" flag.
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(&mntvnode_lock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	vp->v_usecount = 1;	/* marker is on the list */
	mutex_exit(&mntvnode_lock);

	*vnip = (struct vnode_iterator *)vip;
}
387
/*
 * vfs_vnode_iterator_destroy: finish an iteration -- unlink the marker
 * from the mount's vnode list (if still linked) and free it.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);

	mutex_enter(&mntvnode_lock);
	KASSERT(vnis_marker(mvp));
	/* Nonzero v_usecount means the marker is still on the list. */
	if (mvp->v_usecount != 0) {
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
	}
	mutex_exit(&mntvnode_lock);
	vnfree_marker(mvp);
}
403
/*
 * vfs_vnode_iterator_next1: common body of the vnode iterator.
 *
 * Advance the marker past the next vnode that is not itself a marker,
 * is not dead (waiting out dying vnodes iff do_wait), and is accepted
 * by the optional filter f(cl, vp), which is called with v_interlock
 * held.  Returns the vnode referenced via vcache_vget(), or NULL at
 * the end of the list.  If vcache_vget() fails with ENOENT (lost a
 * race with reclamation) the scan resumes from the marker's position.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	do {
		mutex_enter(&mntvnode_lock);
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		/* Take the marker off the list while we scan. */
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		vp = VIMPL_TO_VNODE(vip);
		if (vp == NULL) {
			/* End of list; the marker stays off the list. */
			mutex_exit(&mntvnode_lock);
			return NULL;
		}
		mutex_enter(vp->v_interlock);
		/* Skip markers, dead/dying vnodes and rejected vnodes. */
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Re-insert the marker just after the candidate. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(&mntvnode_lock);
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}
445
/*
 * vfs_vnode_iterator_next: return the next vnode accepted by the
 * optional filter, skipping dying vnodes without waiting for them.
 */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}
453
/*
 * Move a vnode from one mount queue to another.
 *
 * => mp may be NULL, in which case the vnode is only removed from its
 *    old queue.
 * => The caller's reference on the new mount is donated to the vnode;
 *    the reference on the old mount is released here.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;

	/* Never move an ordinary vnode onto a mount being unmounted. */
	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	mutex_enter(&mntvnode_lock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL)
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available. The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL)
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	mutex_exit(&mntvnode_lock);

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}
486
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };	/* debug sysctl hook */
#endif
504
505 static vnode_t *
506 vflushnext(struct vnode_iterator *marker, int *when)
507 {
508 if (hardclock_ticks > *when) {
509 yield();
510 *when = hardclock_ticks + hz / 10;
511 }
512 return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
513 }
514
/*
 * Flush one vnode. Referenced on entry, unreferenced on return.
 *
 * => Returns 0 if the vnode was skipped, recycled or forcibly closed,
 *    EBUSY if it stays active without FORCECLOSE, or a hard error
 *    from VOP_FSYNC()/VOP_GETATTR() under WRITECLOSE.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	/* Skip the caller's "keep" vnode and, on request, system vnodes. */
	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			/* Vnode died under us: nothing left to flush. */
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Not open for writing and still linked: leave it alone. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 */
	if (flags & FORCECLOSE) {
		vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}
572
/*
 * vflush: flush (and with FORCECLOSE, close) all vnodes on mount mp
 * except skipvp, retrying a few times while vnodes remain busy.
 *
 * => Returns 0 on success, EBUSY if busy vnodes remain, or a hard
 *    error propagated from vflush_one().
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				/* Busy is counted, not fatal: retry later. */
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}
614
/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted. If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/* Only our caller references olddp: no process can be using it. */
	if (olddp->v_usecount == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
		panic("mount: lost mount");

	/*
	 * We cannot hold proc_lock while updating a cwdinfo, so every
	 * time a match is found we drop it, fix up that process, and
	 * restart the scan from the top.
	 */
	do {
		retry = false;
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Keep the cwdinfo alive across the lock drop. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Release the displaced references outside the lock. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(proc_lock);
			break;
		}
		mutex_exit(proc_lock);
	} while (retry);

	/* The system root directory moves to the new mount as well. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}
687
688 /*
689 * Start extended attributes
690 */
691 static int
692 start_extattr(struct mount *mp)
693 {
694 int error;
695
696 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
697 if (error)
698 printf("%s: failed to start extattr: error = %d\n",
699 mp->mnt_stat.f_mntonname, error);
700
701 return error;
702 }
703
/*
 * mount_domount: mount a file system of type vfsops over the directory
 * vnode *vpp at the user-supplied path.
 *
 * => On return *vpp is set to NULL; on success the covered vnode's
 *    usecount is donated to the mount.
 * => The caller's reference on vfsops is donated to the mount, or
 *    dropped with vfs_delref() on early failure.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error;

	/* Ask the security model whether this mount is permitted. */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	/* MNT_EXPORTED cannot be set at initial mount time. */
	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons. Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	mutex_enter(&mp->mnt_updating);
	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0)
		goto err_unmounted;

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the vnode we are covering. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* Lost a race: somebody else mounted here first. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	/* Redirect processes whose cwd/root was the covered vnode. */
	mount_checkdirs(vp);
	mutex_exit(&mp->mnt_updating);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		(void)start_extattr(mp);
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* VFS_MOUNT() succeeded, so the fresh mount must be undone. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");

err_unmounted:
	vp->v_mountedhere = NULL;
	mutex_exit(&mp->mnt_updating);
	fstrans_unmount(mp);
	vfs_rele(mp);

	return error;
}
824
/*
 * Do the actual file system unmount. File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	mount_iterator_t *iter;
	struct mount *cmp;
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	/*
	 * No unmount below layered mounts.
	 */
	mountlist_iterator_init(&iter);
	while ((cmp = mountlist_iterator_next(iter)) != NULL) {
		if (cmp->mnt_lower == mp) {
			mountlist_iterator_destroy(iter);
			return EBUSY;
		}
	}
	mountlist_iterator_destroy(iter);

	/* Suspend the file system to get exclusive access for the unmount. */
	error = vfs_suspend(mp, 0);
	if (error) {
		return error;
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	/* Remember state that must be restored if the unmount fails. */
	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(&mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	/* Sync first, unless read-only or the unmount is forced. */
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* Unmount failed: restore the mount to working order. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(&mp->mnt_updating);
		vfs_resume(mp);
		/* Restart extended attributes if they were in use. */
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(&mp->mnt_updating);

	/*
	 * mark filesystem as gone to prevent further umounts
	 * after mnt_umounting lock is gone, this also prevents
	 * vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	vfs_resume(mp);

	/* Detach from the covered vnode, if any. */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
		coveredvp->v_mountedhere = NULL;
		VOP_UNLOCK(coveredvp);
	}
	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	fstrans_unmount(mp);
	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}
923
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	/* Force unmounts and report failures verbosely. */
	return vfs_unmountall1(l, true, true);
}
936
/*
 * vfs_unmount_print: report a successful unmount; pfx is prepended to
 * the message (e.g. "forcefully ").
 */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}
945
946 /*
947 * Return the mount with the highest generation less than "gen".
948 */
949 static struct mount *
950 vfs_unmount_next(uint64_t gen)
951 {
952 mount_iterator_t *iter;
953 struct mount *mp, *nmp;
954
955 nmp = NULL;
956
957 mountlist_iterator_init(&iter);
958 while ((mp = mountlist_iterator_next(iter)) != NULL) {
959 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
960 mp->mnt_gen < gen) {
961 if (nmp != NULL)
962 vfs_rele(nmp);
963 nmp = mp;
964 vfs_ref(nmp);
965 }
966 }
967 mountlist_iterator_destroy(iter);
968
969 return nmp;
970 }
971
/*
 * vfs_unmount_forceone: forcibly unmount the most recently mounted
 * file system; returns true iff the unmount succeeded.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp;
	int error;

	/* Pick the youngest mount: highest generation below mountgen. */
	mp = vfs_unmount_next(mountgen);
	if (mp == NULL) {
		return false;
	}

#ifdef DEBUG
	printf("forcefully unmounting %s (%s)...\n",
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
	/*
	 * The reference from vfs_unmount_next() is released here only
	 * on failure; on success it is presumably consumed by
	 * dounmount() -- NOTE(review): confirm against dounmount().
	 */
	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(mp, "forcefully ");
		return true;
	} else {
		vfs_rele(mp);
	}

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    mp->mnt_stat.f_mntonname, error);
#endif

	return false;
}
1001
/*
 * vfs_unmountall1: unmount file systems newest-first, by strictly
 * decreasing generation number.
 *
 * => force: pass MNT_FORCE to each unmount.
 * => verbose: print progress and failure messages.
 * => Returns true if at least one unmount succeeded.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp;
	bool any_error = false, progress = false;
	uint64_t gen;
	int error;

	gen = mountgen;
	for (;;) {
		/* Next-youngest mount strictly older than "gen". */
		mp = vfs_unmount_next(gen);
		if (mp == NULL)
			break;
		gen = mp->mnt_gen;

#ifdef DEBUG
		printf("unmounting %p %s (%s)...\n",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			/* Failed: drop the vfs_unmount_next() reference. */
			vfs_rele(mp);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose) {
		printf("unmounting done\n");
	}
	if (any_error && verbose) {
		printf("WARNING: some file systems would not unmount\n");
	}
	return progress;
}
1042
/*
 * vfs_sync_all: suspend user process scheduling and flush all dirty
 * buffers to disk.  Used on the way down to shutdown.
 */
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void)spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	do_sys_sync(l);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}
1067
1068 /*
1069 * Sync and unmount file systems before shutting down.
1070 */
1071 void
1072 vfs_shutdown(void)
1073 {
1074 lwp_t *l = curlwp;
1075
1076 vfs_sync_all(l);
1077
1078 /*
1079 * If we have paniced - do not make the situation potentially
1080 * worse by unmounting the file systems.
1081 */
1082 if (panicstr != NULL) {
1083 return;
1084 }
1085
1086 /* Unmount file systems. */
1087 vfs_unmountall(l);
1088 }
1089
1090 /*
1091 * Print a list of supported file system types (used by vfs_mountroot)
1092 */
1093 static void
1094 vfs_print_fstypes(void)
1095 {
1096 struct vfsops *v;
1097 int cnt = 0;
1098
1099 mutex_enter(&vfs_list_lock);
1100 LIST_FOREACH(v, &vfs_list, vfs_list)
1101 ++cnt;
1102 mutex_exit(&vfs_list_lock);
1103
1104 if (cnt == 0) {
1105 printf("WARNING: No file system modules have been loaded.\n");
1106 return;
1107 }
1108
1109 printf("Supported file systems:");
1110 mutex_enter(&vfs_list_lock);
1111 LIST_FOREACH(v, &vfs_list, vfs_list) {
1112 printf(" %s", v->vfs_name);
1113 }
1114 mutex_exit(&vfs_list_lock);
1115 printf("\n");
1116 }
1117
/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check and prepare the root device by device class. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		/* Network root: there must not be a block root device. */
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		/* Disk root: get a vnode for it and open it for reading. */
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it. Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/* Hold the vfsops across the unlocked mountroot call. */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	/* v == NULL means every candidate was tried and failed. */
	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, undo the VOP_OPEN() of the root block device. */
	if (error && device_class(root_device) == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		vrele(rootvp);
	}
	if (error == 0) {
		mount_iterator_t *iter;
		struct mount *mp;
		extern struct cwdinfo cwdi0;

		/* The root file system is the first mount on the list. */
		mountlist_iterator_init(&iter);
		mp = mountlist_iterator_next(iter);
		KASSERT(mp != NULL);
		mountlist_iterator_destroy(iter);

		mp->mnt_flag |= MNT_ROOTFS;
		mp->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
		 * reference it.
		 */
		error = VFS_ROOT(mp, &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		vref(cwdi0.cwdi_cdir);
		VOP_UNLOCK(rootvnode);
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info. All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		vref(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
		/*
		 * Enable loading of modules from the filesystem
		 */
		module_load_vfs_init();

	}
	return (error);
}
1257
1258 /*
1259 * mount_specific_key_create --
1260 * Create a key for subsystem mount-specific data.
1261 */
1262 int
1263 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1264 {
1265
1266 return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1267 }
1268
1269 /*
1270 * mount_specific_key_delete --
1271 * Delete a key for subsystem mount-specific data.
1272 */
1273 void
1274 mount_specific_key_delete(specificdata_key_t key)
1275 {
1276
1277 specificdata_key_delete(mount_specificdata_domain, key);
1278 }
1279
1280 /*
1281 * mount_initspecific --
1282 * Initialize a mount's specificdata container.
1283 */
1284 void
1285 mount_initspecific(struct mount *mp)
1286 {
1287 int error __diagused;
1288
1289 error = specificdata_init(mount_specificdata_domain,
1290 &mp->mnt_specdataref);
1291 KASSERT(error == 0);
1292 }
1293
1294 /*
1295 * mount_finispecific --
1296 * Finalize a mount's specificdata container.
1297 */
1298 void
1299 mount_finispecific(struct mount *mp)
1300 {
1301
1302 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1303 }
1304
1305 /*
1306 * mount_getspecific --
1307 * Return mount-specific data corresponding to the specified key.
1308 */
1309 void *
1310 mount_getspecific(struct mount *mp, specificdata_key_t key)
1311 {
1312
1313 return specificdata_getspecific(mount_specificdata_domain,
1314 &mp->mnt_specdataref, key);
1315 }
1316
1317 /*
1318 * mount_setspecific --
1319 * Set mount-specific data corresponding to the specified key.
1320 */
1321 void
1322 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1323 {
1324
1325 specificdata_setspecific(mount_specificdata_domain,
1326 &mp->mnt_specdataref, key, data);
1327 }
1328
1329 /*
1330 * Check to see if a filesystem is mounted on a block device.
1331 */
1332 int
1333 vfs_mountedon(vnode_t *vp)
1334 {
1335 vnode_t *vq;
1336 int error = 0;
1337
1338 if (vp->v_type != VBLK)
1339 return ENOTBLK;
1340 if (spec_node_getmountedfs(vp) != NULL)
1341 return EBUSY;
1342 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1343 if (spec_node_getmountedfs(vq) != NULL)
1344 error = EBUSY;
1345 vrele(vq);
1346 }
1347
1348 return error;
1349 }
1350
1351 /*
1352 * Check if a device pointed to by vp is mounted.
1353 *
1354 * Returns:
1355 * EINVAL if it's not a disk
1356 * EBUSY if it's a disk and mounted
1357 * 0 if it's a disk and not mounted
1358 */
1359 int
1360 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1361 {
1362 vnode_t *bvp;
1363 dev_t dev;
1364 int d_type;
1365
1366 bvp = NULL;
1367 d_type = D_OTHER;
1368
1369 if (iskmemvp(vp))
1370 return EINVAL;
1371
1372 switch (vp->v_type) {
1373 case VCHR: {
1374 const struct cdevsw *cdev;
1375
1376 dev = vp->v_rdev;
1377 cdev = cdevsw_lookup(dev);
1378 if (cdev != NULL) {
1379 dev_t blkdev;
1380
1381 blkdev = devsw_chr2blk(dev);
1382 if (blkdev != NODEV) {
1383 if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1384 d_type = (cdev->d_flag & D_TYPEMASK);
1385 /* XXX: what if bvp disappears? */
1386 vrele(bvp);
1387 }
1388 }
1389 }
1390
1391 break;
1392 }
1393
1394 case VBLK: {
1395 const struct bdevsw *bdev;
1396
1397 dev = vp->v_rdev;
1398 bdev = bdevsw_lookup(dev);
1399 if (bdev != NULL)
1400 d_type = (bdev->d_flag & D_TYPEMASK);
1401
1402 bvp = vp;
1403
1404 break;
1405 }
1406
1407 default:
1408 break;
1409 }
1410
1411 if (d_type != D_DISK)
1412 return EINVAL;
1413
1414 if (bvpp != NULL)
1415 *bvpp = bvp;
1416
1417 /*
1418 * XXX: This is bogus. We should be failing the request
1419 * XXX: not only if this specific slice is mounted, but
1420 * XXX: if it's on a disk with any other mounted slice.
1421 */
1422 if (vfs_mountedon(bvp))
1423 return EBUSY;
1424
1425 return 0;
1426 }
1427
1428 /*
1429 * Make a 'unique' number from a mount type name.
1430 */
1431 long
1432 makefstype(const char *type)
1433 {
1434 long rv;
1435
1436 for (rv = 0; *type; type++) {
1437 rv <<= 2;
1438 rv ^= *type;
1439 }
1440 return rv;
1441 }
1442
1443 static struct mountlist_entry *
1444 mountlist_alloc(enum mountlist_type type, struct mount *mp)
1445 {
1446 struct mountlist_entry *me;
1447
1448 me = kmem_zalloc(sizeof(*me), KM_SLEEP);
1449 me->me_mount = mp;
1450 me->me_type = type;
1451
1452 return me;
1453 }
1454
/* Free a mount list entry previously obtained from mountlist_alloc(). */
static void
mountlist_free(struct mountlist_entry *me)
{

	kmem_free(me, sizeof(*me));
}
1461
1462 void
1463 mountlist_iterator_init(mount_iterator_t **mip)
1464 {
1465 struct mountlist_entry *me;
1466
1467 me = mountlist_alloc(ME_MARKER, NULL);
1468 mutex_enter(&mountlist_lock);
1469 TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1470 mutex_exit(&mountlist_lock);
1471 *mip = (mount_iterator_t *)me;
1472 }
1473
1474 void
1475 mountlist_iterator_destroy(mount_iterator_t *mi)
1476 {
1477 struct mountlist_entry *marker = &mi->mi_entry;
1478
1479 if (marker->me_mount != NULL)
1480 vfs_unbusy(marker->me_mount);
1481
1482 mutex_enter(&mountlist_lock);
1483 TAILQ_REMOVE(&mountlist, marker, me_list);
1484 mutex_exit(&mountlist_lock);
1485
1486 mountlist_free(marker);
1487
1488 }
1489
1490 /*
1491 * Return the next mount or NULL for this iterator.
1492 * Mark it busy on success.
1493 */
1494 static inline struct mount *
1495 _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1496 {
1497 struct mountlist_entry *me, *marker = &mi->mi_entry;
1498 struct mount *mp;
1499 int error;
1500
1501 if (marker->me_mount != NULL) {
1502 vfs_unbusy(marker->me_mount);
1503 marker->me_mount = NULL;
1504 }
1505
1506 mutex_enter(&mountlist_lock);
1507 for (;;) {
1508 KASSERT(marker->me_type == ME_MARKER);
1509
1510 me = TAILQ_NEXT(marker, me_list);
1511 if (me == NULL) {
1512 /* End of list: keep marker and return. */
1513 mutex_exit(&mountlist_lock);
1514 return NULL;
1515 }
1516 TAILQ_REMOVE(&mountlist, marker, me_list);
1517 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1518
1519 /* Skip other markers. */
1520 if (me->me_type != ME_MOUNT)
1521 continue;
1522
1523 /* Take an initial reference for vfs_busy() below. */
1524 mp = me->me_mount;
1525 KASSERT(mp != NULL);
1526 vfs_ref(mp);
1527 mutex_exit(&mountlist_lock);
1528
1529 /* Try to mark this mount busy and return on success. */
1530 if (wait)
1531 error = vfs_busy(mp);
1532 else
1533 error = vfs_trybusy(mp);
1534 if (error == 0) {
1535 vfs_rele(mp);
1536 marker->me_mount = mp;
1537 return mp;
1538 }
1539 vfs_rele(mp);
1540 mutex_enter(&mountlist_lock);
1541 }
1542 }
1543
/* Blocking variant: wait until the next mount can be marked busy. */
struct mount *
mountlist_iterator_next(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, true);
}
1550
/* Non-blocking variant: skip mounts that cannot be busied at once. */
struct mount *
mountlist_iterator_trynext(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, false);
}
1557
1558 /*
1559 * Attach new mount to the end of the mount list.
1560 */
1561 void
1562 mountlist_append(struct mount *mp)
1563 {
1564 struct mountlist_entry *me;
1565
1566 me = mountlist_alloc(ME_MOUNT, mp);
1567 mutex_enter(&mountlist_lock);
1568 TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1569 mutex_exit(&mountlist_lock);
1570 }
1571
1572 /*
1573 * Remove mount from mount list.
1574 */void
1575 mountlist_remove(struct mount *mp)
1576 {
1577 struct mountlist_entry *me;
1578
1579 mutex_enter(&mountlist_lock);
1580 TAILQ_FOREACH(me, &mountlist, me_list)
1581 if (me->me_type == ME_MOUNT && me->me_mount == mp)
1582 break;
1583 KASSERT(me != NULL);
1584 TAILQ_REMOVE(&mountlist, me, me_list);
1585 mutex_exit(&mountlist_lock);
1586 mountlist_free(me);
1587 }
1588
1589 /*
1590 * Unlocked variant to traverse the mountlist.
1591 * To be used from DDB only.
1592 */
1593 struct mount *
1594 _mountlist_next(struct mount *mp)
1595 {
1596 struct mountlist_entry *me;
1597
1598 if (mp == NULL) {
1599 me = TAILQ_FIRST(&mountlist);
1600 } else {
1601 TAILQ_FOREACH(me, &mountlist, me_list)
1602 if (me->me_type == ME_MOUNT && me->me_mount == mp)
1603 break;
1604 if (me != NULL)
1605 me = TAILQ_NEXT(me, me_list);
1606 }
1607
1608 while (me != NULL && me->me_type != ME_MOUNT)
1609 me = TAILQ_NEXT(me, me_list);
1610
1611 return (me ? me->me_mount : NULL);
1612 }
1613