Home | History | Annotate | Line # | Download | only in kern
vfs_trans.c revision 1.65
      1 /*	$NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Juergen Hannken-Illjes.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.65 2022/07/08 07:42:05 hannken Exp $");
     34 
     35 /*
     36  * File system transaction operations.
     37  */
     38 
     39 #ifdef _KERNEL_OPT
     40 #include "opt_ddb.h"
     41 #endif
     42 
     43 #include <sys/param.h>
     44 #include <sys/systm.h>
     45 #include <sys/atomic.h>
     46 #include <sys/buf.h>
     47 #include <sys/kmem.h>
     48 #include <sys/mount.h>
     49 #include <sys/pserialize.h>
     50 #include <sys/vnode.h>
     51 #include <sys/fstrans.h>
     52 #include <sys/proc.h>
     53 #include <sys/pool.h>
     54 
     55 #include <miscfs/specfs/specdev.h>
     56 
/*
 * Kind of transaction lock a thread asks for.  grant_lock() decides
 * whether a given type is granted in the mount's current state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED			/* Granted while not suspending */
};
     61 
/*
 * One registered copy-on-write handler.  Entries are linked on the
 * fmi_cow_handler list of a fstrans_mount_info and called from
 * fscow_run().
 */
struct fscow_handler {
	/* Link on fstrans_mount_info::fmi_cow_handler. */
	LIST_ENTRY(fscow_handler) ch_list;
	/* Handler; called as ch_func(ch_arg, bp, data_valid). */
	int (*ch_func)(void *, struct buf *, bool);
	/* Opaque first argument handed back to ch_func. */
	void *ch_arg;
};
/*
 * Per lwp, per mount transaction state.  Entries of one lwp are chained
 * from l_fstrans via fli_succ; all entries, used or not, are also on the
 * global fstrans_fli_head list via fli_list.
 */
struct fstrans_lwp_info {
	/* Next entry on the owning lwp's l_fstrans list. */
	struct fstrans_lwp_info *fli_succ;
	/* Owning lwp, NULL while the entry is unused. */
	struct lwp *fli_self;
	/* Mount this entry belongs to, NULL while the entry is unused. */
	struct mount *fli_mount;
	/* Entry of the mount at the end of the mnt_lower chain, or NULL. */
	struct fstrans_lwp_info *fli_alias;
	/* Mount state referenced by this entry (holds an fmi_ref_cnt ref). */
	struct fstrans_mount_info *fli_mountinfo;
	/* Nesting depth of transactions started by this lwp. */
	int fli_trans_cnt;
	/* Number of entries whose fli_alias points at this entry. */
	int fli_alias_cnt;
	/* Nesting depth of fscow_run() calls by this lwp. */
	int fli_cow_cnt;
	/* Lock type recorded when fli_trans_cnt went from 0 to 1. */
	enum fstrans_lock_type fli_lock_type;
	/* Link on the global fstrans_fli_head list. */
	LIST_ENTRY(fstrans_lwp_info) fli_list;
};
/*
 * Per mount transaction state, reachable through mp->mnt_transinfo
 * and through fstrans_lwp_info::fli_mountinfo.
 */
struct fstrans_mount_info {
	/* Current suspension state of the mount. */
	enum fstrans_state fmi_state;
	/* Reference count; the structure is freed when it drops to zero. */
	unsigned int fmi_ref_cnt;
	/* Set by fstrans_unmount() once the mount let go of this state. */
	bool fmi_gone;
	/* True while a thread is changing fmi_cow_handler. */
	bool fmi_cow_change;
	/* Registered copy-on-write handlers. */
	LIST_HEAD(, fscow_handler) fmi_cow_handler;
	/* Mount this state was created for. */
	struct mount *fmi_mount;
	/* Lwp that moved fmi_state away from FSTRANS_NORMAL, else NULL. */
	struct lwp *fmi_owner;
};
     88 
/*
 * File-scope state shared by all mounts and lwps.
 */
static kmutex_t vfs_suspend_lock	/* Serialize suspensions. */
    __cacheline_aligned;
static kmutex_t fstrans_lock		/* Fstrans big lock. */
    __cacheline_aligned;
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static pool_cache_t fstrans_lwp_cache;	/* Cache of fstrans_lwp_info. */

/*
 * Incremented by fstrans_unmount(), decremented when the last reference
 * to a mount info is dropped; fstrans_done() uses it to decide whether
 * to scan for stale entries.
 */
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */
    101 
/*
 * Forward declarations of the file-local helpers.
 */
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

/*
 * Defined outside this file.  The entry points below either ignore
 * this mount or assert that they are not called with it.
 */
extern struct mount *dead_rootmount;
    118 
    119 #if defined(DIAGNOSTIC)
    120 
/*
 * DIAGNOSTIC only: one record per mount registered via fstrans_mount(),
 * used to validate mount pointers handed to this file.
 */
struct fstrans_debug_mount {
	struct mount *fdm_mount;		/* The registered mount. */
	SLIST_ENTRY(fstrans_debug_mount) fdm_list;
						/* Link on the list below. */
};

/* All registered mounts; accessed with fstrans_lock held. */
static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
    SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);
    128 
    129 static void
    130 fstrans_debug_mount(struct mount *mp)
    131 {
    132 	struct fstrans_debug_mount *fdm, *new;
    133 
    134 	KASSERT(mutex_owned(&fstrans_lock));
    135 
    136 	mutex_exit(&fstrans_lock);
    137 	new = kmem_alloc(sizeof(*new), KM_SLEEP);
    138 	new->fdm_mount = mp;
    139 	mutex_enter(&fstrans_lock);
    140 
    141 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
    142 		KASSERT(fdm->fdm_mount != mp);
    143 	SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
    144 }
    145 
    146 static void
    147 fstrans_debug_unmount(struct mount *mp)
    148 {
    149 	struct fstrans_debug_mount *fdm;
    150 
    151 	KASSERT(mutex_owned(&fstrans_lock));
    152 
    153 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
    154 		if (fdm->fdm_mount == mp)
    155 			break;
    156 	KASSERT(fdm != NULL);
    157 	SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
    158 	    fstrans_debug_mount, fdm_list);
    159 	kmem_free(fdm, sizeof(*fdm));
    160 }
    161 
    162 static void
    163 fstrans_debug_validate_mount(struct mount *mp)
    164 {
    165 	struct fstrans_debug_mount *fdm;
    166 
    167 	KASSERT(mutex_owned(&fstrans_lock));
    168 
    169 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
    170 		if (fdm->fdm_mount == mp)
    171 			break;
    172 	KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
    173 }
    174 
    175 #else /* defined(DIAGNOSTIC) */
    176 
/* Without DIAGNOSTIC the mount bookkeeping expands to nothing. */
#define fstrans_debug_mount(mp)
#define fstrans_debug_unmount(mp)
#define fstrans_debug_validate_mount(mp)
    180 
    181 #endif  /* defined(DIAGNOSTIC) */
    182 
    183 /*
    184  * Initialize.
    185  */
    186 void
    187 fstrans_init(void)
    188 {
    189 
    190 	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
    191 	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
    192 	cv_init(&fstrans_state_cv, "fstchg");
    193 	cv_init(&fstrans_count_cv, "fstcnt");
    194 	fstrans_psz = pserialize_create();
    195 	LIST_INIT(&fstrans_fli_head);
    196 	fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
    197 	    coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
    198 	    fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
    199 	KASSERT(fstrans_lwp_cache != NULL);
    200 }
    201 
    202 /*
    203  * pool_cache constructor for fstrans_lwp_info.  Updating the global list
    204  * produces cache misses on MP.  Minimise by keeping free entries on list.
    205  */
    206 int
    207 fstrans_lwp_pcc(void *arg, void *obj, int flags)
    208 {
    209 	struct fstrans_lwp_info *fli = obj;
    210 
    211 	memset(fli, 0, sizeof(*fli));
    212 
    213 	mutex_enter(&fstrans_lock);
    214 	LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
    215 	mutex_exit(&fstrans_lock);
    216 
    217 	return 0;
    218 }
    219 
    220 /*
    221  * pool_cache destructor
    222  */
    223 void
    224 fstrans_lwp_pcd(void *arg, void *obj)
    225 {
    226 	struct fstrans_lwp_info *fli = obj;
    227 
    228 	mutex_enter(&fstrans_lock);
    229 	LIST_REMOVE(fli, fli_list);
    230 	mutex_exit(&fstrans_lock);
    231 }
    232 
    233 /*
    234  * Deallocate lwp state.
    235  */
    236 void
    237 fstrans_lwp_dtor(lwp_t *l)
    238 {
    239 	struct fstrans_lwp_info *fli, *fli_next;
    240 
    241 	if (l->l_fstrans == NULL)
    242 		return;
    243 
    244 	mutex_enter(&fstrans_lock);
    245 	for (fli = l->l_fstrans; fli; fli = fli_next) {
    246 		KASSERT(fli->fli_trans_cnt == 0);
    247 		KASSERT(fli->fli_cow_cnt == 0);
    248 		KASSERT(fli->fli_self == l);
    249 		if (fli->fli_mount != NULL)
    250 			fstrans_mount_dtor(fli->fli_mountinfo);
    251 		fli_next = fli->fli_succ;
    252 		fli->fli_alias_cnt = 0;
    253 		fli->fli_mount = NULL;
    254 		fli->fli_alias = NULL;
    255 		fli->fli_mountinfo = NULL;
    256 		fli->fli_self = NULL;
    257 	}
    258 	mutex_exit(&fstrans_lock);
    259 
    260 	for (fli = l->l_fstrans; fli; fli = fli_next) {
    261 		fli_next = fli->fli_succ;
    262 		pool_cache_put(fstrans_lwp_cache, fli);
    263 	}
    264 	l->l_fstrans = NULL;
    265 }
    266 
    267 /*
    268  * Dereference mount state.
    269  */
    270 static void
    271 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
    272 {
    273 
    274 	KASSERT(mutex_owned(&fstrans_lock));
    275 
    276 	KASSERT(fmi != NULL);
    277 	fmi->fmi_ref_cnt -= 1;
    278 	if (__predict_true(fmi->fmi_ref_cnt > 0)) {
    279 		return;
    280 	}
    281 
    282 	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
    283 	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
    284 	KASSERT(fmi->fmi_owner == NULL);
    285 
    286 	KASSERT(fstrans_gone_count > 0);
    287 	fstrans_gone_count -= 1;
    288 
    289 	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
    290 	kmem_free(fmi, sizeof(*fmi));
    291 }
    292 
    293 /*
    294  * Allocate mount state.
    295  */
    296 int
    297 fstrans_mount(struct mount *mp)
    298 {
    299 	struct fstrans_mount_info *newfmi;
    300 
    301 	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
    302 	newfmi->fmi_state = FSTRANS_NORMAL;
    303 	newfmi->fmi_ref_cnt = 1;
    304 	newfmi->fmi_gone = false;
    305 	LIST_INIT(&newfmi->fmi_cow_handler);
    306 	newfmi->fmi_cow_change = false;
    307 	newfmi->fmi_mount = mp;
    308 	newfmi->fmi_owner = NULL;
    309 
    310 	mutex_enter(&fstrans_lock);
    311 	mp->mnt_transinfo = newfmi;
    312 	fstrans_debug_mount(mp);
    313 	mutex_exit(&fstrans_lock);
    314 
    315 	return 0;
    316 }
    317 
    318 /*
    319  * Deallocate mount state.
    320  */
    321 void
    322 fstrans_unmount(struct mount *mp)
    323 {
    324 	struct fstrans_mount_info *fmi = mp->mnt_transinfo;
    325 
    326 	KASSERT(fmi != NULL);
    327 
    328 	mutex_enter(&fstrans_lock);
    329 	fstrans_debug_unmount(mp);
    330 	fmi->fmi_gone = true;
    331 	mp->mnt_transinfo = NULL;
    332 	fstrans_gone_count += 1;
    333 	fstrans_mount_dtor(fmi);
    334 	mutex_exit(&fstrans_lock);
    335 }
    336 
/*
 * Clear mount entries whose mount is gone.
 *
 * Walks the current lwp's l_fstrans list and detaches every entry that
 * refers to a gone mount and has no transaction, cow or alias users.
 * Detached entries are returned to fstrans_lwp_cache after fstrans_lock
 * has been released.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli, *tofree = NULL;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	mutex_enter(&fstrans_lock);
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			/* Unlink from the lwp list, drop the mount reference. */
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			fli->fli_self = NULL;
			/*
			 * Restart from the head; presumably because the
			 * alias count just dropped may have made an entry
			 * that was already visited eligible for removal.
			 */
			p = &curlwp->l_fstrans;
			/* Chain the entry onto the private free list. */
			fli->fli_succ = tofree;
			tofree = fli;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	/* Every remaining alias must still belong to this lwp. */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
	mutex_exit(&fstrans_lock);

	/* Give the detached entries back with the lock released. */
	while (tofree != NULL) {
		fli = tofree;
		tofree = fli->fli_succ;
		pool_cache_put(fstrans_lwp_cache, fli);
	}
}
    386 
/*
 * Allocate and return per lwp info for this mount.
 *
 * If the current lwp already has an entry for mp it is returned as is.
 * Otherwise a new entry is taken from the cache, pushed onto the lwp's
 * list and attached to mp's mount info.  If mp has a mnt_lower chain,
 * an entry for the mount at the end of that chain is obtained too and
 * returned in place of the new entry.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	/* Reuse an existing entry for this mount. */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Allocate a new entry.
	 */
	fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
	/* Entries coming from the cache must be in their reset state. */
	KASSERT(fli->fli_trans_cnt == 0);
	KASSERT(fli->fli_cow_cnt == 0);
	KASSERT(fli->fli_alias_cnt == 0);
	KASSERT(fli->fli_mount == NULL);
	KASSERT(fli->fli_alias == NULL);
	KASSERT(fli->fli_mountinfo == NULL);
	KASSERT(fli->fli_self == NULL);
	fli->fli_succ = curlwp->l_fstrans;
	curlwp->l_fstrans = fli;

	/*
	 * Attach the entry to the mount if its mnt_transinfo is valid.
	 */

	mutex_enter(&fstrans_lock);
	fli->fli_self = curlwp;
	fstrans_debug_validate_mount(mp);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;
	fmi->fmi_ref_cnt += 1;
	/*
	 * Follow mnt_lower to the end of the chain.  mp ends up NULL
	 * when the mount has no lower mount at all.
	 */
	do {
		mp = mp->mnt_lower;
	} while (mp && mp->mnt_lower);
	mutex_exit(&fstrans_lock);

	if (mp) {
		/* Redirect to the entry of the lowest mount. */
		fli->fli_alias = fstrans_alloc_lwp_info(mp);
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}
    440 
    441 /*
    442  * Retrieve the per lwp info for this mount allocating if necessary.
    443  */
    444 static inline struct fstrans_lwp_info *
    445 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
    446 {
    447 	struct fstrans_lwp_info *fli;
    448 
    449 	/*
    450 	 * Scan our list for a match.
    451 	 */
    452 	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
    453 		if (fli->fli_mount == mp) {
    454 			KASSERT((mp->mnt_lower == NULL) ==
    455 			    (fli->fli_alias == NULL));
    456 			if (fli->fli_alias != NULL)
    457 				fli = fli->fli_alias;
    458 			break;
    459 		}
    460 	}
    461 
    462 	if (do_alloc) {
    463 		if (__predict_false(fli == NULL))
    464 			fli = fstrans_alloc_lwp_info(mp);
    465 		KASSERT(fli != NULL);
    466 		KASSERT(!fli->fli_mountinfo->fmi_gone);
    467 	} else {
    468 		KASSERT(fli != NULL);
    469 	}
    470 
    471 	return fli;
    472 }
    473 
    474 /*
    475  * Check if this lock type is granted at this state.
    476  */
    477 static bool
    478 grant_lock(const struct fstrans_mount_info *fmi,
    479     const enum fstrans_lock_type type)
    480 {
    481 
    482 	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
    483 		return true;
    484 	if (fmi->fmi_owner == curlwp)
    485 		return true;
    486 	if  (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
    487 		return true;
    488 
    489 	return false;
    490 }
    491 
/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 *
 * mp:		mount to start the transaction on
 * lock_type:	lock type to request, checked with grant_lock()
 * wait:	if zero, return EBUSY instead of sleeping until granted
 *
 * Returns 0 once the transaction is started, or EBUSY.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	/* Transactions on the dead root mount are no-ops. */
	if (mp == dead_rootmount)
		return 0;
#endif

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/* Nested transaction: only bump the count. */
	if (fli->fli_trans_cnt > 0) {
		fli->fli_trans_cnt += 1;

		return 0;
	}

	/* Fast path: check and take the lock inside a read section. */
	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	/* Slow path: sleep on the state cv until the lock is granted. */
	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}
    541 
    542 void
    543 fstrans_start(struct mount *mp)
    544 {
    545 	int error __diagused;
    546 
    547 	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
    548 	KASSERT(error == 0);
    549 }
    550 
    551 int
    552 fstrans_start_nowait(struct mount *mp)
    553 {
    554 
    555 	return _fstrans_start(mp, FSTRANS_SHARED, 0);
    556 }
    557 
    558 void
    559 fstrans_start_lazy(struct mount *mp)
    560 {
    561 	int error __diagused;
    562 
    563 	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
    564 	KASSERT(error == 0);
    565 }
    566 
/*
 * Finish a transaction.
 *
 * Drops one nesting level of the current lwp's transaction on mp.  When
 * the outermost level ends, the count goes to zero and, unless the
 * mount is in FSTRANS_NORMAL, a waiter on fstrans_count_cv is signalled.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	/* Transactions on the dead root mount are no-ops. */
	if (mp == dead_rootmount)
		return;
#endif

	fli = fstrans_get_lwp_info(mp, false);
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	/* Inner level of a nested transaction. */
	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	/*
	 * Some mount went away: reclaim our stale entries.  This entry
	 * still has fli_trans_cnt == 1 and is therefore kept.
	 */
	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	/* Fast path: no state change in progress, nobody to wake. */
	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	/* Slow path: release under the lock and signal the count cv. */
	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}
    609 
    610 /*
    611  * Check if we hold an lock.
    612  */
    613 int
    614 fstrans_held(struct mount *mp)
    615 {
    616 	struct fstrans_lwp_info *fli;
    617 	struct fstrans_mount_info *fmi;
    618 
    619 	KASSERT(mp != dead_rootmount);
    620 
    621 	fli = fstrans_get_lwp_info(mp, true);
    622 	fmi = fli->fli_mountinfo;
    623 
    624 	return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
    625 }
    626 
    627 /*
    628  * Check if this thread has an exclusive lock.
    629  */
    630 int
    631 fstrans_is_owner(struct mount *mp)
    632 {
    633 	struct fstrans_lwp_info *fli;
    634 	struct fstrans_mount_info *fmi;
    635 
    636 	KASSERT(mp != dead_rootmount);
    637 
    638 	fli = fstrans_get_lwp_info(mp, true);
    639 	fmi = fli->fli_mountinfo;
    640 
    641 	return (fmi->fmi_owner == curlwp);
    642 }
    643 
    644 /*
    645  * True, if no thread is in a transaction not granted at the current state.
    646  */
    647 static bool
    648 state_change_done(const struct fstrans_mount_info *fmi)
    649 {
    650 	struct fstrans_lwp_info *fli;
    651 
    652 	KASSERT(mutex_owned(&fstrans_lock));
    653 
    654 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
    655 		if (fli->fli_mountinfo != fmi)
    656 			continue;
    657 		if (fli->fli_trans_cnt == 0)
    658 			continue;
    659 		if (fli->fli_self == curlwp)
    660 			continue;
    661 		if (grant_lock(fmi, fli->fli_lock_type))
    662 			continue;
    663 
    664 		return false;
    665 	}
    666 
    667 	return true;
    668 }
    669 
/*
 * Set new file system state.
 *
 * Publishes new_state, then waits until no other thread is inside a
 * transaction that is not granted at that state.  If the wait is
 * interrupted the state is reset to FSTRANS_NORMAL.
 *
 * Returns 0 on success or the error from cv_wait_sig().
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			/* Interrupted: give up and fall back to normal. */
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	/*
	 * Ownership is taken when leaving FSTRANS_NORMAL and given up
	 * when returning to it.
	 */
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == NULL);
			fmi->fmi_owner = curlwp;
		}
		if (new_state == FSTRANS_NORMAL) {
			KASSERT(fmi->fmi_owner == curlwp);
			fmi->fmi_owner = NULL;
		}
	}
	/* Wake threads sleeping in _fstrans_start() and friends. */
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	return error;
}
    720 
    721 /*
    722  * Get current file system state.
    723  */
    724 enum fstrans_state
    725 fstrans_getstate(struct mount *mp)
    726 {
    727 	struct fstrans_lwp_info *fli;
    728 	struct fstrans_mount_info *fmi;
    729 
    730 	KASSERT(mp != dead_rootmount);
    731 
    732 	fli = fstrans_get_lwp_info(mp, true);
    733 	fmi = fli->fli_mountinfo;
    734 
    735 	return fmi->fmi_state;
    736 }
    737 
    738 /*
    739  * Request a filesystem to suspend all operations.
    740  */
    741 int
    742 vfs_suspend(struct mount *mp, int nowait)
    743 {
    744 	struct fstrans_lwp_info *fli;
    745 	int error;
    746 
    747 	if (mp == dead_rootmount)
    748 		return EOPNOTSUPP;
    749 
    750 	fli = fstrans_get_lwp_info(mp, true);
    751 
    752 	if (nowait) {
    753 		if (!mutex_tryenter(&vfs_suspend_lock))
    754 			return EWOULDBLOCK;
    755 	} else
    756 		mutex_enter(&vfs_suspend_lock);
    757 
    758 	if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
    759 		mutex_exit(&vfs_suspend_lock);
    760 		return error;
    761 	}
    762 
    763 	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
    764 		vfs_resume(mp);
    765 		return ENOENT;
    766 	}
    767 
    768 	return 0;
    769 }
    770 
    771 /*
    772  * Request a filesystem to resume all operations.
    773  */
    774 void
    775 vfs_resume(struct mount *mp)
    776 {
    777 	struct fstrans_lwp_info *fli;
    778 
    779 	KASSERT(mp != dead_rootmount);
    780 
    781 	fli = fstrans_get_lwp_info(mp, false);
    782 	mp = fli->fli_mount;
    783 
    784 	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
    785 	mutex_exit(&vfs_suspend_lock);
    786 }
    787 
    788 
    789 /*
    790  * True, if no thread is running a cow handler.
    791  */
    792 static bool
    793 cow_state_change_done(const struct fstrans_mount_info *fmi)
    794 {
    795 	struct fstrans_lwp_info *fli;
    796 
    797 	KASSERT(mutex_owned(&fstrans_lock));
    798 	KASSERT(fmi->fmi_cow_change);
    799 
    800 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
    801 		if (fli->fli_mount != fmi->fmi_mount)
    802 			continue;
    803 		if (fli->fli_cow_cnt == 0)
    804 			continue;
    805 
    806 		return false;
    807 	}
    808 
    809 	return true;
    810 }
    811 
/*
 * Prepare for changing this mount's cow list.
 * Returns with fstrans_lock locked; the caller must finish with
 * cow_change_done(), which releases it.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	/* Wait for threads still inside fscow_run() to leave. */
	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}
    837 
/*
 * Done changing this mount's cow list.
 * Called with fstrans_lock held (as left by cow_change_enter()) and
 * releases it.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	/* Wake threads waiting for fmi_cow_change to clear. */
	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}
    854 
    855 /*
    856  * Add a handler to this mount.
    857  */
    858 int
    859 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    860     void *arg)
    861 {
    862 	struct fstrans_mount_info *fmi;
    863 	struct fscow_handler *newch;
    864 
    865 	KASSERT(mp != dead_rootmount);
    866 
    867 	mutex_enter(&fstrans_lock);
    868 	fmi = mp->mnt_transinfo;
    869 	KASSERT(fmi != NULL);
    870 	fmi->fmi_ref_cnt += 1;
    871 	mutex_exit(&fstrans_lock);
    872 
    873 	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
    874 	newch->ch_func = func;
    875 	newch->ch_arg = arg;
    876 
    877 	cow_change_enter(fmi);
    878 	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
    879 	cow_change_done(fmi);
    880 
    881 	return 0;
    882 }
    883 
    884 /*
    885  * Remove a handler from this mount.
    886  */
    887 int
    888 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    889     void *arg)
    890 {
    891 	struct fstrans_mount_info *fmi;
    892 	struct fscow_handler *hp = NULL;
    893 
    894 	KASSERT(mp != dead_rootmount);
    895 
    896 	fmi = mp->mnt_transinfo;
    897 	KASSERT(fmi != NULL);
    898 
    899 	cow_change_enter(fmi);
    900 	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
    901 		if (hp->ch_func == func && hp->ch_arg == arg)
    902 			break;
    903 	if (hp != NULL) {
    904 		LIST_REMOVE(hp, ch_list);
    905 		kmem_free(hp, sizeof(*hp));
    906 	}
    907 	fstrans_mount_dtor(fmi);
    908 	cow_change_done(fmi);
    909 
    910 	return hp ? 0 : EINVAL;
    911 }
    912 
/*
 * Check for need to copy block that is about to be written.
 *
 * Runs the copy-on-write handlers of the mount the buffer belongs to
 * and marks the buffer B_COWDONE when all of them succeed.
 *
 * bp:		buffer about to be written
 * data_valid:	passed through unchanged to every handler
 *
 * Returns 0 on success or the first non-zero handler result.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	/* For block devices use the file system mounted on the device. */
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A list change is pending: wait for it to finish. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
 	if (error == 0)
 		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* Leave under the lock and signal the count cv. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}
    997 
    998 #if defined(DDB)
/* Prototype for the debugger dump routine defined at the end of this file. */
void fstrans_dump(int);
   1000 
   1001 static void
   1002 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
   1003 {
   1004 	char prefix[9];
   1005 	struct fstrans_lwp_info *fli;
   1006 
   1007 	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
   1008 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
   1009 		if (fli->fli_self != l)
   1010 			continue;
   1011 		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
   1012 			if (! verbose)
   1013 				continue;
   1014 		}
   1015 		printf("%-8s", prefix);
   1016 		if (verbose)
   1017 			printf(" @%p", fli);
   1018 		if (fli->fli_mount == dead_rootmount)
   1019 			printf(" <dead>");
   1020 		else if (fli->fli_mount != NULL)
   1021 			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
   1022 		else
   1023 			printf(" NULL");
   1024 		if (fli->fli_alias != NULL) {
   1025 			struct mount *amp = fli->fli_alias->fli_mount;
   1026 
   1027 			printf(" alias");
   1028 			if (verbose)
   1029 				printf(" @%p", fli->fli_alias);
   1030 			if (amp == NULL)
   1031 				printf(" NULL");
   1032 			else
   1033 				printf(" (%s)", amp->mnt_stat.f_mntonname);
   1034 		}
   1035 		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
   1036 			printf(" gone");
   1037 		if (fli->fli_trans_cnt == 0) {
   1038 			printf(" -");
   1039 		} else {
   1040 			switch (fli->fli_lock_type) {
   1041 			case FSTRANS_LAZY:
   1042 				printf(" lazy");
   1043 				break;
   1044 			case FSTRANS_SHARED:
   1045 				printf(" shared");
   1046 				break;
   1047 			default:
   1048 				printf(" %#x", fli->fli_lock_type);
   1049 				break;
   1050 			}
   1051 		}
   1052 		printf(" %d cow %d alias %d\n",
   1053 		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
   1054 		prefix[0] = '\0';
   1055 	}
   1056 }
   1057 
   1058 static void
   1059 fstrans_print_mount(struct mount *mp, int verbose)
   1060 {
   1061 	struct fstrans_mount_info *fmi;
   1062 
   1063 	fmi = mp->mnt_transinfo;
   1064 	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
   1065 		return;
   1066 
   1067 	printf("%-16s ", mp->mnt_stat.f_mntonname);
   1068 	if (fmi == NULL) {
   1069 		printf("(null)\n");
   1070 		return;
   1071 	}
   1072 	printf("owner %p ", fmi->fmi_owner);
   1073 	switch (fmi->fmi_state) {
   1074 	case FSTRANS_NORMAL:
   1075 		printf("state normal\n");
   1076 		break;
   1077 	case FSTRANS_SUSPENDING:
   1078 		printf("state suspending\n");
   1079 		break;
   1080 	case FSTRANS_SUSPENDED:
   1081 		printf("state suspended\n");
   1082 		break;
   1083 	default:
   1084 		printf("state %#x\n", fmi->fmi_state);
   1085 		break;
   1086 	}
   1087 }
   1088 
   1089 void
   1090 fstrans_dump(int full)
   1091 {
   1092 	const struct proclist_desc *pd;
   1093 	struct proc *p;
   1094 	struct lwp *l;
   1095 	struct mount *mp;
   1096 
   1097 	printf("Fstrans locks by lwp:\n");
   1098 	for (pd = proclists; pd->pd_list != NULL; pd++)
   1099 		PROCLIST_FOREACH(p, pd->pd_list)
   1100 			LIST_FOREACH(l, &p->p_lwps, l_sibling)
   1101 				fstrans_print_lwp(p, l, full == 1);
   1102 
   1103 	printf("Fstrans state by mount:\n");
   1104 	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
   1105 		fstrans_print_mount(mp, full == 1);
   1106 }
   1107 #endif /* defined(DDB) */
   1108