/*	$NetBSD: vfs_trans.c,v 1.48.6.1 2019/06/10 22:09:04 christos Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Juergen Hannken-Illjes.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.48.6.1 2019/06/10 22:09:04 christos Exp $");

/*
 * File system transaction operations.
 */
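
/*
 * Illustrative usage sketch (editorial comment, not part of the original
 * source): a file system operation takes a shared transaction around its
 * work, while a suspender drains those transactions before operating on
 * the quiescent file system.
 *
 *	fstrans_start(mp);
 *	... perform the vnode or VFS operation ...
 *	fstrans_done(mp);
 *
 *	if (vfs_suspend(mp, 0) == 0) {
 *		... file system is suspended here ...
 *		vfs_resume(mp);
 *	}
 */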

#ifdef _KERNEL_OPT
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/pserialize.h>
#include <sys/vnode.h>
#include <sys/fstrans.h>
#include <sys/proc.h>

#include <miscfs/specfs/specdev.h>

enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED,			/* Granted while not suspending */
	FSTRANS_EXCL			/* Internal: exclusive lock */
};

struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;
	int (*ch_func)(void *, struct buf *, bool);
	void *ch_arg;
};
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;
	struct lwp *fli_self;
	struct mount *fli_mount;
	struct fstrans_lwp_info *fli_alias;
	struct fstrans_mount_info *fli_mountinfo;
	int fli_trans_cnt;
	int fli_alias_cnt;
	int fli_cow_cnt;
	enum fstrans_lock_type fli_lock_type;
	LIST_ENTRY(fstrans_lwp_info) fli_list;
};
struct fstrans_mount_info {
	enum fstrans_state fmi_state;
	unsigned int fmi_ref_cnt;
	bool fmi_gone;
	bool fmi_cow_change;
	LIST_HEAD(, fscow_handler) fmi_cow_handler;
	struct mount *fmi_mount;
};

static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
static kmutex_t fstrans_lock;		/* Fstrans big lock. */
static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */

static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

extern struct mount *dead_rootmount;

#if defined(DIAGNOSTIC)

struct fstrans_debug_mount {
	struct mount *fdm_mount;
	SLIST_ENTRY(fstrans_debug_mount) fdm_list;
};

static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
    SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);

static void
fstrans_debug_mount(struct mount *mp)
{
	struct fstrans_debug_mount *fdm, *new;

	KASSERT(mutex_owned(&fstrans_mount_lock));

	mutex_exit(&fstrans_mount_lock);
	new = kmem_alloc(sizeof(*new), KM_SLEEP);
	new->fdm_mount = mp;
	mutex_enter(&fstrans_mount_lock);

	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
		KASSERT(fdm->fdm_mount != mp);
	SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
}

static void
fstrans_debug_unmount(struct mount *mp)
{
	struct fstrans_debug_mount *fdm;

	KASSERT(mutex_owned(&fstrans_mount_lock));

	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
		if (fdm->fdm_mount == mp)
			break;
	KASSERT(fdm != NULL);
	SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
	    fstrans_debug_mount, fdm_list);
	kmem_free(fdm, sizeof(*fdm));
}

static void
fstrans_debug_validate_mount(struct mount *mp)
{
	struct fstrans_debug_mount *fdm;

	KASSERT(mutex_owned(&fstrans_mount_lock));

	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
		if (fdm->fdm_mount == mp)
			break;
	KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
}

#else /* defined(DIAGNOSTIC) */

#define fstrans_debug_mount(mp)
#define fstrans_debug_unmount(mp)
#define fstrans_debug_validate_mount(mp)

#endif /* defined(DIAGNOSTIC) */

/*
 * Initialize.
 */
void
fstrans_init(void)
{

	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&fstrans_state_cv, "fstchg");
	cv_init(&fstrans_count_cv, "fstcnt");
	fstrans_psz = pserialize_create();
	LIST_INIT(&fstrans_fli_head);
}

/*
 * Deallocate lwp state.
 */
void
fstrans_lwp_dtor(lwp_t *l)
{
	struct fstrans_lwp_info *fli, *fli_next;

	for (fli = l->l_fstrans; fli; fli = fli_next) {
		KASSERT(fli->fli_trans_cnt == 0);
		KASSERT(fli->fli_cow_cnt == 0);
		KASSERT(fli->fli_self == l);
		if (fli->fli_mount != NULL)
			fstrans_mount_dtor(fli->fli_mountinfo);
		fli_next = fli->fli_succ;
		fli->fli_alias_cnt = 0;
		fli->fli_mount = NULL;
		fli->fli_alias = NULL;
		fli->fli_mountinfo = NULL;
		membar_sync();
		fli->fli_self = NULL;
	}

	l->l_fstrans = NULL;
}

/*
 * Dereference mount state.
 */
static void
fstrans_mount_dtor(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_mount_lock);

	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt -= 1;
	if (fmi->fmi_ref_cnt > 0) {
		mutex_exit(&fstrans_mount_lock);
		return;
	}

	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);

	KASSERT(fstrans_gone_count > 0);
	fstrans_gone_count -= 1;

	mutex_exit(&fstrans_mount_lock);

	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
	kmem_free(fmi, sizeof(*fmi));
}

/*
 * Allocate mount state.
 */
int
fstrans_mount(struct mount *mp)
{
	struct fstrans_mount_info *newfmi;

	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
	newfmi->fmi_state = FSTRANS_NORMAL;
	newfmi->fmi_ref_cnt = 1;
	newfmi->fmi_gone = false;
	LIST_INIT(&newfmi->fmi_cow_handler);
	newfmi->fmi_cow_change = false;
	newfmi->fmi_mount = mp;

	mutex_enter(&fstrans_mount_lock);
	mp->mnt_transinfo = newfmi;
	fstrans_debug_mount(mp);
	mutex_exit(&fstrans_mount_lock);

	return 0;
}

/*
 * Deallocate mount state.
 */
void
fstrans_unmount(struct mount *mp)
{
	struct fstrans_mount_info *fmi = mp->mnt_transinfo;

	KASSERT(fmi != NULL);

	mutex_enter(&fstrans_mount_lock);
	fstrans_debug_unmount(mp);
	fmi->fmi_gone = true;
	mp->mnt_transinfo = NULL;
	fstrans_gone_count += 1;
	mutex_exit(&fstrans_mount_lock);

	fstrans_mount_dtor(fmi);
}
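
/*
 * Illustrative pairing (editorial sketch, hypothetical call sites, not
 * part of the original source):
 *
 *	error = fstrans_mount(mp);	at mount setup
 *	...
 *	fstrans_unmount(mp);		at unmount
 *
 * The fstrans_mount_info is reference counted: fstrans_unmount() only
 * marks it gone, and fstrans_mount_dtor() frees it once the last
 * reference (per-lwp entries, cow handlers) has been dropped.
 */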

/*
 * Clear mount entries whose mount is gone.
 */
static void
fstrans_clear_lwp_info(void)
{
	struct fstrans_lwp_info **p, *fli;

	/*
	 * Scan our list clearing entries whose mount is gone.
	 */
	for (p = &curlwp->l_fstrans; *p; ) {
		fli = *p;
		if (fli->fli_mount != NULL &&
		    fli->fli_mountinfo->fmi_gone &&
		    fli->fli_trans_cnt == 0 &&
		    fli->fli_cow_cnt == 0 &&
		    fli->fli_alias_cnt == 0) {
			*p = (*p)->fli_succ;
			fstrans_mount_dtor(fli->fli_mountinfo);
			if (fli->fli_alias) {
				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
				fli->fli_alias->fli_alias_cnt--;
			}
			fli->fli_mount = NULL;
			fli->fli_alias = NULL;
			fli->fli_mountinfo = NULL;
			membar_sync();
			fli->fli_self = NULL;
			p = &curlwp->l_fstrans;
		} else {
			p = &(*p)->fli_succ;
		}
	}
#ifdef DIAGNOSTIC
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
		if (fli->fli_alias != NULL)
			KASSERT(fli->fli_alias->fli_self == curlwp);
#endif /* DIAGNOSTIC */
}

/*
 * Allocate and return per lwp info for this mount.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Try to reuse a cleared entry or allocate a new one.
	 */
	mutex_enter(&fstrans_lock);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		membar_sync();
		if (fli->fli_self == NULL) {
			KASSERT(fli->fli_mount == NULL);
			KASSERT(fli->fli_trans_cnt == 0);
			KASSERT(fli->fli_cow_cnt == 0);
			KASSERT(fli->fli_alias_cnt == 0);
			fli->fli_self = curlwp;
			fli->fli_succ = curlwp->l_fstrans;
			curlwp->l_fstrans = fli;
			break;
		}
	}
	mutex_exit(&fstrans_lock);

	if (fli == NULL) {
		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
		mutex_enter(&fstrans_lock);
		memset(fli, 0, sizeof(*fli));
		fli->fli_self = curlwp;
		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
		mutex_exit(&fstrans_lock);
		fli->fli_succ = curlwp->l_fstrans;
		curlwp->l_fstrans = fli;
	}

	/*
	 * Attach the entry to the mount if its mnt_transinfo is valid.
	 */

	mutex_enter(&fstrans_mount_lock);
	fstrans_debug_validate_mount(mp);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;
	fmi->fmi_ref_cnt += 1;
	do {
		mp = mp->mnt_lower;
	} while (mp && mp->mnt_lower);
	mutex_exit(&fstrans_mount_lock);

	if (mp) {
		fli->fli_alias = fstrans_alloc_lwp_info(mp);
		fli->fli_alias->fli_alias_cnt++;
		fli = fli->fli_alias;
	}

	return fli;
}
/*
 * Retrieve the per lwp info for this mount, allocating if necessary.
 */
static inline struct fstrans_lwp_info *
fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
{
	struct fstrans_lwp_info *fli;

	/*
	 * Scan our list for a match.
	 */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp) {
			KASSERT((mp->mnt_lower == NULL) ==
			    (fli->fli_alias == NULL));
			if (fli->fli_alias != NULL)
				fli = fli->fli_alias;
			break;
		}
	}

	if (do_alloc) {
		if (__predict_false(fli == NULL))
			fli = fstrans_alloc_lwp_info(mp);
		KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
	} else {
		KASSERT(fli != NULL);
	}

	return fli;
}

/*
 * Check if this lock type is granted at this state.
 */
static bool
grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
{

	if (__predict_true(state == FSTRANS_NORMAL))
		return true;
	if (type == FSTRANS_EXCL)
		return true;
	if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
		return true;

	return false;
}
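
/*
 * Grant matrix implied by grant_lock() above (editorial summary, not
 * part of the original source):
 *
 *	state \ type	LAZY	SHARED	EXCL
 *	NORMAL		yes	yes	yes
 *	SUSPENDING	yes	no	yes
 *	SUSPENDED	no	no	yes
 */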

/*
 * Start a transaction. If this thread already has a transaction on this
 * file system, increment the reference counter.
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	if (mp == dead_rootmount)
		return 0;
#endif

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	if (fli->fli_trans_cnt > 0) {
		KASSERT(lock_type != FSTRANS_EXCL);
		fli->fli_trans_cnt += 1;

		return 0;
	}

	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi->fmi_state, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}

void
fstrans_start(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
	KASSERT(error == 0);
}

int
fstrans_start_nowait(struct mount *mp)
{

	return _fstrans_start(mp, FSTRANS_SHARED, 0);
}

void
fstrans_start_lazy(struct mount *mp)
{
	int error __diagused;

	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
	KASSERT(error == 0);
}

/*
 * Finish a transaction.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	if (mp == dead_rootmount)
		return;
#endif

	fli = fstrans_get_lwp_info(mp, false);
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}

/*
 * Check if this thread has an exclusive lock.
 */
int
fstrans_is_owner(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);

	if (fli->fli_trans_cnt == 0)
		return 0;

	return (fli->fli_lock_type == FSTRANS_EXCL);
}

/*
 * True if no thread is in a transaction that is not granted at the
 * current state.
 */
static bool
state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mountinfo != fmi)
			continue;
		if (fli->fli_trans_cnt == 0)
			continue;
		if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
			continue;

		return false;
	}

	return true;
}

/*
 * Set new file system state.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL)
			_fstrans_start(mp, FSTRANS_EXCL, 1);
		if (new_state == FSTRANS_NORMAL)
			fstrans_done(mp);
	}

	return error;
}
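
/*
 * Illustrative suspend sequence as a file system's suspendctl code might
 * drive it (editorial sketch, not part of the original source; details
 * vary per file system):
 *
 *	fstrans_setstate(mp, FSTRANS_SUSPENDING);
 *	... flush pending operations and sync the file system ...
 *	fstrans_setstate(mp, FSTRANS_SUSPENDED);
 *	...
 *	fstrans_setstate(mp, FSTRANS_NORMAL);
 */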

/*
 * Get current file system state.
 */
enum fstrans_state
fstrans_getstate(struct mount *mp)
{
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	return fmi->fmi_state;
}

/*
 * Request a filesystem to suspend all operations.
 */
int
vfs_suspend(struct mount *mp, int nowait)
{
	struct fstrans_lwp_info *fli;
	int error;

	if (mp == dead_rootmount)
		return EOPNOTSUPP;

	fli = fstrans_get_lwp_info(mp, true);
	mp = fli->fli_mount;

	if (nowait) {
		if (!mutex_tryenter(&vfs_suspend_lock))
			return EWOULDBLOCK;
	} else
		mutex_enter(&vfs_suspend_lock);

	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
		mutex_exit(&vfs_suspend_lock);

	return error;
}

/*
 * Request a filesystem to resume all operations.
 */
void
vfs_resume(struct mount *mp)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, false);
	mp = fli->fli_mount;

	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
	mutex_exit(&vfs_suspend_lock);
}


/*
 * True if no thread is running a cow handler.
 */
static bool
cow_state_change_done(const struct fstrans_mount_info *fmi)
{
	struct fstrans_lwp_info *fli;

	KASSERT(mutex_owned(&fstrans_lock));
	KASSERT(fmi->fmi_cow_change);

	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_mount != fmi->fmi_mount)
			continue;
		if (fli->fli_cow_cnt == 0)
			continue;

		return false;
	}

	return true;
}

/*
 * Prepare for changing this mount's cow list.
 * Returns with fstrans_lock locked.
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}

/*
 * Done changing this mount's cow list.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}

/*
 * Add a handler to this mount.
 */
int
fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *newch;

	KASSERT(mp != dead_rootmount);

	mutex_enter(&fstrans_mount_lock);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fmi->fmi_ref_cnt += 1;
	mutex_exit(&fstrans_mount_lock);

	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
	newch->ch_func = func;
	newch->ch_arg = arg;

	cow_change_enter(fmi);
	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
	cow_change_done(fmi);

	return 0;
}

/*
 * Remove a handler from this mount.
 */
int
fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
    void *arg)
{
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp = NULL;

	KASSERT(mp != dead_rootmount);

	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);

	cow_change_enter(fmi);
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if (hp->ch_func == func && hp->ch_arg == arg)
			break;
	if (hp != NULL) {
		LIST_REMOVE(hp, ch_list);
		kmem_free(hp, sizeof(*hp));
	}
	cow_change_done(fmi);

	fstrans_mount_dtor(fmi);

	return hp ? 0 : EINVAL;
}
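
/*
 * Illustrative copy-on-write handler registration (editorial sketch;
 * example_cow_handler and sc are hypothetical, not part of the original
 * source):
 *
 *	static int
 *	example_cow_handler(void *arg, struct buf *bp, bool data_valid)
 *	{
 *		... copy the block about to be written, return 0 on success ...
 *	}
 *
 *	error = fscow_establish(mp, example_cow_handler, sc);
 *	...
 *	error = fscow_disestablish(mp, example_cow_handler, sc);
 */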

/*
 * Check for need to copy block that is about to be written.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need to run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/*
	 * On a non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}

#if defined(DDB)
void fstrans_dump(int);

static void
fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
{
	char prefix[9];
	struct fstrans_lwp_info *fli;

	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		if (fli->fli_self != l)
			continue;
		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
			if (! verbose)
				continue;
		}
		printf("%-8s", prefix);
		if (verbose)
			printf(" @%p", fli);
		if (fli->fli_mount == dead_rootmount)
			printf(" <dead>");
		else if (fli->fli_mount != NULL)
			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
		else
			printf(" NULL");
		if (fli->fli_alias != NULL) {
			struct mount *amp = fli->fli_alias->fli_mount;

			printf(" alias");
			if (verbose)
				printf(" @%p", fli->fli_alias);
			if (amp == NULL)
				printf(" NULL");
			else
				printf(" (%s)", amp->mnt_stat.f_mntonname);
		}
		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
			printf(" gone");
		if (fli->fli_trans_cnt == 0) {
			printf(" -");
		} else {
			switch (fli->fli_lock_type) {
			case FSTRANS_LAZY:
				printf(" lazy");
				break;
			case FSTRANS_SHARED:
				printf(" shared");
				break;
			case FSTRANS_EXCL:
				printf(" excl");
				break;
			default:
				printf(" %#x", fli->fli_lock_type);
				break;
			}
		}
		printf(" %d cow %d alias %d\n",
		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
		prefix[0] = '\0';
	}
}

static void
fstrans_print_mount(struct mount *mp, int verbose)
{
	struct fstrans_mount_info *fmi;

	fmi = mp->mnt_transinfo;
	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
		return;

	printf("%-16s ", mp->mnt_stat.f_mntonname);
	if (fmi == NULL) {
		printf("(null)\n");
		return;
	}
	switch (fmi->fmi_state) {
	case FSTRANS_NORMAL:
		printf("state normal\n");
		break;
	case FSTRANS_SUSPENDING:
		printf("state suspending\n");
		break;
	case FSTRANS_SUSPENDED:
		printf("state suspended\n");
		break;
	default:
		printf("state %#x\n", fmi->fmi_state);
		break;
	}
}

void
fstrans_dump(int full)
{
	const struct proclist_desc *pd;
	struct proc *p;
	struct lwp *l;
	struct mount *mp;

	printf("Fstrans locks by lwp:\n");
	for (pd = proclists; pd->pd_list != NULL; pd++)
		PROCLIST_FOREACH(p, pd->pd_list)
			LIST_FOREACH(l, &p->p_lwps, l_sibling)
				fstrans_print_lwp(p, l, full == 1);

	printf("Fstrans state by mount:\n");
	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
		fstrans_print_mount(mp, full == 1);
}
#endif /* defined(DDB) */