vfs_trans.c revision 1.54 1 /* $NetBSD: vfs_trans.c,v 1.54 2019/02/20 10:09:45 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.54 2019/02/20 10:09:45 hannken Exp $");
34
35 /*
36 * File system transaction operations.
37 */
38
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/atomic.h>
46 #include <sys/buf.h>
47 #include <sys/kmem.h>
48 #include <sys/mount.h>
49 #include <sys/pserialize.h>
50 #include <sys/vnode.h>
51 #include <sys/fstrans.h>
52 #include <sys/proc.h>
53
54 #include <miscfs/specfs/specdev.h>
55
/*
 * Lock types a transaction can be started with.  grant_lock() decides
 * which of them are admitted in a given fstrans state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY = 0,		/* Granted while not suspended */
	FSTRANS_SHARED = 1,		/* Granted while not suspending */
	FSTRANS_EXCL = 2		/* Internal: exclusive lock */
};
61
62 struct fscow_handler {
63 LIST_ENTRY(fscow_handler) ch_list;
64 int (*ch_func)(void *, struct buf *, bool);
65 void *ch_arg;
66 };
67 struct fstrans_lwp_info {
68 struct fstrans_lwp_info *fli_succ;
69 struct lwp *fli_self;
70 struct mount *fli_mount;
71 struct mount *fli_alias;
72 struct fstrans_mount_info *fli_mountinfo;
73 int fli_trans_cnt;
74 int fli_cow_cnt;
75 enum fstrans_lock_type fli_lock_type;
76 LIST_ENTRY(fstrans_lwp_info) fli_list;
77 };
78 struct fstrans_mount_info {
79 enum fstrans_state fmi_state;
80 unsigned int fmi_ref_cnt;
81 bool fmi_gone;
82 bool fmi_cow_change;
83 LIST_HEAD(, fscow_handler) fmi_cow_handler;
84 struct mount *fmi_mount;
85 };
86
87 static specificdata_key_t lwp_data_key; /* Our specific data key. */
88 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */
89 static kmutex_t fstrans_lock; /* Fstrans big lock. */
90 static kmutex_t fstrans_mount_lock; /* Fstrans mount big lock. */
91 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */
92 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */
93 static pserialize_t fstrans_psz; /* Pserialize state. */
94 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
95 /* List of all fstrans_lwp_info. */
96 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */
97
98 static void fstrans_lwp_dtor(void *);
99 static void fstrans_mount_dtor(struct fstrans_mount_info *);
100 static void fstrans_clear_lwp_info(void);
101 static inline struct fstrans_lwp_info *
102 fstrans_get_lwp_info(struct mount *, bool);
103 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
104 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
105 static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
106 static bool state_change_done(const struct fstrans_mount_info *);
107 static bool cow_state_change_done(const struct fstrans_mount_info *);
108 static void cow_change_enter(struct fstrans_mount_info *);
109 static void cow_change_done(struct fstrans_mount_info *);
110
111 extern struct mount *dead_rootmount;
112
113 #if defined(DIAGNOSTIC)
114
115 struct fstrans_debug_mount {
116 struct mount *fdm_mount;
117 SLIST_ENTRY(fstrans_debug_mount) fdm_list;
118 };
119
120 static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
121 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);
122
123 static void
124 fstrans_debug_mount(struct mount *mp)
125 {
126 struct fstrans_debug_mount *fdm, *new;
127
128 KASSERT(mutex_owned(&fstrans_mount_lock));
129
130 mutex_exit(&fstrans_mount_lock);
131 new = kmem_alloc(sizeof(*new), KM_SLEEP);
132 new->fdm_mount = mp;
133 mutex_enter(&fstrans_mount_lock);
134
135 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
136 KASSERT(fdm->fdm_mount != mp);
137 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
138 }
139
140 static void
141 fstrans_debug_unmount(struct mount *mp)
142 {
143 struct fstrans_debug_mount *fdm;
144
145 KASSERT(mutex_owned(&fstrans_mount_lock));
146
147 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
148 if (fdm->fdm_mount == mp)
149 break;
150 KASSERT(fdm != NULL);
151 SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
152 fstrans_debug_mount, fdm_list);
153 kmem_free(fdm, sizeof(*fdm));
154 }
155
156 static void
157 fstrans_debug_validate_mount(struct mount *mp)
158 {
159 struct fstrans_debug_mount *fdm;
160
161 KASSERT(mutex_owned(&fstrans_mount_lock));
162
163 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
164 if (fdm->fdm_mount == mp)
165 break;
166 KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
167 }
168
169 #else /* defined(DIAGNOSTIC) */
170
/*
 * Without DIAGNOSTIC the mount bookkeeping is compiled out; the
 * helpers become statements that do nothing.
 */
#define fstrans_debug_mount(mp)			do { } while (/*CONSTCOND*/0)
#define fstrans_debug_unmount(mp)		do { } while (/*CONSTCOND*/0)
#define fstrans_debug_validate_mount(mp)	do { } while (/*CONSTCOND*/0)
174
175 #endif /* defined(DIAGNOSTIC) */
176
177 /*
178 * Initialize.
179 */
180 void
181 fstrans_init(void)
182 {
183 int error __diagused;
184
185 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
186 KASSERT(error == 0);
187
188 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
189 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
190 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
191 cv_init(&fstrans_state_cv, "fstchg");
192 cv_init(&fstrans_count_cv, "fstcnt");
193 fstrans_psz = pserialize_create();
194 LIST_INIT(&fstrans_fli_head);
195 }
196
197 /*
198 * Deallocate lwp state.
199 */
200 static void
201 fstrans_lwp_dtor(void *arg)
202 {
203 struct fstrans_lwp_info *fli, *fli_next;
204
205 for (fli = arg; fli; fli = fli_next) {
206 KASSERT(fli->fli_trans_cnt == 0);
207 KASSERT(fli->fli_cow_cnt == 0);
208 if (fli->fli_mount != NULL)
209 fstrans_mount_dtor(fli->fli_mountinfo);
210 fli_next = fli->fli_succ;
211 fli->fli_mount = NULL;
212 fli->fli_alias = NULL;
213 fli->fli_mountinfo = NULL;
214 membar_sync();
215 fli->fli_self = NULL;
216 }
217 }
218
219 /*
220 * Dereference mount state.
221 */
222 static void
223 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
224 {
225
226 mutex_enter(&fstrans_mount_lock);
227
228 KASSERT(fmi != NULL);
229 fmi->fmi_ref_cnt -= 1;
230 if (fmi->fmi_ref_cnt > 0) {
231 mutex_exit(&fstrans_mount_lock);
232 return;
233 }
234
235 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
236 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
237
238 KASSERT(fstrans_gone_count > 0);
239 fstrans_gone_count -= 1;
240
241 mutex_exit(&fstrans_mount_lock);
242
243 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
244 kmem_free(fmi, sizeof(*fmi));
245 }
246
247 /*
248 * Allocate mount state.
249 */
250 int
251 fstrans_mount(struct mount *mp)
252 {
253 struct fstrans_mount_info *newfmi;
254
255 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
256 newfmi->fmi_state = FSTRANS_NORMAL;
257 newfmi->fmi_ref_cnt = 1;
258 newfmi->fmi_gone = false;
259 LIST_INIT(&newfmi->fmi_cow_handler);
260 newfmi->fmi_cow_change = false;
261 newfmi->fmi_mount = mp;
262
263 mutex_enter(&fstrans_mount_lock);
264 mp->mnt_transinfo = newfmi;
265 fstrans_debug_mount(mp);
266 mutex_exit(&fstrans_mount_lock);
267
268 return 0;
269 }
270
271 /*
272 * Deallocate mount state.
273 */
274 void
275 fstrans_unmount(struct mount *mp)
276 {
277 struct fstrans_mount_info *fmi = mp->mnt_transinfo;
278
279 KASSERT(fmi != NULL);
280
281 mutex_enter(&fstrans_mount_lock);
282 fstrans_debug_unmount(mp);
283 fmi->fmi_gone = true;
284 mp->mnt_transinfo = NULL;
285 fstrans_gone_count += 1;
286 mutex_exit(&fstrans_mount_lock);
287
288 fstrans_mount_dtor(fmi);
289 }
290
291 /*
292 * Clear mount entries whose mount is gone.
293 */
294 static void
295 fstrans_clear_lwp_info(void)
296 {
297 struct fstrans_lwp_info *head, **p, *fli;
298
299 /*
300 * Scan our list clearing entries whose mount is gone.
301 */
302 head = lwp_getspecific(lwp_data_key);
303 for (p = &head; *p; p = &(*p)->fli_succ) {
304 fli = *p;
305 if (fli->fli_mount != NULL &&
306 fli->fli_mountinfo->fmi_gone &&
307 fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
308 *p = (*p)->fli_succ;
309 fstrans_mount_dtor(fli->fli_mountinfo);
310 fli->fli_mount = NULL;
311 fli->fli_alias = NULL;
312 fli->fli_mountinfo = NULL;
313 membar_sync();
314 fli->fli_self = NULL;
315
316 if (*p == NULL)
317 break;
318 }
319 }
320 lwp_setspecific(lwp_data_key, head);
321 }
322
323 /*
324 * Allocate and return per lwp info for this mount.
325 */
326 static struct fstrans_lwp_info *
327 fstrans_alloc_lwp_info(struct mount *mp)
328 {
329 struct fstrans_lwp_info *fli, *fli2;
330 struct fstrans_mount_info *fmi;
331
332 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
333 if (fli->fli_mount == mp)
334 return fli;
335 }
336
337 /*
338 * Try to reuse a cleared entry or allocate a new one.
339 */
340 mutex_enter(&fstrans_lock);
341 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
342 membar_sync();
343 if (fli->fli_self == NULL) {
344 KASSERT(fli->fli_mount == NULL);
345 KASSERT(fli->fli_trans_cnt == 0);
346 KASSERT(fli->fli_cow_cnt == 0);
347 fli->fli_self = curlwp;
348 fli->fli_succ = lwp_getspecific(lwp_data_key);
349 lwp_setspecific(lwp_data_key, fli);
350 break;
351 }
352 }
353 mutex_exit(&fstrans_lock);
354
355 if (fli == NULL) {
356 fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
357 mutex_enter(&fstrans_lock);
358 memset(fli, 0, sizeof(*fli));
359 fli->fli_self = curlwp;
360 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
361 mutex_exit(&fstrans_lock);
362 fli->fli_succ = lwp_getspecific(lwp_data_key);
363 lwp_setspecific(lwp_data_key, fli);
364 }
365
366 /*
367 * Attach the entry to the mount if its mnt_transinfo is valid.
368 */
369
370 mutex_enter(&fstrans_mount_lock);
371 fstrans_debug_validate_mount(mp);
372 fmi = mp->mnt_transinfo;
373 KASSERT(fmi != NULL);
374 fli->fli_mount = mp;
375 fli->fli_mountinfo = fmi;
376 fmi->fmi_ref_cnt += 1;
377 mp = mp->mnt_lower;
378 mutex_exit(&fstrans_mount_lock);
379
380 if (mp) {
381 fli2 = fstrans_alloc_lwp_info(mp);
382 fli->fli_alias = fli2->fli_mount;
383
384 fli = fli2;
385 }
386
387 return fli;
388 }
389
390 /*
391 * Retrieve the per lwp info for this mount allocating if necessary.
392 */
393 static inline struct fstrans_lwp_info *
394 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
395 {
396 struct fstrans_lwp_info *head, *fli, *fli2;
397
398 head = lwp_getspecific(lwp_data_key);
399
400 /*
401 * Scan our list for a match.
402 */
403 for (fli = head; fli; fli = fli->fli_succ) {
404 if (fli->fli_mount == mp) {
405 if (fli->fli_alias != NULL) {
406 for (fli2 = head; fli2; fli2 = fli2->fli_succ) {
407 if (fli2->fli_mount == fli->fli_alias)
408 break;
409 }
410 KASSERT(fli2 != NULL);
411 fli = fli2;
412 }
413 break;
414 }
415 }
416
417 if (do_alloc) {
418 if (__predict_false(fli == NULL))
419 fli = fstrans_alloc_lwp_info(mp);
420 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
421 } else {
422 KASSERT(fli != NULL);
423 }
424
425 return fli;
426 }
427
428 /*
429 * Check if this lock type is granted at this state.
430 */
431 static bool
432 grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
433 {
434
435 if (__predict_true(state == FSTRANS_NORMAL))
436 return true;
437 if (type == FSTRANS_EXCL)
438 return true;
439 if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
440 return true;
441
442 return false;
443 }
444
445 /*
446 * Start a transaction. If this thread already has a transaction on this
447 * file system increment the reference counter.
448 */
449 static inline int
450 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
451 {
452 int s;
453 struct fstrans_lwp_info *fli;
454 struct fstrans_mount_info *fmi;
455
456 #ifndef FSTRANS_DEAD_ENABLED
457 if (mp == dead_rootmount)
458 return 0;
459 #endif
460
461 ASSERT_SLEEPABLE();
462
463 fli = fstrans_get_lwp_info(mp, true);
464 fmi = fli->fli_mountinfo;
465
466 if (fli->fli_trans_cnt > 0) {
467 KASSERT(lock_type != FSTRANS_EXCL);
468 fli->fli_trans_cnt += 1;
469
470 return 0;
471 }
472
473 s = pserialize_read_enter();
474 if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
475 fli->fli_trans_cnt = 1;
476 fli->fli_lock_type = lock_type;
477 pserialize_read_exit(s);
478
479 return 0;
480 }
481 pserialize_read_exit(s);
482
483 if (! wait)
484 return EBUSY;
485
486 mutex_enter(&fstrans_lock);
487 while (! grant_lock(fmi->fmi_state, lock_type))
488 cv_wait(&fstrans_state_cv, &fstrans_lock);
489 fli->fli_trans_cnt = 1;
490 fli->fli_lock_type = lock_type;
491 mutex_exit(&fstrans_lock);
492
493 return 0;
494 }
495
496 void
497 fstrans_start(struct mount *mp)
498 {
499 int error __diagused;
500
501 error = _fstrans_start(mp, FSTRANS_SHARED, 1);
502 KASSERT(error == 0);
503 }
504
505 int
506 fstrans_start_nowait(struct mount *mp)
507 {
508
509 return _fstrans_start(mp, FSTRANS_SHARED, 0);
510 }
511
512 void
513 fstrans_start_lazy(struct mount *mp)
514 {
515 int error __diagused;
516
517 error = _fstrans_start(mp, FSTRANS_LAZY, 1);
518 KASSERT(error == 0);
519 }
520
521 /*
522 * Finish a transaction.
523 */
524 void
525 fstrans_done(struct mount *mp)
526 {
527 int s;
528 struct fstrans_lwp_info *fli;
529 struct fstrans_mount_info *fmi;
530
531 #ifndef FSTRANS_DEAD_ENABLED
532 if (mp == dead_rootmount)
533 return;
534 #endif
535
536 fli = fstrans_get_lwp_info(mp, false);
537 fmi = fli->fli_mountinfo;
538 KASSERT(fli->fli_trans_cnt > 0);
539
540 if (fli->fli_trans_cnt > 1) {
541 fli->fli_trans_cnt -= 1;
542
543 return;
544 }
545
546 s = pserialize_read_enter();
547 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
548 fli->fli_trans_cnt = 0;
549 pserialize_read_exit(s);
550
551 return;
552 }
553 pserialize_read_exit(s);
554
555 if (__predict_false(fstrans_gone_count > 0))
556 fstrans_clear_lwp_info();
557
558 mutex_enter(&fstrans_lock);
559 fli->fli_trans_cnt = 0;
560 cv_signal(&fstrans_count_cv);
561 mutex_exit(&fstrans_lock);
562 }
563
564 /*
565 * Check if this thread has an exclusive lock.
566 */
567 int
568 fstrans_is_owner(struct mount *mp)
569 {
570 struct fstrans_lwp_info *fli;
571
572 KASSERT(mp != dead_rootmount);
573
574 fli = fstrans_get_lwp_info(mp, true);
575
576 if (fli->fli_trans_cnt == 0)
577 return 0;
578
579 return (fli->fli_lock_type == FSTRANS_EXCL);
580 }
581
582 /*
583 * True, if no thread is in a transaction not granted at the current state.
584 */
585 static bool
586 state_change_done(const struct fstrans_mount_info *fmi)
587 {
588 struct fstrans_lwp_info *fli;
589
590 KASSERT(mutex_owned(&fstrans_lock));
591
592 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
593 if (fli->fli_mountinfo != fmi)
594 continue;
595 if (fli->fli_trans_cnt == 0)
596 continue;
597 if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
598 continue;
599
600 return false;
601 }
602
603 return true;
604 }
605
606 /*
607 * Set new file system state.
608 */
609 int
610 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
611 {
612 int error;
613 enum fstrans_state old_state;
614 struct fstrans_lwp_info *fli;
615 struct fstrans_mount_info *fmi;
616
617 KASSERT(mp != dead_rootmount);
618
619 fli = fstrans_get_lwp_info(mp, true);
620 fmi = fli->fli_mountinfo;
621 old_state = fmi->fmi_state;
622 if (old_state == new_state)
623 return 0;
624
625 mutex_enter(&fstrans_lock);
626 fmi->fmi_state = new_state;
627 pserialize_perform(fstrans_psz);
628
629 /*
630 * All threads see the new state now.
631 * Wait for transactions invalid at this state to leave.
632 */
633 error = 0;
634 while (! state_change_done(fmi)) {
635 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
636 if (error) {
637 new_state = fmi->fmi_state = FSTRANS_NORMAL;
638 break;
639 }
640 }
641 cv_broadcast(&fstrans_state_cv);
642 mutex_exit(&fstrans_lock);
643
644 if (old_state != new_state) {
645 if (old_state == FSTRANS_NORMAL)
646 _fstrans_start(mp, FSTRANS_EXCL, 1);
647 if (new_state == FSTRANS_NORMAL)
648 fstrans_done(mp);
649 }
650
651 return error;
652 }
653
654 /*
655 * Get current file system state.
656 */
657 enum fstrans_state
658 fstrans_getstate(struct mount *mp)
659 {
660 struct fstrans_lwp_info *fli;
661 struct fstrans_mount_info *fmi;
662
663 KASSERT(mp != dead_rootmount);
664
665 fli = fstrans_get_lwp_info(mp, true);
666 fmi = fli->fli_mountinfo;
667
668 return fmi->fmi_state;
669 }
670
671 /*
672 * Request a filesystem to suspend all operations.
673 */
674 int
675 vfs_suspend(struct mount *mp, int nowait)
676 {
677 struct fstrans_lwp_info *fli;
678 int error;
679
680 KASSERT(mp != dead_rootmount);
681
682 fli = fstrans_get_lwp_info(mp, true);
683 mp = fli->fli_mount;
684
685 if (nowait) {
686 if (!mutex_tryenter(&vfs_suspend_lock))
687 return EWOULDBLOCK;
688 } else
689 mutex_enter(&vfs_suspend_lock);
690
691 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
692 mutex_exit(&vfs_suspend_lock);
693
694 return error;
695 }
696
697 /*
698 * Request a filesystem to resume all operations.
699 */
700 void
701 vfs_resume(struct mount *mp)
702 {
703 struct fstrans_lwp_info *fli;
704
705 KASSERT(mp != dead_rootmount);
706
707 fli = fstrans_get_lwp_info(mp, false);
708 mp = fli->fli_mount;
709
710 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
711 mutex_exit(&vfs_suspend_lock);
712 }
713
714
715 /*
716 * True, if no thread is running a cow handler.
717 */
718 static bool
719 cow_state_change_done(const struct fstrans_mount_info *fmi)
720 {
721 struct fstrans_lwp_info *fli;
722
723 KASSERT(mutex_owned(&fstrans_lock));
724 KASSERT(fmi->fmi_cow_change);
725
726 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
727 if (fli->fli_mount != fmi->fmi_mount)
728 continue;
729 if (fli->fli_cow_cnt == 0)
730 continue;
731
732 return false;
733 }
734
735 return true;
736 }
737
738 /*
739 * Prepare for changing this mounts cow list.
740 * Returns with fstrans_lock locked.
741 */
742 static void
743 cow_change_enter(struct fstrans_mount_info *fmi)
744 {
745
746 mutex_enter(&fstrans_lock);
747
748 /*
749 * Wait for other threads changing the list.
750 */
751 while (fmi->fmi_cow_change)
752 cv_wait(&fstrans_state_cv, &fstrans_lock);
753
754 /*
755 * Wait until all threads are aware of a state change.
756 */
757 fmi->fmi_cow_change = true;
758 pserialize_perform(fstrans_psz);
759
760 while (! cow_state_change_done(fmi))
761 cv_wait(&fstrans_count_cv, &fstrans_lock);
762 }
763
764 /*
765 * Done changing this mounts cow list.
766 */
767 static void
768 cow_change_done(struct fstrans_mount_info *fmi)
769 {
770
771 KASSERT(mutex_owned(&fstrans_lock));
772
773 fmi->fmi_cow_change = false;
774 pserialize_perform(fstrans_psz);
775
776 cv_broadcast(&fstrans_state_cv);
777
778 mutex_exit(&fstrans_lock);
779 }
780
781 /*
782 * Add a handler to this mount.
783 */
784 int
785 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
786 void *arg)
787 {
788 struct fstrans_mount_info *fmi;
789 struct fscow_handler *newch;
790
791 KASSERT(mp != dead_rootmount);
792
793 mutex_enter(&fstrans_mount_lock);
794 fmi = mp->mnt_transinfo;
795 KASSERT(fmi != NULL);
796 fmi->fmi_ref_cnt += 1;
797 mutex_exit(&fstrans_mount_lock);
798
799 newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
800 newch->ch_func = func;
801 newch->ch_arg = arg;
802
803 cow_change_enter(fmi);
804 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
805 cow_change_done(fmi);
806
807 return 0;
808 }
809
810 /*
811 * Remove a handler from this mount.
812 */
813 int
814 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
815 void *arg)
816 {
817 struct fstrans_mount_info *fmi;
818 struct fscow_handler *hp = NULL;
819
820 KASSERT(mp != dead_rootmount);
821
822 fmi = mp->mnt_transinfo;
823 KASSERT(fmi != NULL);
824
825 cow_change_enter(fmi);
826 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
827 if (hp->ch_func == func && hp->ch_arg == arg)
828 break;
829 if (hp != NULL) {
830 LIST_REMOVE(hp, ch_list);
831 kmem_free(hp, sizeof(*hp));
832 }
833 cow_change_done(fmi);
834
835 fstrans_mount_dtor(fmi);
836
837 return hp ? 0 : EINVAL;
838 }
839
840 /*
841 * Check for need to copy block that is about to be written.
842 */
843 int
844 fscow_run(struct buf *bp, bool data_valid)
845 {
846 int error, s;
847 struct mount *mp;
848 struct fstrans_lwp_info *fli;
849 struct fstrans_mount_info *fmi;
850 struct fscow_handler *hp;
851
852 /*
853 * First check if we need run the copy-on-write handler.
854 */
855 if ((bp->b_flags & B_COWDONE))
856 return 0;
857 if (bp->b_vp == NULL) {
858 bp->b_flags |= B_COWDONE;
859 return 0;
860 }
861 if (bp->b_vp->v_type == VBLK)
862 mp = spec_node_getmountedfs(bp->b_vp);
863 else
864 mp = bp->b_vp->v_mount;
865 if (mp == NULL || mp == dead_rootmount) {
866 bp->b_flags |= B_COWDONE;
867 return 0;
868 }
869
870 fli = fstrans_get_lwp_info(mp, true);
871 fmi = fli->fli_mountinfo;
872
873 /*
874 * On non-recursed run check if other threads
875 * want to change the list.
876 */
877 if (fli->fli_cow_cnt == 0) {
878 s = pserialize_read_enter();
879 if (__predict_false(fmi->fmi_cow_change)) {
880 pserialize_read_exit(s);
881 mutex_enter(&fstrans_lock);
882 while (fmi->fmi_cow_change)
883 cv_wait(&fstrans_state_cv, &fstrans_lock);
884 fli->fli_cow_cnt = 1;
885 mutex_exit(&fstrans_lock);
886 } else {
887 fli->fli_cow_cnt = 1;
888 pserialize_read_exit(s);
889 }
890 } else
891 fli->fli_cow_cnt += 1;
892
893 /*
894 * Run all copy-on-write handlers, stop on error.
895 */
896 error = 0;
897 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
898 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
899 break;
900 if (error == 0)
901 bp->b_flags |= B_COWDONE;
902
903 /*
904 * Check if other threads want to change the list.
905 */
906 if (fli->fli_cow_cnt > 1) {
907 fli->fli_cow_cnt -= 1;
908 } else {
909 s = pserialize_read_enter();
910 if (__predict_false(fmi->fmi_cow_change)) {
911 pserialize_read_exit(s);
912 mutex_enter(&fstrans_lock);
913 fli->fli_cow_cnt = 0;
914 cv_signal(&fstrans_count_cv);
915 mutex_exit(&fstrans_lock);
916 } else {
917 fli->fli_cow_cnt = 0;
918 pserialize_read_exit(s);
919 }
920 }
921
922 return error;
923 }
924
925 #if defined(DDB)
/* Debugger entry point, declared here ahead of its definition below. */
void fstrans_dump(int full);
927
928 static void
929 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
930 {
931 char prefix[9];
932 struct fstrans_lwp_info *fli;
933
934 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
935 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
936 if (fli->fli_self != l)
937 continue;
938 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
939 if (! verbose)
940 continue;
941 }
942 printf("%-8s", prefix);
943 if (verbose)
944 printf(" @%p", fli);
945 if (fli->fli_mount == dead_rootmount)
946 printf(" <dead>");
947 else if (fli->fli_mount != NULL)
948 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
949 else
950 printf(" NULL");
951 if (fli->fli_alias != NULL)
952 printf(" alias (%s)",
953 fli->fli_alias->mnt_stat.f_mntonname);
954 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
955 printf(" gone");
956 if (fli->fli_trans_cnt == 0) {
957 printf(" -");
958 } else {
959 switch (fli->fli_lock_type) {
960 case FSTRANS_LAZY:
961 printf(" lazy");
962 break;
963 case FSTRANS_SHARED:
964 printf(" shared");
965 break;
966 case FSTRANS_EXCL:
967 printf(" excl");
968 break;
969 default:
970 printf(" %#x", fli->fli_lock_type);
971 break;
972 }
973 }
974 printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
975 prefix[0] = '\0';
976 }
977 }
978
979 static void
980 fstrans_print_mount(struct mount *mp, int verbose)
981 {
982 struct fstrans_mount_info *fmi;
983
984 fmi = mp->mnt_transinfo;
985 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
986 return;
987
988 printf("%-16s ", mp->mnt_stat.f_mntonname);
989 if (fmi == NULL) {
990 printf("(null)\n");
991 return;
992 }
993 switch (fmi->fmi_state) {
994 case FSTRANS_NORMAL:
995 printf("state normal\n");
996 break;
997 case FSTRANS_SUSPENDING:
998 printf("state suspending\n");
999 break;
1000 case FSTRANS_SUSPENDED:
1001 printf("state suspended\n");
1002 break;
1003 default:
1004 printf("state %#x\n", fmi->fmi_state);
1005 break;
1006 }
1007 }
1008
1009 void
1010 fstrans_dump(int full)
1011 {
1012 const struct proclist_desc *pd;
1013 struct proc *p;
1014 struct lwp *l;
1015 struct mount *mp;
1016
1017 printf("Fstrans locks by lwp:\n");
1018 for (pd = proclists; pd->pd_list != NULL; pd++)
1019 PROCLIST_FOREACH(p, pd->pd_list)
1020 LIST_FOREACH(l, &p->p_lwps, l_sibling)
1021 fstrans_print_lwp(p, l, full == 1);
1022
1023 printf("Fstrans state by mount:\n");
1024 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1025 fstrans_print_mount(mp, full == 1);
1026 }
1027 #endif /* defined(DDB) */
1028