vfs_trans.c revision 1.55 1 /* $NetBSD: vfs_trans.c,v 1.55 2019/02/21 08:52:53 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.55 2019/02/21 08:52:53 hannken Exp $");
34
35 /*
36 * File system transaction operations.
37 */
38
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/atomic.h>
46 #include <sys/buf.h>
47 #include <sys/kmem.h>
48 #include <sys/mount.h>
49 #include <sys/pserialize.h>
50 #include <sys/vnode.h>
51 #include <sys/fstrans.h>
52 #include <sys/proc.h>
53
54 #include <miscfs/specfs/specdev.h>
55
/*
 * Kind of transaction a thread holds on a mount.  grant_lock() decides
 * which kinds may be taken in which fstrans state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY = 0,	/* Granted while not suspended */
	FSTRANS_SHARED = 1,	/* Granted while not suspending */
	FSTRANS_EXCL = 2	/* Internal: exclusive lock */
};
61
62 struct fscow_handler {
63 LIST_ENTRY(fscow_handler) ch_list;
64 int (*ch_func)(void *, struct buf *, bool);
65 void *ch_arg;
66 };
67 struct fstrans_lwp_info {
68 struct fstrans_lwp_info *fli_succ;
69 struct lwp *fli_self;
70 struct mount *fli_mount;
71 struct mount *fli_alias;
72 struct fstrans_mount_info *fli_mountinfo;
73 int fli_trans_cnt;
74 int fli_cow_cnt;
75 enum fstrans_lock_type fli_lock_type;
76 LIST_ENTRY(fstrans_lwp_info) fli_list;
77 };
78 struct fstrans_mount_info {
79 enum fstrans_state fmi_state;
80 unsigned int fmi_ref_cnt;
81 bool fmi_gone;
82 bool fmi_cow_change;
83 LIST_HEAD(, fscow_handler) fmi_cow_handler;
84 struct mount *fmi_mount;
85 };
86
87 static specificdata_key_t lwp_data_key; /* Our specific data key. */
88 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */
89 static kmutex_t fstrans_lock; /* Fstrans big lock. */
90 static kmutex_t fstrans_mount_lock; /* Fstrans mount big lock. */
91 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */
92 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */
93 static pserialize_t fstrans_psz; /* Pserialize state. */
94 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
95 /* List of all fstrans_lwp_info. */
96 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */
97
98 static void fstrans_lwp_dtor(void *);
99 static void fstrans_mount_dtor(struct fstrans_mount_info *);
100 static void fstrans_clear_lwp_info(void);
101 static inline struct fstrans_lwp_info *
102 fstrans_get_lwp_info(struct mount *, bool);
103 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
104 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
105 static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
106 static bool state_change_done(const struct fstrans_mount_info *);
107 static bool cow_state_change_done(const struct fstrans_mount_info *);
108 static void cow_change_enter(struct fstrans_mount_info *);
109 static void cow_change_done(struct fstrans_mount_info *);
110
111 extern struct mount *dead_rootmount;
112
113 #if defined(DIAGNOSTIC)
114
115 struct fstrans_debug_mount {
116 struct mount *fdm_mount;
117 SLIST_ENTRY(fstrans_debug_mount) fdm_list;
118 };
119
120 static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
121 SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);
122
123 static void
124 fstrans_debug_mount(struct mount *mp)
125 {
126 struct fstrans_debug_mount *fdm, *new;
127
128 KASSERT(mutex_owned(&fstrans_mount_lock));
129
130 mutex_exit(&fstrans_mount_lock);
131 new = kmem_alloc(sizeof(*new), KM_SLEEP);
132 new->fdm_mount = mp;
133 mutex_enter(&fstrans_mount_lock);
134
135 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
136 KASSERT(fdm->fdm_mount != mp);
137 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
138 }
139
140 static void
141 fstrans_debug_unmount(struct mount *mp)
142 {
143 struct fstrans_debug_mount *fdm;
144
145 KASSERT(mutex_owned(&fstrans_mount_lock));
146
147 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
148 if (fdm->fdm_mount == mp)
149 break;
150 KASSERT(fdm != NULL);
151 SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
152 fstrans_debug_mount, fdm_list);
153 kmem_free(fdm, sizeof(*fdm));
154 }
155
156 static void
157 fstrans_debug_validate_mount(struct mount *mp)
158 {
159 struct fstrans_debug_mount *fdm;
160
161 KASSERT(mutex_owned(&fstrans_mount_lock));
162
163 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
164 if (fdm->fdm_mount == mp)
165 break;
166 KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
167 }
168
169 #else /* defined(DIAGNOSTIC) */
170
/* Without DIAGNOSTIC the mount bookkeeping compiles to nothing. */
#define fstrans_debug_mount(mp)			((void)0)
#define fstrans_debug_unmount(mp)		((void)0)
#define fstrans_debug_validate_mount(mp)	((void)0)
174
175 #endif /* defined(DIAGNOSTIC) */
176
177 /*
178 * Initialize.
179 */
180 void
181 fstrans_init(void)
182 {
183 int error __diagused;
184
185 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
186 KASSERT(error == 0);
187
188 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
189 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
190 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
191 cv_init(&fstrans_state_cv, "fstchg");
192 cv_init(&fstrans_count_cv, "fstcnt");
193 fstrans_psz = pserialize_create();
194 LIST_INIT(&fstrans_fli_head);
195 }
196
197 /*
198 * Deallocate lwp state.
199 */
200 static void
201 fstrans_lwp_dtor(void *arg)
202 {
203 struct fstrans_lwp_info *fli, *fli_next;
204
205 for (fli = arg; fli; fli = fli_next) {
206 KASSERT(fli->fli_trans_cnt == 0);
207 KASSERT(fli->fli_cow_cnt == 0);
208 if (fli->fli_mount != NULL)
209 fstrans_mount_dtor(fli->fli_mountinfo);
210 fli_next = fli->fli_succ;
211 fli->fli_mount = NULL;
212 fli->fli_alias = NULL;
213 fli->fli_mountinfo = NULL;
214 membar_sync();
215 fli->fli_self = NULL;
216 }
217 }
218
219 /*
220 * Dereference mount state.
221 */
222 static void
223 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
224 {
225
226 mutex_enter(&fstrans_mount_lock);
227
228 KASSERT(fmi != NULL);
229 fmi->fmi_ref_cnt -= 1;
230 if (fmi->fmi_ref_cnt > 0) {
231 mutex_exit(&fstrans_mount_lock);
232 return;
233 }
234
235 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
236 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
237
238 KASSERT(fstrans_gone_count > 0);
239 fstrans_gone_count -= 1;
240
241 mutex_exit(&fstrans_mount_lock);
242
243 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
244 kmem_free(fmi, sizeof(*fmi));
245 }
246
247 /*
248 * Allocate mount state.
249 */
250 int
251 fstrans_mount(struct mount *mp)
252 {
253 struct fstrans_mount_info *newfmi;
254
255 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
256 newfmi->fmi_state = FSTRANS_NORMAL;
257 newfmi->fmi_ref_cnt = 1;
258 newfmi->fmi_gone = false;
259 LIST_INIT(&newfmi->fmi_cow_handler);
260 newfmi->fmi_cow_change = false;
261 newfmi->fmi_mount = mp;
262
263 mutex_enter(&fstrans_mount_lock);
264 mp->mnt_transinfo = newfmi;
265 fstrans_debug_mount(mp);
266 mutex_exit(&fstrans_mount_lock);
267
268 return 0;
269 }
270
271 /*
272 * Deallocate mount state.
273 */
274 void
275 fstrans_unmount(struct mount *mp)
276 {
277 struct fstrans_mount_info *fmi = mp->mnt_transinfo;
278
279 KASSERT(fmi != NULL);
280
281 mutex_enter(&fstrans_mount_lock);
282 fstrans_debug_unmount(mp);
283 fmi->fmi_gone = true;
284 mp->mnt_transinfo = NULL;
285 fstrans_gone_count += 1;
286 mutex_exit(&fstrans_mount_lock);
287
288 fstrans_mount_dtor(fmi);
289 }
290
291 /*
292 * Clear mount entries whose mount is gone.
293 */
294 static void
295 fstrans_clear_lwp_info(void)
296 {
297 struct fstrans_lwp_info *head, **p, *fli;
298
299 /*
300 * Scan our list clearing entries whose mount is gone.
301 */
302 head = lwp_getspecific(lwp_data_key);
303 for (p = &head; *p; p = &(*p)->fli_succ) {
304 fli = *p;
305 if (fli->fli_mount != NULL &&
306 fli->fli_mountinfo->fmi_gone &&
307 fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
308 *p = (*p)->fli_succ;
309 fstrans_mount_dtor(fli->fli_mountinfo);
310 fli->fli_mount = NULL;
311 fli->fli_alias = NULL;
312 fli->fli_mountinfo = NULL;
313 membar_sync();
314 fli->fli_self = NULL;
315
316 if (*p == NULL)
317 break;
318 }
319 }
320 lwp_setspecific(lwp_data_key, head);
321 }
322
323 /*
324 * Allocate and return per lwp info for this mount.
325 */
326 static struct fstrans_lwp_info *
327 fstrans_alloc_lwp_info(struct mount *mp)
328 {
329 struct fstrans_lwp_info *fli, *fli2;
330 struct fstrans_mount_info *fmi;
331
332 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
333 if (fli->fli_mount == mp)
334 return fli;
335 }
336
337 /*
338 * Try to reuse a cleared entry or allocate a new one.
339 */
340 mutex_enter(&fstrans_lock);
341 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
342 membar_sync();
343 if (fli->fli_self == NULL) {
344 KASSERT(fli->fli_mount == NULL);
345 KASSERT(fli->fli_trans_cnt == 0);
346 KASSERT(fli->fli_cow_cnt == 0);
347 fli->fli_self = curlwp;
348 fli->fli_succ = lwp_getspecific(lwp_data_key);
349 lwp_setspecific(lwp_data_key, fli);
350 break;
351 }
352 }
353 mutex_exit(&fstrans_lock);
354
355 if (fli == NULL) {
356 fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
357 mutex_enter(&fstrans_lock);
358 memset(fli, 0, sizeof(*fli));
359 fli->fli_self = curlwp;
360 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
361 mutex_exit(&fstrans_lock);
362 fli->fli_succ = lwp_getspecific(lwp_data_key);
363 lwp_setspecific(lwp_data_key, fli);
364 }
365
366 /*
367 * Attach the entry to the mount if its mnt_transinfo is valid.
368 */
369
370 mutex_enter(&fstrans_mount_lock);
371 fstrans_debug_validate_mount(mp);
372 fmi = mp->mnt_transinfo;
373 KASSERT(fmi != NULL);
374 fli->fli_mount = mp;
375 fli->fli_mountinfo = fmi;
376 fmi->fmi_ref_cnt += 1;
377 mp = mp->mnt_lower;
378 mutex_exit(&fstrans_mount_lock);
379
380 if (mp) {
381 fli2 = fstrans_alloc_lwp_info(mp);
382 fli->fli_alias = fli2->fli_mount;
383
384 fli = fli2;
385 }
386
387 return fli;
388 }
389
390 /*
391 * Retrieve the per lwp info for this mount allocating if necessary.
392 */
393 static inline struct fstrans_lwp_info *
394 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
395 {
396 struct fstrans_lwp_info *head, *fli, *fli2;
397
398 head = lwp_getspecific(lwp_data_key);
399
400 /*
401 * Scan our list for a match.
402 */
403 for (fli = head; fli; fli = fli->fli_succ) {
404 if (fli->fli_mount == mp) {
405 if (fli->fli_alias != NULL) {
406 for (fli2 = head; fli2; fli2 = fli2->fli_succ) {
407 if (fli2->fli_mount == fli->fli_alias)
408 break;
409 }
410 KASSERT(fli2 != NULL);
411 fli = fli2;
412 }
413 break;
414 }
415 }
416
417 if (do_alloc) {
418 if (__predict_false(fli == NULL))
419 fli = fstrans_alloc_lwp_info(mp);
420 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
421 } else {
422 KASSERT(fli != NULL);
423 }
424
425 return fli;
426 }
427
428 /*
429 * Check if this lock type is granted at this state.
430 */
431 static bool
432 grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
433 {
434
435 if (__predict_true(state == FSTRANS_NORMAL))
436 return true;
437 if (type == FSTRANS_EXCL)
438 return true;
439 if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
440 return true;
441
442 return false;
443 }
444
445 /*
446 * Start a transaction. If this thread already has a transaction on this
447 * file system increment the reference counter.
448 */
449 static inline int
450 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
451 {
452 int s;
453 struct fstrans_lwp_info *fli;
454 struct fstrans_mount_info *fmi;
455
456 #ifndef FSTRANS_DEAD_ENABLED
457 if (mp == dead_rootmount)
458 return 0;
459 #endif
460
461 ASSERT_SLEEPABLE();
462
463 fli = fstrans_get_lwp_info(mp, true);
464 fmi = fli->fli_mountinfo;
465
466 if (fli->fli_trans_cnt > 0) {
467 KASSERT(lock_type != FSTRANS_EXCL);
468 fli->fli_trans_cnt += 1;
469
470 return 0;
471 }
472
473 s = pserialize_read_enter();
474 if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
475 fli->fli_trans_cnt = 1;
476 fli->fli_lock_type = lock_type;
477 pserialize_read_exit(s);
478
479 return 0;
480 }
481 pserialize_read_exit(s);
482
483 if (! wait)
484 return EBUSY;
485
486 mutex_enter(&fstrans_lock);
487 while (! grant_lock(fmi->fmi_state, lock_type))
488 cv_wait(&fstrans_state_cv, &fstrans_lock);
489 fli->fli_trans_cnt = 1;
490 fli->fli_lock_type = lock_type;
491 mutex_exit(&fstrans_lock);
492
493 return 0;
494 }
495
496 void
497 fstrans_start(struct mount *mp)
498 {
499 int error __diagused;
500
501 error = _fstrans_start(mp, FSTRANS_SHARED, 1);
502 KASSERT(error == 0);
503 }
504
505 int
506 fstrans_start_nowait(struct mount *mp)
507 {
508
509 return _fstrans_start(mp, FSTRANS_SHARED, 0);
510 }
511
512 void
513 fstrans_start_lazy(struct mount *mp)
514 {
515 int error __diagused;
516
517 error = _fstrans_start(mp, FSTRANS_LAZY, 1);
518 KASSERT(error == 0);
519 }
520
521 /*
522 * Finish a transaction.
523 */
524 void
525 fstrans_done(struct mount *mp)
526 {
527 int s;
528 struct fstrans_lwp_info *fli;
529 struct fstrans_mount_info *fmi;
530
531 #ifndef FSTRANS_DEAD_ENABLED
532 if (mp == dead_rootmount)
533 return;
534 #endif
535
536 fli = fstrans_get_lwp_info(mp, false);
537 fmi = fli->fli_mountinfo;
538 KASSERT(fli->fli_trans_cnt > 0);
539
540 if (fli->fli_trans_cnt > 1) {
541 fli->fli_trans_cnt -= 1;
542
543 return;
544 }
545
546 s = pserialize_read_enter();
547 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
548 fli->fli_trans_cnt = 0;
549 pserialize_read_exit(s);
550
551 return;
552 }
553 pserialize_read_exit(s);
554
555 if (__predict_false(fstrans_gone_count > 0))
556 fstrans_clear_lwp_info();
557
558 mutex_enter(&fstrans_lock);
559 fli->fli_trans_cnt = 0;
560 cv_signal(&fstrans_count_cv);
561 mutex_exit(&fstrans_lock);
562 }
563
564 /*
565 * Check if this thread has an exclusive lock.
566 */
567 int
568 fstrans_is_owner(struct mount *mp)
569 {
570 struct fstrans_lwp_info *fli;
571
572 KASSERT(mp != dead_rootmount);
573
574 fli = fstrans_get_lwp_info(mp, true);
575
576 if (fli->fli_trans_cnt == 0)
577 return 0;
578
579 return (fli->fli_lock_type == FSTRANS_EXCL);
580 }
581
582 /*
583 * True, if no thread is in a transaction not granted at the current state.
584 */
585 static bool
586 state_change_done(const struct fstrans_mount_info *fmi)
587 {
588 struct fstrans_lwp_info *fli;
589
590 KASSERT(mutex_owned(&fstrans_lock));
591
592 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
593 if (fli->fli_mountinfo != fmi)
594 continue;
595 if (fli->fli_trans_cnt == 0)
596 continue;
597 if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
598 continue;
599
600 return false;
601 }
602
603 return true;
604 }
605
606 /*
607 * Set new file system state.
608 */
609 int
610 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
611 {
612 int error;
613 enum fstrans_state old_state;
614 struct fstrans_lwp_info *fli;
615 struct fstrans_mount_info *fmi;
616
617 KASSERT(mp != dead_rootmount);
618
619 fli = fstrans_get_lwp_info(mp, true);
620 fmi = fli->fli_mountinfo;
621 old_state = fmi->fmi_state;
622 if (old_state == new_state)
623 return 0;
624
625 mutex_enter(&fstrans_lock);
626 fmi->fmi_state = new_state;
627 pserialize_perform(fstrans_psz);
628
629 /*
630 * All threads see the new state now.
631 * Wait for transactions invalid at this state to leave.
632 */
633 error = 0;
634 while (! state_change_done(fmi)) {
635 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
636 if (error) {
637 new_state = fmi->fmi_state = FSTRANS_NORMAL;
638 break;
639 }
640 }
641 cv_broadcast(&fstrans_state_cv);
642 mutex_exit(&fstrans_lock);
643
644 if (old_state != new_state) {
645 if (old_state == FSTRANS_NORMAL)
646 _fstrans_start(mp, FSTRANS_EXCL, 1);
647 if (new_state == FSTRANS_NORMAL)
648 fstrans_done(mp);
649 }
650
651 return error;
652 }
653
654 /*
655 * Get current file system state.
656 */
657 enum fstrans_state
658 fstrans_getstate(struct mount *mp)
659 {
660 struct fstrans_lwp_info *fli;
661 struct fstrans_mount_info *fmi;
662
663 KASSERT(mp != dead_rootmount);
664
665 fli = fstrans_get_lwp_info(mp, true);
666 fmi = fli->fli_mountinfo;
667
668 return fmi->fmi_state;
669 }
670
671 /*
672 * Request a filesystem to suspend all operations.
673 */
674 int
675 vfs_suspend(struct mount *mp, int nowait)
676 {
677 struct fstrans_lwp_info *fli;
678 int error;
679
680 if (mp == dead_rootmount)
681 return EOPNOTSUPP;
682
683 fli = fstrans_get_lwp_info(mp, true);
684 mp = fli->fli_mount;
685
686 if (nowait) {
687 if (!mutex_tryenter(&vfs_suspend_lock))
688 return EWOULDBLOCK;
689 } else
690 mutex_enter(&vfs_suspend_lock);
691
692 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
693 mutex_exit(&vfs_suspend_lock);
694
695 return error;
696 }
697
698 /*
699 * Request a filesystem to resume all operations.
700 */
701 void
702 vfs_resume(struct mount *mp)
703 {
704 struct fstrans_lwp_info *fli;
705
706 KASSERT(mp != dead_rootmount);
707
708 fli = fstrans_get_lwp_info(mp, false);
709 mp = fli->fli_mount;
710
711 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
712 mutex_exit(&vfs_suspend_lock);
713 }
714
715
716 /*
717 * True, if no thread is running a cow handler.
718 */
719 static bool
720 cow_state_change_done(const struct fstrans_mount_info *fmi)
721 {
722 struct fstrans_lwp_info *fli;
723
724 KASSERT(mutex_owned(&fstrans_lock));
725 KASSERT(fmi->fmi_cow_change);
726
727 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
728 if (fli->fli_mount != fmi->fmi_mount)
729 continue;
730 if (fli->fli_cow_cnt == 0)
731 continue;
732
733 return false;
734 }
735
736 return true;
737 }
738
739 /*
740 * Prepare for changing this mounts cow list.
741 * Returns with fstrans_lock locked.
742 */
743 static void
744 cow_change_enter(struct fstrans_mount_info *fmi)
745 {
746
747 mutex_enter(&fstrans_lock);
748
749 /*
750 * Wait for other threads changing the list.
751 */
752 while (fmi->fmi_cow_change)
753 cv_wait(&fstrans_state_cv, &fstrans_lock);
754
755 /*
756 * Wait until all threads are aware of a state change.
757 */
758 fmi->fmi_cow_change = true;
759 pserialize_perform(fstrans_psz);
760
761 while (! cow_state_change_done(fmi))
762 cv_wait(&fstrans_count_cv, &fstrans_lock);
763 }
764
765 /*
766 * Done changing this mounts cow list.
767 */
768 static void
769 cow_change_done(struct fstrans_mount_info *fmi)
770 {
771
772 KASSERT(mutex_owned(&fstrans_lock));
773
774 fmi->fmi_cow_change = false;
775 pserialize_perform(fstrans_psz);
776
777 cv_broadcast(&fstrans_state_cv);
778
779 mutex_exit(&fstrans_lock);
780 }
781
782 /*
783 * Add a handler to this mount.
784 */
785 int
786 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
787 void *arg)
788 {
789 struct fstrans_mount_info *fmi;
790 struct fscow_handler *newch;
791
792 KASSERT(mp != dead_rootmount);
793
794 mutex_enter(&fstrans_mount_lock);
795 fmi = mp->mnt_transinfo;
796 KASSERT(fmi != NULL);
797 fmi->fmi_ref_cnt += 1;
798 mutex_exit(&fstrans_mount_lock);
799
800 newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
801 newch->ch_func = func;
802 newch->ch_arg = arg;
803
804 cow_change_enter(fmi);
805 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
806 cow_change_done(fmi);
807
808 return 0;
809 }
810
811 /*
812 * Remove a handler from this mount.
813 */
814 int
815 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
816 void *arg)
817 {
818 struct fstrans_mount_info *fmi;
819 struct fscow_handler *hp = NULL;
820
821 KASSERT(mp != dead_rootmount);
822
823 fmi = mp->mnt_transinfo;
824 KASSERT(fmi != NULL);
825
826 cow_change_enter(fmi);
827 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
828 if (hp->ch_func == func && hp->ch_arg == arg)
829 break;
830 if (hp != NULL) {
831 LIST_REMOVE(hp, ch_list);
832 kmem_free(hp, sizeof(*hp));
833 }
834 cow_change_done(fmi);
835
836 fstrans_mount_dtor(fmi);
837
838 return hp ? 0 : EINVAL;
839 }
840
841 /*
842 * Check for need to copy block that is about to be written.
843 */
844 int
845 fscow_run(struct buf *bp, bool data_valid)
846 {
847 int error, s;
848 struct mount *mp;
849 struct fstrans_lwp_info *fli;
850 struct fstrans_mount_info *fmi;
851 struct fscow_handler *hp;
852
853 /*
854 * First check if we need run the copy-on-write handler.
855 */
856 if ((bp->b_flags & B_COWDONE))
857 return 0;
858 if (bp->b_vp == NULL) {
859 bp->b_flags |= B_COWDONE;
860 return 0;
861 }
862 if (bp->b_vp->v_type == VBLK)
863 mp = spec_node_getmountedfs(bp->b_vp);
864 else
865 mp = bp->b_vp->v_mount;
866 if (mp == NULL || mp == dead_rootmount) {
867 bp->b_flags |= B_COWDONE;
868 return 0;
869 }
870
871 fli = fstrans_get_lwp_info(mp, true);
872 fmi = fli->fli_mountinfo;
873
874 /*
875 * On non-recursed run check if other threads
876 * want to change the list.
877 */
878 if (fli->fli_cow_cnt == 0) {
879 s = pserialize_read_enter();
880 if (__predict_false(fmi->fmi_cow_change)) {
881 pserialize_read_exit(s);
882 mutex_enter(&fstrans_lock);
883 while (fmi->fmi_cow_change)
884 cv_wait(&fstrans_state_cv, &fstrans_lock);
885 fli->fli_cow_cnt = 1;
886 mutex_exit(&fstrans_lock);
887 } else {
888 fli->fli_cow_cnt = 1;
889 pserialize_read_exit(s);
890 }
891 } else
892 fli->fli_cow_cnt += 1;
893
894 /*
895 * Run all copy-on-write handlers, stop on error.
896 */
897 error = 0;
898 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
899 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
900 break;
901 if (error == 0)
902 bp->b_flags |= B_COWDONE;
903
904 /*
905 * Check if other threads want to change the list.
906 */
907 if (fli->fli_cow_cnt > 1) {
908 fli->fli_cow_cnt -= 1;
909 } else {
910 s = pserialize_read_enter();
911 if (__predict_false(fmi->fmi_cow_change)) {
912 pserialize_read_exit(s);
913 mutex_enter(&fstrans_lock);
914 fli->fli_cow_cnt = 0;
915 cv_signal(&fstrans_count_cv);
916 mutex_exit(&fstrans_lock);
917 } else {
918 fli->fli_cow_cnt = 0;
919 pserialize_read_exit(s);
920 }
921 }
922
923 return error;
924 }
925
926 #if defined(DDB)
927 void fstrans_dump(int);
928
929 static void
930 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
931 {
932 char prefix[9];
933 struct fstrans_lwp_info *fli;
934
935 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
936 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
937 if (fli->fli_self != l)
938 continue;
939 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
940 if (! verbose)
941 continue;
942 }
943 printf("%-8s", prefix);
944 if (verbose)
945 printf(" @%p", fli);
946 if (fli->fli_mount == dead_rootmount)
947 printf(" <dead>");
948 else if (fli->fli_mount != NULL)
949 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
950 else
951 printf(" NULL");
952 if (fli->fli_alias != NULL)
953 printf(" alias (%s)",
954 fli->fli_alias->mnt_stat.f_mntonname);
955 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
956 printf(" gone");
957 if (fli->fli_trans_cnt == 0) {
958 printf(" -");
959 } else {
960 switch (fli->fli_lock_type) {
961 case FSTRANS_LAZY:
962 printf(" lazy");
963 break;
964 case FSTRANS_SHARED:
965 printf(" shared");
966 break;
967 case FSTRANS_EXCL:
968 printf(" excl");
969 break;
970 default:
971 printf(" %#x", fli->fli_lock_type);
972 break;
973 }
974 }
975 printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
976 prefix[0] = '\0';
977 }
978 }
979
980 static void
981 fstrans_print_mount(struct mount *mp, int verbose)
982 {
983 struct fstrans_mount_info *fmi;
984
985 fmi = mp->mnt_transinfo;
986 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
987 return;
988
989 printf("%-16s ", mp->mnt_stat.f_mntonname);
990 if (fmi == NULL) {
991 printf("(null)\n");
992 return;
993 }
994 switch (fmi->fmi_state) {
995 case FSTRANS_NORMAL:
996 printf("state normal\n");
997 break;
998 case FSTRANS_SUSPENDING:
999 printf("state suspending\n");
1000 break;
1001 case FSTRANS_SUSPENDED:
1002 printf("state suspended\n");
1003 break;
1004 default:
1005 printf("state %#x\n", fmi->fmi_state);
1006 break;
1007 }
1008 }
1009
1010 void
1011 fstrans_dump(int full)
1012 {
1013 const struct proclist_desc *pd;
1014 struct proc *p;
1015 struct lwp *l;
1016 struct mount *mp;
1017
1018 printf("Fstrans locks by lwp:\n");
1019 for (pd = proclists; pd->pd_list != NULL; pd++)
1020 PROCLIST_FOREACH(p, pd->pd_list)
1021 LIST_FOREACH(l, &p->p_lwps, l_sibling)
1022 fstrans_print_lwp(p, l, full == 1);
1023
1024 printf("Fstrans state by mount:\n");
1025 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1026 fstrans_print_mount(mp, full == 1);
1027 }
1028 #endif /* defined(DDB) */
1029