vfs_trans.c revision 1.57 1 /* $NetBSD: vfs_trans.c,v 1.57 2019/03/01 09:02:03 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.57 2019/03/01 09:02:03 hannken Exp $");
34
35 /*
36 * File system transaction operations.
37 */
38
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/atomic.h>
46 #include <sys/buf.h>
47 #include <sys/kmem.h>
48 #include <sys/mount.h>
49 #include <sys/pserialize.h>
50 #include <sys/vnode.h>
51 #include <sys/fstrans.h>
52 #include <sys/proc.h>
53
54 #include <miscfs/specfs/specdev.h>
55
/*
 * Kind of transaction a thread holds on a mount.  grant_lock() decides
 * which kinds are admitted in which fstrans state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY,			/* Granted while not suspended */
	FSTRANS_SHARED,			/* Granted while not suspending */
	FSTRANS_EXCL			/* Internal: exclusive lock */
};
61
/*
 * One registered copy-on-write handler.  Entries live on the
 * fmi_cow_handler list of a mount and are invoked by fscow_run().
 */
struct fscow_handler {
	LIST_ENTRY(fscow_handler) ch_list;	/* Link on fmi_cow_handler */
	int (*ch_func)(void *, struct buf *, bool);
						/* Called as (ch_arg, bp, data_valid) */
	void *ch_arg;				/* Opaque first argument for ch_func */
};
/*
 * Per lwp, per mount transaction state.  Every entry is on the global
 * fstrans_fli_head list; an entry in use is also on its owner's
 * l_fstrans list, linked through fli_succ.
 */
struct fstrans_lwp_info {
	struct fstrans_lwp_info *fli_succ;	/* Next entry of the owning lwp */
	struct lwp *fli_self;			/* Owner, NULL if free for reuse */
	struct mount *fli_mount;		/* Attached mount, NULL if cleared */
	struct mount *fli_alias;		/* If set, use that mount's entry */
	struct fstrans_mount_info *fli_mountinfo;
						/* Referenced mount state */
	int fli_trans_cnt;			/* Transaction nesting depth */
	int fli_cow_cnt;			/* fscow_run() nesting depth */
	enum fstrans_lock_type fli_lock_type;	/* Type set when count became 1 */
	LIST_ENTRY(fstrans_lwp_info) fli_list;	/* Link on fstrans_fli_head */
};
/*
 * Per mount transaction state, reference counted.  References are taken
 * by fstrans_mount(), by every attached fstrans_lwp_info and by
 * fscow_establish(); fstrans_mount_dtor() drops one.
 */
struct fstrans_mount_info {
	enum fstrans_state fmi_state;		/* Current fstrans state */
	unsigned int fmi_ref_cnt;		/* Number of references */
	bool fmi_gone;				/* Set by fstrans_unmount() */
	bool fmi_cow_change;			/* Handler list is being changed */
	LIST_HEAD(, fscow_handler) fmi_cow_handler;
						/* Copy-on-write handlers */
	struct mount *fmi_mount;		/* Mount this state belongs to */
};
86
/*
 * File-scope state.  vfs_suspend_lock stays held from a successful
 * vfs_suspend() until the matching vfs_resume().
 */
static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
static kmutex_t fstrans_lock;		/* Fstrans big lock. */
static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
static pserialize_t fstrans_psz;	/* Pserialize state. */
static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
					/* List of all fstrans_lwp_info. */
static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */

/* Forward declarations of the file-local helpers. */
static void fstrans_mount_dtor(struct fstrans_mount_info *);
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);

/* Defined elsewhere; the entry points in this file special-case it. */
extern struct mount *dead_rootmount;
110
111 #if defined(DIAGNOSTIC)
112
/*
 * DIAGNOSTIC only: record of one mount that went through
 * fstrans_mount() and has not yet gone through fstrans_unmount().
 */
struct fstrans_debug_mount {
	struct mount *fdm_mount;		/* The tracked mount */
	SLIST_ENTRY(fstrans_debug_mount) fdm_list;
						/* Link on the list below */
};

/* All tracked mounts; the functions using it assert fstrans_mount_lock. */
static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
    SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);
120
121 static void
122 fstrans_debug_mount(struct mount *mp)
123 {
124 struct fstrans_debug_mount *fdm, *new;
125
126 KASSERT(mutex_owned(&fstrans_mount_lock));
127
128 mutex_exit(&fstrans_mount_lock);
129 new = kmem_alloc(sizeof(*new), KM_SLEEP);
130 new->fdm_mount = mp;
131 mutex_enter(&fstrans_mount_lock);
132
133 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
134 KASSERT(fdm->fdm_mount != mp);
135 SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
136 }
137
138 static void
139 fstrans_debug_unmount(struct mount *mp)
140 {
141 struct fstrans_debug_mount *fdm;
142
143 KASSERT(mutex_owned(&fstrans_mount_lock));
144
145 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
146 if (fdm->fdm_mount == mp)
147 break;
148 KASSERT(fdm != NULL);
149 SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
150 fstrans_debug_mount, fdm_list);
151 kmem_free(fdm, sizeof(*fdm));
152 }
153
154 static void
155 fstrans_debug_validate_mount(struct mount *mp)
156 {
157 struct fstrans_debug_mount *fdm;
158
159 KASSERT(mutex_owned(&fstrans_mount_lock));
160
161 SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
162 if (fdm->fdm_mount == mp)
163 break;
164 KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
165 }
166
167 #else /* defined(DIAGNOSTIC) */
168
/* Without DIAGNOSTIC the mount tracking hooks expand to nothing. */
#define fstrans_debug_mount(mp)
#define fstrans_debug_unmount(mp)
#define fstrans_debug_validate_mount(mp)
172
173 #endif /* defined(DIAGNOSTIC) */
174
175 /*
176 * Initialize.
177 */
178 void
179 fstrans_init(void)
180 {
181
182 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
183 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
184 mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
185 cv_init(&fstrans_state_cv, "fstchg");
186 cv_init(&fstrans_count_cv, "fstcnt");
187 fstrans_psz = pserialize_create();
188 LIST_INIT(&fstrans_fli_head);
189 }
190
191 /*
192 * Deallocate lwp state.
193 */
194 void
195 fstrans_lwp_dtor(lwp_t *l)
196 {
197 struct fstrans_lwp_info *fli, *fli_next;
198
199 for (fli = l->l_fstrans; fli; fli = fli_next) {
200 KASSERT(fli->fli_trans_cnt == 0);
201 KASSERT(fli->fli_cow_cnt == 0);
202 KASSERT(fli->fli_self == l);
203 if (fli->fli_mount != NULL)
204 fstrans_mount_dtor(fli->fli_mountinfo);
205 fli_next = fli->fli_succ;
206 fli->fli_mount = NULL;
207 fli->fli_alias = NULL;
208 fli->fli_mountinfo = NULL;
209 membar_sync();
210 fli->fli_self = NULL;
211 }
212
213 l->l_fstrans = NULL;
214 }
215
216 /*
217 * Dereference mount state.
218 */
219 static void
220 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
221 {
222
223 mutex_enter(&fstrans_mount_lock);
224
225 KASSERT(fmi != NULL);
226 fmi->fmi_ref_cnt -= 1;
227 if (fmi->fmi_ref_cnt > 0) {
228 mutex_exit(&fstrans_mount_lock);
229 return;
230 }
231
232 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
233 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
234
235 KASSERT(fstrans_gone_count > 0);
236 fstrans_gone_count -= 1;
237
238 mutex_exit(&fstrans_mount_lock);
239
240 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
241 kmem_free(fmi, sizeof(*fmi));
242 }
243
244 /*
245 * Allocate mount state.
246 */
247 int
248 fstrans_mount(struct mount *mp)
249 {
250 struct fstrans_mount_info *newfmi;
251
252 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
253 newfmi->fmi_state = FSTRANS_NORMAL;
254 newfmi->fmi_ref_cnt = 1;
255 newfmi->fmi_gone = false;
256 LIST_INIT(&newfmi->fmi_cow_handler);
257 newfmi->fmi_cow_change = false;
258 newfmi->fmi_mount = mp;
259
260 mutex_enter(&fstrans_mount_lock);
261 mp->mnt_transinfo = newfmi;
262 fstrans_debug_mount(mp);
263 mutex_exit(&fstrans_mount_lock);
264
265 return 0;
266 }
267
268 /*
269 * Deallocate mount state.
270 */
271 void
272 fstrans_unmount(struct mount *mp)
273 {
274 struct fstrans_mount_info *fmi = mp->mnt_transinfo;
275
276 KASSERT(fmi != NULL);
277
278 mutex_enter(&fstrans_mount_lock);
279 fstrans_debug_unmount(mp);
280 fmi->fmi_gone = true;
281 mp->mnt_transinfo = NULL;
282 fstrans_gone_count += 1;
283 mutex_exit(&fstrans_mount_lock);
284
285 fstrans_mount_dtor(fmi);
286 }
287
288 /*
289 * Clear mount entries whose mount is gone.
290 */
291 static void
292 fstrans_clear_lwp_info(void)
293 {
294 struct fstrans_lwp_info **p, *fli;
295
296 /*
297 * Scan our list clearing entries whose mount is gone.
298 */
299 for (p = &curlwp->l_fstrans; *p; p = &(*p)->fli_succ) {
300 fli = *p;
301 if (fli->fli_mount != NULL &&
302 fli->fli_mountinfo->fmi_gone &&
303 fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
304 *p = (*p)->fli_succ;
305 fstrans_mount_dtor(fli->fli_mountinfo);
306 fli->fli_mount = NULL;
307 fli->fli_alias = NULL;
308 fli->fli_mountinfo = NULL;
309 membar_sync();
310 fli->fli_self = NULL;
311
312 if (*p == NULL)
313 break;
314 }
315 }
316 }
317
/*
 * Allocate and return per lwp info for this mount.
 *
 * If the calling lwp already has an entry for "mp" it is returned as is.
 * Otherwise an entry is taken from the global list (one whose fli_self
 * is NULL) or newly allocated, pushed onto curlwp->l_fstrans and
 * attached to the mount state of "mp", taking a reference on it.
 *
 * If "mp" has a lower mount (mnt_lower) the function recurses for it,
 * records the mount of the entry returned by the recursion as fli_alias
 * of the new entry and returns that entry instead of the new one.
 */
static struct fstrans_lwp_info *
fstrans_alloc_lwp_info(struct mount *mp)
{
	struct fstrans_lwp_info *fli, *fli2;
	struct fstrans_mount_info *fmi;

	/* Nothing to do if this lwp already has an entry for the mount. */
	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
		if (fli->fli_mount == mp)
			return fli;
	}

	/*
	 * Try to reuse a cleared entry or allocate a new one.
	 */
	mutex_enter(&fstrans_lock);
	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
		/*
		 * NOTE(review): presumably pairs with the membar_sync()
		 * issued before fli_self is cleared -- confirm.
		 */
		membar_sync();
		if (fli->fli_self == NULL) {
			KASSERT(fli->fli_mount == NULL);
			KASSERT(fli->fli_trans_cnt == 0);
			KASSERT(fli->fli_cow_cnt == 0);
			fli->fli_self = curlwp;
			fli->fli_succ = curlwp->l_fstrans;
			curlwp->l_fstrans = fli;
			break;
		}
	}
	mutex_exit(&fstrans_lock);

	/* The loop ran to its end: no free entry, allocate one unlocked. */
	if (fli == NULL) {
		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
		mutex_enter(&fstrans_lock);
		memset(fli, 0, sizeof(*fli));
		fli->fli_self = curlwp;
		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
		mutex_exit(&fstrans_lock);
		fli->fli_succ = curlwp->l_fstrans;
		curlwp->l_fstrans = fli;
	}

	/*
	 * Attach the entry to the mount; its mnt_transinfo must be valid.
	 */

	mutex_enter(&fstrans_mount_lock);
	fstrans_debug_validate_mount(mp);
	fmi = mp->mnt_transinfo;
	KASSERT(fmi != NULL);
	fli->fli_mount = mp;
	fli->fli_mountinfo = fmi;
	fmi->fmi_ref_cnt += 1;
	/* From here on "mp" is the lower mount, NULL if there is none. */
	mp = mp->mnt_lower;
	mutex_exit(&fstrans_mount_lock);

	if (mp) {
		/* Use the lower mount's entry in place of this one. */
		fli2 = fstrans_alloc_lwp_info(mp);
		fli->fli_alias = fli2->fli_mount;

		fli = fli2;
	}

	return fli;
}
384
385 /*
386 * Retrieve the per lwp info for this mount allocating if necessary.
387 */
388 static inline struct fstrans_lwp_info *
389 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
390 {
391 struct fstrans_lwp_info *fli, *fli2;
392
393 /*
394 * Scan our list for a match.
395 */
396 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
397 if (fli->fli_mount == mp) {
398 if (fli->fli_alias != NULL) {
399 for (fli2 = curlwp->l_fstrans; fli2;
400 fli2 = fli2->fli_succ) {
401 if (fli2->fli_mount == fli->fli_alias)
402 break;
403 }
404 KASSERT(fli2 != NULL);
405 fli = fli2;
406 }
407 break;
408 }
409 }
410
411 if (do_alloc) {
412 if (__predict_false(fli == NULL))
413 fli = fstrans_alloc_lwp_info(mp);
414 KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
415 } else {
416 KASSERT(fli != NULL);
417 }
418
419 return fli;
420 }
421
422 /*
423 * Check if this lock type is granted at this state.
424 */
425 static bool
426 grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
427 {
428
429 if (__predict_true(state == FSTRANS_NORMAL))
430 return true;
431 if (type == FSTRANS_EXCL)
432 return true;
433 if (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
434 return true;
435
436 return false;
437 }
438
/*
 * Start a transaction.  If this thread already has a transaction on this
 * file system increment the reference counter.
 *
 * mp:        mount to start the transaction on.
 * lock_type: kind of transaction wanted.
 * wait:      if zero, fail with EBUSY instead of sleeping when the
 *            current state does not grant "lock_type".
 *
 * Returns 0 on success or EBUSY (only possible with wait == 0).
 */
static inline int
_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	/* Transactions on the dead root mount are no-ops. */
	if (mp == dead_rootmount)
		return 0;
#endif

	ASSERT_SLEEPABLE();

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/* Nested start: only the depth changes, the lock type is kept. */
	if (fli->fli_trans_cnt > 0) {
		KASSERT(lock_type != FSTRANS_EXCL);
		fli->fli_trans_cnt += 1;

		return 0;
	}

	/*
	 * Fast path: check the state and enter the transaction inside a
	 * pserialize read section, without taking fstrans_lock.
	 */
	s = pserialize_read_enter();
	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
		fli->fli_trans_cnt = 1;
		fli->fli_lock_type = lock_type;
		pserialize_read_exit(s);

		return 0;
	}
	pserialize_read_exit(s);

	if (! wait)
		return EBUSY;

	/* Slow path: sleep until a state change grants this lock type. */
	mutex_enter(&fstrans_lock);
	while (! grant_lock(fmi->fmi_state, lock_type))
		cv_wait(&fstrans_state_cv, &fstrans_lock);
	fli->fli_trans_cnt = 1;
	fli->fli_lock_type = lock_type;
	mutex_exit(&fstrans_lock);

	return 0;
}
489
490 void
491 fstrans_start(struct mount *mp)
492 {
493 int error __diagused;
494
495 error = _fstrans_start(mp, FSTRANS_SHARED, 1);
496 KASSERT(error == 0);
497 }
498
499 int
500 fstrans_start_nowait(struct mount *mp)
501 {
502
503 return _fstrans_start(mp, FSTRANS_SHARED, 0);
504 }
505
506 void
507 fstrans_start_lazy(struct mount *mp)
508 {
509 int error __diagused;
510
511 error = _fstrans_start(mp, FSTRANS_LAZY, 1);
512 KASSERT(error == 0);
513 }
514
/*
 * Finish a transaction.
 *
 * A nested transaction only decrements the depth.  When the outermost
 * transaction ends the count is set to zero; if the state is not
 * FSTRANS_NORMAL this is done under fstrans_lock and a waiter on
 * fstrans_count_cv is signalled.
 */
void
fstrans_done(struct mount *mp)
{
	int s;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

#ifndef FSTRANS_DEAD_ENABLED
	/* Transactions on the dead root mount are no-ops. */
	if (mp == dead_rootmount)
		return;
#endif

	fli = fstrans_get_lwp_info(mp, false);
	fmi = fli->fli_mountinfo;
	KASSERT(fli->fli_trans_cnt > 0);

	if (fli->fli_trans_cnt > 1) {
		fli->fli_trans_cnt -= 1;

		return;
	}

	/*
	 * Some mount state is gone: release this lwp's idle entries for
	 * gone mounts.  The count of "fli" is still 1 here, so this
	 * entry itself is not cleared.
	 */
	if (__predict_false(fstrans_gone_count > 0))
		fstrans_clear_lwp_info();

	/* Fast path: leave without taking fstrans_lock. */
	s = pserialize_read_enter();
	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
		fli->fli_trans_cnt = 0;
		pserialize_read_exit(s);

		return;
	}
	pserialize_read_exit(s);

	/* A state change is in progress and may be waiting for us. */
	mutex_enter(&fstrans_lock);
	fli->fli_trans_cnt = 0;
	cv_signal(&fstrans_count_cv);
	mutex_exit(&fstrans_lock);
}
557
558 /*
559 * Check if this thread has an exclusive lock.
560 */
561 int
562 fstrans_is_owner(struct mount *mp)
563 {
564 struct fstrans_lwp_info *fli;
565
566 KASSERT(mp != dead_rootmount);
567
568 fli = fstrans_get_lwp_info(mp, true);
569
570 if (fli->fli_trans_cnt == 0)
571 return 0;
572
573 return (fli->fli_lock_type == FSTRANS_EXCL);
574 }
575
576 /*
577 * True, if no thread is in a transaction not granted at the current state.
578 */
579 static bool
580 state_change_done(const struct fstrans_mount_info *fmi)
581 {
582 struct fstrans_lwp_info *fli;
583
584 KASSERT(mutex_owned(&fstrans_lock));
585
586 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
587 if (fli->fli_mountinfo != fmi)
588 continue;
589 if (fli->fli_trans_cnt == 0)
590 continue;
591 if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
592 continue;
593
594 return false;
595 }
596
597 return true;
598 }
599
/*
 * Set new file system state.
 *
 * Publishes "new_state" and waits until no thread is left in a
 * transaction that the new state does not grant.  The wait may be
 * interrupted; in that case the state is set back to FSTRANS_NORMAL and
 * the error from cv_wait_sig() is returned.  Returns 0 otherwise.
 *
 * Leaving FSTRANS_NORMAL makes the caller hold an FSTRANS_EXCL
 * transaction; going back to FSTRANS_NORMAL finishes a transaction.
 */
int
fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
{
	int error;
	enum fstrans_state old_state;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;

	KASSERT(mp != dead_rootmount);

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;
	old_state = fmi->fmi_state;
	if (old_state == new_state)
		return 0;

	mutex_enter(&fstrans_lock);
	fmi->fmi_state = new_state;
	pserialize_perform(fstrans_psz);

	/*
	 * All threads see the new state now.
	 * Wait for transactions invalid at this state to leave.
	 */
	error = 0;
	while (! state_change_done(fmi)) {
		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
		if (error) {
			/* Interrupted: give up and fall back to normal. */
			new_state = fmi->fmi_state = FSTRANS_NORMAL;
			break;
		}
	}
	/* Wake threads sleeping for a state that grants their lock. */
	cv_broadcast(&fstrans_state_cv);
	mutex_exit(&fstrans_lock);

	/*
	 * Nothing to do if an interrupted change ended where it began
	 * (old and new state both FSTRANS_NORMAL).
	 */
	if (old_state != new_state) {
		if (old_state == FSTRANS_NORMAL)
			_fstrans_start(mp, FSTRANS_EXCL, 1);
		if (new_state == FSTRANS_NORMAL)
			fstrans_done(mp);
	}

	return error;
}
647
648 /*
649 * Get current file system state.
650 */
651 enum fstrans_state
652 fstrans_getstate(struct mount *mp)
653 {
654 struct fstrans_lwp_info *fli;
655 struct fstrans_mount_info *fmi;
656
657 KASSERT(mp != dead_rootmount);
658
659 fli = fstrans_get_lwp_info(mp, true);
660 fmi = fli->fli_mountinfo;
661
662 return fmi->fmi_state;
663 }
664
665 /*
666 * Request a filesystem to suspend all operations.
667 */
668 int
669 vfs_suspend(struct mount *mp, int nowait)
670 {
671 struct fstrans_lwp_info *fli;
672 int error;
673
674 if (mp == dead_rootmount)
675 return EOPNOTSUPP;
676
677 fli = fstrans_get_lwp_info(mp, true);
678 mp = fli->fli_mount;
679
680 if (nowait) {
681 if (!mutex_tryenter(&vfs_suspend_lock))
682 return EWOULDBLOCK;
683 } else
684 mutex_enter(&vfs_suspend_lock);
685
686 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
687 mutex_exit(&vfs_suspend_lock);
688
689 return error;
690 }
691
692 /*
693 * Request a filesystem to resume all operations.
694 */
695 void
696 vfs_resume(struct mount *mp)
697 {
698 struct fstrans_lwp_info *fli;
699
700 KASSERT(mp != dead_rootmount);
701
702 fli = fstrans_get_lwp_info(mp, false);
703 mp = fli->fli_mount;
704
705 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
706 mutex_exit(&vfs_suspend_lock);
707 }
708
709
710 /*
711 * True, if no thread is running a cow handler.
712 */
713 static bool
714 cow_state_change_done(const struct fstrans_mount_info *fmi)
715 {
716 struct fstrans_lwp_info *fli;
717
718 KASSERT(mutex_owned(&fstrans_lock));
719 KASSERT(fmi->fmi_cow_change);
720
721 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
722 if (fli->fli_mount != fmi->fmi_mount)
723 continue;
724 if (fli->fli_cow_cnt == 0)
725 continue;
726
727 return false;
728 }
729
730 return true;
731 }
732
/*
 * Prepare for changing this mounts cow list.
 * Returns with fstrans_lock locked.
 *
 * Only one thread at a time may change the list: fmi_cow_change is the
 * "change in progress" flag, cleared again by cow_change_done().
 */
static void
cow_change_enter(struct fstrans_mount_info *fmi)
{

	mutex_enter(&fstrans_lock);

	/*
	 * Wait for other threads changing the list.
	 */
	while (fmi->fmi_cow_change)
		cv_wait(&fstrans_state_cv, &fstrans_lock);

	/*
	 * Wait until all threads are aware of a state change.
	 */
	fmi->fmi_cow_change = true;
	pserialize_perform(fstrans_psz);

	/* Wait for threads still running handlers of this mount. */
	while (! cow_state_change_done(fmi))
		cv_wait(&fstrans_count_cv, &fstrans_lock);
}
758
/*
 * Done changing this mounts cow list.
 * Must be called with fstrans_lock held, as left by cow_change_enter();
 * clears the change flag, wakes the threads sleeping on
 * fstrans_state_cv and releases fstrans_lock.
 */
static void
cow_change_done(struct fstrans_mount_info *fmi)
{

	KASSERT(mutex_owned(&fstrans_lock));

	fmi->fmi_cow_change = false;
	pserialize_perform(fstrans_psz);

	cv_broadcast(&fstrans_state_cv);

	mutex_exit(&fstrans_lock);
}
775
776 /*
777 * Add a handler to this mount.
778 */
779 int
780 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
781 void *arg)
782 {
783 struct fstrans_mount_info *fmi;
784 struct fscow_handler *newch;
785
786 KASSERT(mp != dead_rootmount);
787
788 mutex_enter(&fstrans_mount_lock);
789 fmi = mp->mnt_transinfo;
790 KASSERT(fmi != NULL);
791 fmi->fmi_ref_cnt += 1;
792 mutex_exit(&fstrans_mount_lock);
793
794 newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
795 newch->ch_func = func;
796 newch->ch_arg = arg;
797
798 cow_change_enter(fmi);
799 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
800 cow_change_done(fmi);
801
802 return 0;
803 }
804
805 /*
806 * Remove a handler from this mount.
807 */
808 int
809 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
810 void *arg)
811 {
812 struct fstrans_mount_info *fmi;
813 struct fscow_handler *hp = NULL;
814
815 KASSERT(mp != dead_rootmount);
816
817 fmi = mp->mnt_transinfo;
818 KASSERT(fmi != NULL);
819
820 cow_change_enter(fmi);
821 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
822 if (hp->ch_func == func && hp->ch_arg == arg)
823 break;
824 if (hp != NULL) {
825 LIST_REMOVE(hp, ch_list);
826 kmem_free(hp, sizeof(*hp));
827 }
828 cow_change_done(fmi);
829
830 fstrans_mount_dtor(fmi);
831
832 return hp ? 0 : EINVAL;
833 }
834
/*
 * Check for need to copy block that is about to be written.
 *
 * Runs the copy-on-write handlers of the mount "bp" belongs to, passing
 * each of them "bp" and "data_valid", and stops at the first handler
 * returning an error.  Returns that error, or 0.  B_COWDONE is set on
 * the buffer when there is nothing to run or all handlers succeeded.
 */
int
fscow_run(struct buf *bp, bool data_valid)
{
	int error, s;
	struct mount *mp;
	struct fstrans_lwp_info *fli;
	struct fstrans_mount_info *fmi;
	struct fscow_handler *hp;

	/*
	 * First check if we need run the copy-on-write handler.
	 */
	if ((bp->b_flags & B_COWDONE))
		return 0;
	if (bp->b_vp == NULL) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}
	/* For a block device use the file system mounted on it. */
	if (bp->b_vp->v_type == VBLK)
		mp = spec_node_getmountedfs(bp->b_vp);
	else
		mp = bp->b_vp->v_mount;
	if (mp == NULL || mp == dead_rootmount) {
		bp->b_flags |= B_COWDONE;
		return 0;
	}

	fli = fstrans_get_lwp_info(mp, true);
	fmi = fli->fli_mountinfo;

	/*
	 * On non-recursed run check if other threads
	 * want to change the list.
	 */
	if (fli->fli_cow_cnt == 0) {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A change is in progress, wait until it is done. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			while (fmi->fmi_cow_change)
				cv_wait(&fstrans_state_cv, &fstrans_lock);
			fli->fli_cow_cnt = 1;
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 1;
			pserialize_read_exit(s);
		}
	} else
		fli->fli_cow_cnt += 1;

	/*
	 * Run all copy-on-write handlers, stop on error.
	 */
	error = 0;
	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
			break;
	if (error == 0)
		bp->b_flags |= B_COWDONE;

	/*
	 * Check if other threads want to change the list.
	 */
	if (fli->fli_cow_cnt > 1) {
		fli->fli_cow_cnt -= 1;
	} else {
		s = pserialize_read_enter();
		if (__predict_false(fmi->fmi_cow_change)) {
			/* A changer may be waiting for us, signal it. */
			pserialize_read_exit(s);
			mutex_enter(&fstrans_lock);
			fli->fli_cow_cnt = 0;
			cv_signal(&fstrans_count_cv);
			mutex_exit(&fstrans_lock);
		} else {
			fli->fli_cow_cnt = 0;
			pserialize_read_exit(s);
		}
	}

	return error;
}
919
920 #if defined(DDB)
/* Debugger (DDB) entry point, defined at the end of this section. */
void fstrans_dump(int);
922
923 static void
924 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
925 {
926 char prefix[9];
927 struct fstrans_lwp_info *fli;
928
929 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
930 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
931 if (fli->fli_self != l)
932 continue;
933 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
934 if (! verbose)
935 continue;
936 }
937 printf("%-8s", prefix);
938 if (verbose)
939 printf(" @%p", fli);
940 if (fli->fli_mount == dead_rootmount)
941 printf(" <dead>");
942 else if (fli->fli_mount != NULL)
943 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
944 else
945 printf(" NULL");
946 if (fli->fli_alias != NULL)
947 printf(" alias (%s)",
948 fli->fli_alias->mnt_stat.f_mntonname);
949 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
950 printf(" gone");
951 if (fli->fli_trans_cnt == 0) {
952 printf(" -");
953 } else {
954 switch (fli->fli_lock_type) {
955 case FSTRANS_LAZY:
956 printf(" lazy");
957 break;
958 case FSTRANS_SHARED:
959 printf(" shared");
960 break;
961 case FSTRANS_EXCL:
962 printf(" excl");
963 break;
964 default:
965 printf(" %#x", fli->fli_lock_type);
966 break;
967 }
968 }
969 printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
970 prefix[0] = '\0';
971 }
972 }
973
974 static void
975 fstrans_print_mount(struct mount *mp, int verbose)
976 {
977 struct fstrans_mount_info *fmi;
978
979 fmi = mp->mnt_transinfo;
980 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
981 return;
982
983 printf("%-16s ", mp->mnt_stat.f_mntonname);
984 if (fmi == NULL) {
985 printf("(null)\n");
986 return;
987 }
988 switch (fmi->fmi_state) {
989 case FSTRANS_NORMAL:
990 printf("state normal\n");
991 break;
992 case FSTRANS_SUSPENDING:
993 printf("state suspending\n");
994 break;
995 case FSTRANS_SUSPENDED:
996 printf("state suspended\n");
997 break;
998 default:
999 printf("state %#x\n", fmi->fmi_state);
1000 break;
1001 }
1002 }
1003
1004 void
1005 fstrans_dump(int full)
1006 {
1007 const struct proclist_desc *pd;
1008 struct proc *p;
1009 struct lwp *l;
1010 struct mount *mp;
1011
1012 printf("Fstrans locks by lwp:\n");
1013 for (pd = proclists; pd->pd_list != NULL; pd++)
1014 PROCLIST_FOREACH(p, pd->pd_list)
1015 LIST_FOREACH(l, &p->p_lwps, l_sibling)
1016 fstrans_print_lwp(p, l, full == 1);
1017
1018 printf("Fstrans state by mount:\n");
1019 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1020 fstrans_print_mount(mp, full == 1);
1021 }
1022 #endif /* defined(DDB) */
1023