vfs_trans.c revision 1.73 1 /* $NetBSD: vfs_trans.c,v 1.73 2024/12/07 02:27:38 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2007, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.73 2024/12/07 02:27:38 riastradh Exp $");
34
35 /*
36 * File system transaction operations.
37 */
38
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42
43 #include <sys/param.h>
44 #include <sys/types.h>
45
46 #include <sys/atomic.h>
47 #include <sys/buf.h>
48 #include <sys/fstrans.h>
49 #include <sys/hash.h>
50 #include <sys/kmem.h>
51 #include <sys/mount.h>
52 #include <sys/pool.h>
53 #include <sys/proc.h>
54 #include <sys/pserialize.h>
55 #include <sys/sdt.h>
56 #include <sys/systm.h>
57 #include <sys/vnode.h>
58
59 #include <miscfs/deadfs/deadfs.h>
60 #include <miscfs/specfs/specdev.h>
61
/* Element count passed to hashinit() when creating the mount info table. */
#define	FSTRANS_MOUNT_HASHSIZE	32
63
/*
 * Kind of transaction lock an lwp requests on a mount; see grant_lock()
 * for the states in which each kind is granted.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY,		/* Granted while not suspended */
	FSTRANS_SHARED		/* Granted while not suspending */
};
68
69 struct fscow_handler {
70 LIST_ENTRY(fscow_handler) ch_list;
71 int (*ch_func)(void *, struct buf *, bool);
72 void *ch_arg;
73 };
74 struct fstrans_lwp_info {
75 struct fstrans_lwp_info *fli_succ;
76 struct lwp *fli_self;
77 struct mount *fli_mount;
78 struct fstrans_lwp_info *fli_alias;
79 struct fstrans_mount_info *fli_mountinfo;
80 int fli_trans_cnt;
81 int fli_alias_cnt;
82 int fli_cow_cnt;
83 enum fstrans_lock_type fli_lock_type;
84 LIST_ENTRY(fstrans_lwp_info) fli_list;
85 };
86 struct fstrans_mount_info {
87 enum fstrans_state fmi_state;
88 unsigned int fmi_ref_cnt;
89 bool fmi_gone;
90 bool fmi_cow_change;
91 SLIST_ENTRY(fstrans_mount_info) fmi_hash;
92 LIST_HEAD(, fscow_handler) fmi_cow_handler;
93 struct mount *fmi_mount;
94 struct fstrans_mount_info *fmi_lower_info;
95 struct lwp *fmi_owner;
96 };
/* Head type of one bucket of the mount info hash table. */
SLIST_HEAD(fstrans_mount_hashhead, fstrans_mount_info);
98
99 static kmutex_t vfs_suspend_lock /* Serialize suspensions. */
100 __cacheline_aligned;
101 static kmutex_t fstrans_lock /* Fstrans big lock. */
102 __cacheline_aligned;
103 static kcondvar_t fstrans_state_cv; /* Fstrans or cow state changed. */
104 static kcondvar_t fstrans_count_cv; /* Fstrans or cow count changed. */
105 static pserialize_t fstrans_psz; /* Pserialize state. */
106 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
107 /* List of all fstrans_lwp_info. */
108 static pool_cache_t fstrans_lwp_cache; /* Cache of fstrans_lwp_info. */
109
110 static u_long fstrans_mount_hashmask;
111 static struct fstrans_mount_hashhead *fstrans_mount_hashtab;
112 static int fstrans_gone_count; /* Number of fstrans_mount_info gone. */
113
/* Mount info management. */
static inline uint32_t fstrans_mount_hash(struct mount *);
static inline struct fstrans_mount_info *fstrans_mount_get(struct mount *);
static void fstrans_mount_dtor(struct fstrans_mount_info *);

/* Per lwp info management. */
static void fstrans_clear_lwp_info(void);
static inline struct fstrans_lwp_info *
    fstrans_get_lwp_info(struct mount *, bool);
static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
static int fstrans_lwp_pcc(void *, void *, int);
static void fstrans_lwp_pcd(void *, void *);

/* Transactions and state changes. */
static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
static bool grant_lock(const struct fstrans_mount_info *,
    const enum fstrans_lock_type);
static bool state_change_done(const struct fstrans_mount_info *);

/* Copy-on-write handler list changes. */
static bool cow_state_change_done(const struct fstrans_mount_info *);
static void cow_change_enter(struct fstrans_mount_info *);
static void cow_change_done(struct fstrans_mount_info *);
130
131 /*
132 * Initialize.
133 */
134 void
135 fstrans_init(void)
136 {
137
138 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
139 mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
140 cv_init(&fstrans_state_cv, "fstchg");
141 cv_init(&fstrans_count_cv, "fstcnt");
142 fstrans_psz = pserialize_create();
143 LIST_INIT(&fstrans_fli_head);
144 fstrans_lwp_cache = pool_cache_init(sizeof(struct fstrans_lwp_info),
145 coherency_unit, 0, 0, "fstlwp", NULL, IPL_NONE,
146 fstrans_lwp_pcc, fstrans_lwp_pcd, NULL);
147 KASSERT(fstrans_lwp_cache != NULL);
148 fstrans_mount_hashtab = hashinit(FSTRANS_MOUNT_HASHSIZE, HASH_SLIST,
149 true, &fstrans_mount_hashmask);
150 }
151
152 /*
153 * pool_cache constructor for fstrans_lwp_info. Updating the global list
154 * produces cache misses on MP. Minimise by keeping free entries on list.
155 */
156 int
157 fstrans_lwp_pcc(void *arg, void *obj, int flags)
158 {
159 struct fstrans_lwp_info *fli = obj;
160
161 memset(fli, 0, sizeof(*fli));
162
163 mutex_enter(&fstrans_lock);
164 LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
165 mutex_exit(&fstrans_lock);
166
167 return 0;
168 }
169
170 /*
171 * pool_cache destructor
172 */
173 void
174 fstrans_lwp_pcd(void *arg, void *obj)
175 {
176 struct fstrans_lwp_info *fli = obj;
177
178 mutex_enter(&fstrans_lock);
179 LIST_REMOVE(fli, fli_list);
180 mutex_exit(&fstrans_lock);
181 }
182
183 /*
184 * Deallocate lwp state.
185 */
186 void
187 fstrans_lwp_dtor(lwp_t *l)
188 {
189 struct fstrans_lwp_info *fli, *fli_next;
190
191 if (l->l_fstrans == NULL)
192 return;
193
194 mutex_enter(&fstrans_lock);
195 for (fli = l->l_fstrans; fli; fli = fli_next) {
196 KASSERT(fli->fli_trans_cnt == 0);
197 KASSERT(fli->fli_cow_cnt == 0);
198 KASSERT(fli->fli_self == l);
199 if (fli->fli_mount != NULL)
200 fstrans_mount_dtor(fli->fli_mountinfo);
201 fli_next = fli->fli_succ;
202 fli->fli_alias_cnt = 0;
203 fli->fli_mount = NULL;
204 fli->fli_alias = NULL;
205 fli->fli_mountinfo = NULL;
206 fli->fli_self = NULL;
207 }
208 mutex_exit(&fstrans_lock);
209
210 for (fli = l->l_fstrans; fli; fli = fli_next) {
211 fli_next = fli->fli_succ;
212 pool_cache_put(fstrans_lwp_cache, fli);
213 }
214 l->l_fstrans = NULL;
215 }
216
217 /*
218 * mount pointer to hash
219 */
220 static inline uint32_t
221 fstrans_mount_hash(struct mount *mp)
222 {
223
224 return hash32_buf(&mp, sizeof(mp), HASH32_BUF_INIT) &
225 fstrans_mount_hashmask;
226 }
227
228 /*
229 * retrieve fstrans_mount_info by mount or NULL
230 */
231 static inline struct fstrans_mount_info *
232 fstrans_mount_get(struct mount *mp)
233 {
234 uint32_t indx;
235 struct fstrans_mount_info *fmi, *fmi_lower;
236
237 KASSERT(mutex_owned(&fstrans_lock));
238
239 indx = fstrans_mount_hash(mp);
240 SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash) {
241 if (fmi->fmi_mount == mp) {
242 if (__predict_false(mp->mnt_lower != NULL &&
243 fmi->fmi_lower_info == NULL)) {
244 /*
245 * Intern the lower/lowest mount into
246 * this mount info on first lookup.
247 */
248 KASSERT(fmi->fmi_ref_cnt == 1);
249
250 fmi_lower = fstrans_mount_get(mp->mnt_lower);
251 if (fmi_lower && fmi_lower->fmi_lower_info)
252 fmi_lower = fmi_lower->fmi_lower_info;
253 if (fmi_lower == NULL)
254 return NULL;
255 fmi->fmi_lower_info = fmi_lower;
256 fmi->fmi_lower_info->fmi_ref_cnt += 1;
257 }
258 return fmi;
259 }
260 }
261
262 return NULL;
263 }
264
265 /*
266 * Dereference mount state.
267 */
268 static void
269 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
270 {
271
272 KASSERT(mutex_owned(&fstrans_lock));
273
274 KASSERT(fmi != NULL);
275 fmi->fmi_ref_cnt -= 1;
276 if (__predict_true(fmi->fmi_ref_cnt > 0)) {
277 return;
278 }
279
280 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
281 KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
282 KASSERT(fmi->fmi_owner == NULL);
283
284 if (fmi->fmi_lower_info)
285 fstrans_mount_dtor(fmi->fmi_lower_info);
286
287 KASSERT(fstrans_gone_count > 0);
288 fstrans_gone_count -= 1;
289
290 KASSERT(fmi->fmi_mount->mnt_lower == NULL);
291
292 kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
293 kmem_free(fmi, sizeof(*fmi));
294 }
295
296 /*
297 * Allocate mount state.
298 */
299 int
300 fstrans_mount(struct mount *mp)
301 {
302 uint32_t indx;
303 struct fstrans_mount_info *newfmi;
304
305 indx = fstrans_mount_hash(mp);
306
307 newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
308 newfmi->fmi_state = FSTRANS_NORMAL;
309 newfmi->fmi_ref_cnt = 1;
310 newfmi->fmi_gone = false;
311 LIST_INIT(&newfmi->fmi_cow_handler);
312 newfmi->fmi_cow_change = false;
313 newfmi->fmi_mount = mp;
314 newfmi->fmi_lower_info = NULL;
315 newfmi->fmi_owner = NULL;
316
317 mutex_enter(&fstrans_lock);
318 SLIST_INSERT_HEAD(&fstrans_mount_hashtab[indx], newfmi, fmi_hash);
319 mutex_exit(&fstrans_lock);
320
321 return 0;
322 }
323
324 /*
325 * Deallocate mount state.
326 */
327 void
328 fstrans_unmount(struct mount *mp)
329 {
330 uint32_t indx;
331 struct fstrans_mount_info *fmi;
332
333 indx = fstrans_mount_hash(mp);
334
335 mutex_enter(&fstrans_lock);
336 fmi = fstrans_mount_get(mp);
337 KASSERT(fmi != NULL);
338 fmi->fmi_gone = true;
339 SLIST_REMOVE(&fstrans_mount_hashtab[indx],
340 fmi, fstrans_mount_info, fmi_hash);
341 fstrans_gone_count += 1;
342 fstrans_mount_dtor(fmi);
343 mutex_exit(&fstrans_lock);
344 }
345
346 /*
347 * Clear mount entries whose mount is gone.
348 */
349 static void
350 fstrans_clear_lwp_info(void)
351 {
352 struct fstrans_lwp_info **p, *fli, *tofree = NULL;
353
354 /*
355 * Scan our list clearing entries whose mount is gone.
356 */
357 mutex_enter(&fstrans_lock);
358 for (p = &curlwp->l_fstrans; *p; ) {
359 fli = *p;
360 if (fli->fli_mount != NULL &&
361 fli->fli_mountinfo->fmi_gone &&
362 fli->fli_trans_cnt == 0 &&
363 fli->fli_cow_cnt == 0 &&
364 fli->fli_alias_cnt == 0) {
365 *p = (*p)->fli_succ;
366 fstrans_mount_dtor(fli->fli_mountinfo);
367 if (fli->fli_alias) {
368 KASSERT(fli->fli_alias->fli_alias_cnt > 0);
369 fli->fli_alias->fli_alias_cnt--;
370 }
371 fli->fli_mount = NULL;
372 fli->fli_alias = NULL;
373 fli->fli_mountinfo = NULL;
374 fli->fli_self = NULL;
375 p = &curlwp->l_fstrans;
376 fli->fli_succ = tofree;
377 tofree = fli;
378 } else {
379 p = &(*p)->fli_succ;
380 }
381 }
382 #ifdef DIAGNOSTIC
383 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
384 if (fli->fli_alias != NULL)
385 KASSERT(fli->fli_alias->fli_self == curlwp);
386 #endif /* DIAGNOSTIC */
387 mutex_exit(&fstrans_lock);
388
389 while (tofree != NULL) {
390 fli = tofree;
391 tofree = fli->fli_succ;
392 pool_cache_put(fstrans_lwp_cache, fli);
393 }
394 }
395
396 /*
397 * Allocate and return per lwp info for this mount.
398 */
399 static struct fstrans_lwp_info *
400 fstrans_alloc_lwp_info(struct mount *mp)
401 {
402 struct fstrans_lwp_info *fli, *fli_lower;
403 struct fstrans_mount_info *fmi;
404
405 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
406 if (fli->fli_mount == mp)
407 return fli;
408 }
409
410 /*
411 * Lookup mount info and get lower mount per lwp info.
412 */
413 mutex_enter(&fstrans_lock);
414 fmi = fstrans_mount_get(mp);
415 if (fmi == NULL) {
416 mutex_exit(&fstrans_lock);
417 return NULL;
418 }
419 fmi->fmi_ref_cnt += 1;
420 mutex_exit(&fstrans_lock);
421
422 if (fmi->fmi_lower_info) {
423 fli_lower =
424 fstrans_alloc_lwp_info(fmi->fmi_lower_info->fmi_mount);
425 if (fli_lower == NULL) {
426 mutex_enter(&fstrans_lock);
427 fstrans_mount_dtor(fmi);
428 mutex_exit(&fstrans_lock);
429
430 return NULL;
431 }
432 } else {
433 fli_lower = NULL;
434 }
435
436 /*
437 * Allocate a new entry.
438 */
439 fli = pool_cache_get(fstrans_lwp_cache, PR_WAITOK);
440 KASSERT(fli->fli_trans_cnt == 0);
441 KASSERT(fli->fli_cow_cnt == 0);
442 KASSERT(fli->fli_alias_cnt == 0);
443 KASSERT(fli->fli_mount == NULL);
444 KASSERT(fli->fli_alias == NULL);
445 KASSERT(fli->fli_mountinfo == NULL);
446 KASSERT(fli->fli_self == NULL);
447
448 /*
449 * Attach the mount info and alias.
450 */
451
452 fli->fli_self = curlwp;
453 fli->fli_mount = mp;
454 fli->fli_mountinfo = fmi;
455
456 fli->fli_succ = curlwp->l_fstrans;
457 curlwp->l_fstrans = fli;
458
459 if (fli_lower) {
460 fli->fli_alias = fli_lower;
461 fli->fli_alias->fli_alias_cnt++;
462 fli = fli->fli_alias;
463 }
464
465 return fli;
466 }
467
468 /*
469 * Retrieve the per lwp info for this mount allocating if necessary.
470 */
471 static inline struct fstrans_lwp_info *
472 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
473 {
474 struct fstrans_lwp_info *fli;
475
476 /*
477 * Scan our list for a match.
478 */
479 for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
480 if (fli->fli_mount == mp) {
481 KASSERT(mp->mnt_lower == NULL ||
482 fli->fli_alias != NULL);
483 if (fli->fli_alias != NULL)
484 fli = fli->fli_alias;
485 break;
486 }
487 }
488
489 if (do_alloc) {
490 if (__predict_false(fli == NULL))
491 fli = fstrans_alloc_lwp_info(mp);
492 }
493
494 return fli;
495 }
496
497 /*
498 * Check if this lock type is granted at this state.
499 */
500 static bool
501 grant_lock(const struct fstrans_mount_info *fmi,
502 const enum fstrans_lock_type type)
503 {
504
505 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL))
506 return true;
507 if (fmi->fmi_owner == curlwp)
508 return true;
509 if (fmi->fmi_state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
510 return true;
511
512 return false;
513 }
514
515 /*
516 * Start a transaction. If this thread already has a transaction on this
517 * file system increment the reference counter.
518 */
519 static inline int
520 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
521 {
522 int s;
523 struct fstrans_lwp_info *fli;
524 struct fstrans_mount_info *fmi;
525
526 ASSERT_SLEEPABLE();
527
528 fli = fstrans_get_lwp_info(mp, true);
529 if (fli == NULL)
530 return 0;
531 fmi = fli->fli_mountinfo;
532
533 if (fli->fli_trans_cnt > 0) {
534 fli->fli_trans_cnt += 1;
535
536 return 0;
537 }
538
539 s = pserialize_read_enter();
540 if (__predict_true(grant_lock(fmi, lock_type))) {
541 fli->fli_trans_cnt = 1;
542 fli->fli_lock_type = lock_type;
543 pserialize_read_exit(s);
544
545 return 0;
546 }
547 pserialize_read_exit(s);
548
549 if (! wait)
550 return SET_ERROR(EBUSY);
551
552 mutex_enter(&fstrans_lock);
553 while (! grant_lock(fmi, lock_type))
554 cv_wait(&fstrans_state_cv, &fstrans_lock);
555 fli->fli_trans_cnt = 1;
556 fli->fli_lock_type = lock_type;
557 mutex_exit(&fstrans_lock);
558
559 return 0;
560 }
561
562 void
563 fstrans_start(struct mount *mp)
564 {
565 int error __diagused;
566
567 error = _fstrans_start(mp, FSTRANS_SHARED, 1);
568 KASSERT(error == 0);
569 }
570
571 int
572 fstrans_start_nowait(struct mount *mp)
573 {
574
575 return _fstrans_start(mp, FSTRANS_SHARED, 0);
576 }
577
578 void
579 fstrans_start_lazy(struct mount *mp)
580 {
581 int error __diagused;
582
583 error = _fstrans_start(mp, FSTRANS_LAZY, 1);
584 KASSERT(error == 0);
585 }
586
587 /*
588 * Finish a transaction.
589 */
590 void
591 fstrans_done(struct mount *mp)
592 {
593 int s;
594 struct fstrans_lwp_info *fli;
595 struct fstrans_mount_info *fmi;
596
597 fli = fstrans_get_lwp_info(mp, false);
598 if (fli == NULL)
599 return;
600 fmi = fli->fli_mountinfo;
601 KASSERT(fli->fli_trans_cnt > 0);
602
603 if (fli->fli_trans_cnt > 1) {
604 fli->fli_trans_cnt -= 1;
605
606 return;
607 }
608
609 if (__predict_false(fstrans_gone_count > 0))
610 fstrans_clear_lwp_info();
611
612 s = pserialize_read_enter();
613 if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
614 fli->fli_trans_cnt = 0;
615 pserialize_read_exit(s);
616
617 return;
618 }
619 pserialize_read_exit(s);
620
621 mutex_enter(&fstrans_lock);
622 fli->fli_trans_cnt = 0;
623 cv_signal(&fstrans_count_cv);
624 mutex_exit(&fstrans_lock);
625 }
626
627 /*
628 * Check if we hold an lock.
629 */
630 int
631 fstrans_held(struct mount *mp)
632 {
633 struct fstrans_lwp_info *fli;
634 struct fstrans_mount_info *fmi;
635
636 KASSERT(mp != dead_rootmount);
637
638 fli = fstrans_get_lwp_info(mp, false);
639 if (fli == NULL)
640 return 0;
641 fmi = fli->fli_mountinfo;
642
643 return (fli->fli_trans_cnt > 0 || fmi->fmi_owner == curlwp);
644 }
645
646 /*
647 * Check if this thread has an exclusive lock.
648 */
649 int
650 fstrans_is_owner(struct mount *mp)
651 {
652 struct fstrans_lwp_info *fli;
653 struct fstrans_mount_info *fmi;
654
655 KASSERT(mp != dead_rootmount);
656
657 fli = fstrans_get_lwp_info(mp, false);
658 if (fli == NULL)
659 return 0;
660 fmi = fli->fli_mountinfo;
661
662 return (fmi->fmi_owner == curlwp);
663 }
664
665 /*
666 * True, if no thread is in a transaction not granted at the current state.
667 */
668 static bool
669 state_change_done(const struct fstrans_mount_info *fmi)
670 {
671 struct fstrans_lwp_info *fli;
672
673 KASSERT(mutex_owned(&fstrans_lock));
674
675 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
676 if (fli->fli_mountinfo != fmi)
677 continue;
678 if (fli->fli_trans_cnt == 0)
679 continue;
680 if (fli->fli_self == curlwp)
681 continue;
682 if (grant_lock(fmi, fli->fli_lock_type))
683 continue;
684
685 return false;
686 }
687
688 return true;
689 }
690
691 /*
692 * Set new file system state.
693 */
694 int
695 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
696 {
697 int error;
698 enum fstrans_state old_state;
699 struct fstrans_lwp_info *fli;
700 struct fstrans_mount_info *fmi;
701
702 KASSERT(mp != dead_rootmount);
703
704 fli = fstrans_get_lwp_info(mp, true);
705 if (fli == NULL)
706 return SET_ERROR(ENOENT);
707 fmi = fli->fli_mountinfo;
708 old_state = fmi->fmi_state;
709 if (old_state == new_state)
710 return 0;
711
712 mutex_enter(&fstrans_lock);
713 fmi->fmi_state = new_state;
714 pserialize_perform(fstrans_psz);
715
716 /*
717 * All threads see the new state now.
718 * Wait for transactions invalid at this state to leave.
719 */
720 error = 0;
721 while (! state_change_done(fmi)) {
722 error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
723 if (error) {
724 new_state = fmi->fmi_state = FSTRANS_NORMAL;
725 break;
726 }
727 }
728 if (old_state != new_state) {
729 if (old_state == FSTRANS_NORMAL) {
730 KASSERT(fmi->fmi_owner == NULL);
731 fmi->fmi_owner = curlwp;
732 }
733 if (new_state == FSTRANS_NORMAL) {
734 KASSERT(fmi->fmi_owner == curlwp);
735 fmi->fmi_owner = NULL;
736 }
737 }
738 cv_broadcast(&fstrans_state_cv);
739 mutex_exit(&fstrans_lock);
740
741 return error;
742 }
743
744 /*
745 * Get current file system state.
746 */
747 enum fstrans_state
748 fstrans_getstate(struct mount *mp)
749 {
750 struct fstrans_lwp_info *fli;
751 struct fstrans_mount_info *fmi;
752
753 KASSERT(mp != dead_rootmount);
754
755 fli = fstrans_get_lwp_info(mp, true);
756 KASSERT(fli != NULL);
757 fmi = fli->fli_mountinfo;
758
759 return fmi->fmi_state;
760 }
761
762 /*
763 * Request a filesystem to suspend all operations.
764 */
765 int
766 vfs_suspend(struct mount *mp, int nowait)
767 {
768 struct fstrans_lwp_info *fli;
769 int error;
770
771 if (mp == dead_rootmount)
772 return SET_ERROR(EOPNOTSUPP);
773
774 fli = fstrans_get_lwp_info(mp, true);
775 if (fli == NULL)
776 return SET_ERROR(ENOENT);
777
778 if (nowait) {
779 if (!mutex_tryenter(&vfs_suspend_lock))
780 return SET_ERROR(EWOULDBLOCK);
781 } else
782 mutex_enter(&vfs_suspend_lock);
783
784 if ((error = VFS_SUSPENDCTL(fli->fli_mount, SUSPEND_SUSPEND)) != 0) {
785 mutex_exit(&vfs_suspend_lock);
786 return error;
787 }
788
789 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
790 vfs_resume(mp);
791 return SET_ERROR(ENOENT);
792 }
793
794 return 0;
795 }
796
797 /*
798 * Request a filesystem to resume all operations.
799 */
800 void
801 vfs_resume(struct mount *mp)
802 {
803 struct fstrans_lwp_info *fli;
804
805 KASSERT(mp != dead_rootmount);
806
807 fli = fstrans_get_lwp_info(mp, false);
808 mp = fli->fli_mount;
809
810 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
811 mutex_exit(&vfs_suspend_lock);
812 }
813
814 /*
815 * True, if no thread is running a cow handler.
816 */
817 static bool
818 cow_state_change_done(const struct fstrans_mount_info *fmi)
819 {
820 struct fstrans_lwp_info *fli;
821
822 KASSERT(mutex_owned(&fstrans_lock));
823 KASSERT(fmi->fmi_cow_change);
824
825 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
826 if (fli->fli_mount != fmi->fmi_mount)
827 continue;
828 if (fli->fli_cow_cnt == 0)
829 continue;
830
831 return false;
832 }
833
834 return true;
835 }
836
837 /*
838 * Prepare for changing this mounts cow list.
839 * Returns with fstrans_lock locked.
840 */
841 static void
842 cow_change_enter(struct fstrans_mount_info *fmi)
843 {
844
845 mutex_enter(&fstrans_lock);
846
847 /*
848 * Wait for other threads changing the list.
849 */
850 while (fmi->fmi_cow_change)
851 cv_wait(&fstrans_state_cv, &fstrans_lock);
852
853 /*
854 * Wait until all threads are aware of a state change.
855 */
856 fmi->fmi_cow_change = true;
857 pserialize_perform(fstrans_psz);
858
859 while (! cow_state_change_done(fmi))
860 cv_wait(&fstrans_count_cv, &fstrans_lock);
861 }
862
863 /*
864 * Done changing this mounts cow list.
865 */
866 static void
867 cow_change_done(struct fstrans_mount_info *fmi)
868 {
869
870 KASSERT(mutex_owned(&fstrans_lock));
871
872 fmi->fmi_cow_change = false;
873 pserialize_perform(fstrans_psz);
874
875 cv_broadcast(&fstrans_state_cv);
876
877 mutex_exit(&fstrans_lock);
878 }
879
880 /*
881 * Add a handler to this mount.
882 */
883 int
884 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
885 void *arg)
886 {
887 struct fstrans_mount_info *fmi;
888 struct fscow_handler *newch;
889
890 KASSERT(mp != dead_rootmount);
891
892 mutex_enter(&fstrans_lock);
893 fmi = fstrans_mount_get(mp);
894 KASSERT(fmi != NULL);
895 fmi->fmi_ref_cnt += 1;
896 mutex_exit(&fstrans_lock);
897
898 newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
899 newch->ch_func = func;
900 newch->ch_arg = arg;
901
902 cow_change_enter(fmi);
903 LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
904 cow_change_done(fmi);
905
906 return 0;
907 }
908
909 /*
910 * Remove a handler from this mount.
911 */
912 int
913 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
914 void *arg)
915 {
916 struct fstrans_mount_info *fmi;
917 struct fscow_handler *hp = NULL;
918
919 KASSERT(mp != dead_rootmount);
920
921 mutex_enter(&fstrans_lock);
922 fmi = fstrans_mount_get(mp);
923 KASSERT(fmi != NULL);
924 mutex_exit(&fstrans_lock);
925
926 cow_change_enter(fmi);
927 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
928 if (hp->ch_func == func && hp->ch_arg == arg)
929 break;
930 if (hp != NULL) {
931 LIST_REMOVE(hp, ch_list);
932 kmem_free(hp, sizeof(*hp));
933 }
934 fstrans_mount_dtor(fmi);
935 cow_change_done(fmi);
936
937 return hp ? 0 : SET_ERROR(EINVAL);
938 }
939
940 /*
941 * Check for need to copy block that is about to be written.
942 */
943 int
944 fscow_run(struct buf *bp, bool data_valid)
945 {
946 int error, s;
947 struct mount *mp;
948 struct fstrans_lwp_info *fli;
949 struct fstrans_mount_info *fmi;
950 struct fscow_handler *hp;
951
952 /*
953 * First check if we need run the copy-on-write handler.
954 */
955 if ((bp->b_flags & B_COWDONE))
956 return 0;
957 if (bp->b_vp == NULL) {
958 bp->b_flags |= B_COWDONE;
959 return 0;
960 }
961 if (bp->b_vp->v_type == VBLK)
962 mp = spec_node_getmountedfs(bp->b_vp);
963 else
964 mp = bp->b_vp->v_mount;
965 if (mp == NULL || mp == dead_rootmount) {
966 bp->b_flags |= B_COWDONE;
967 return 0;
968 }
969
970 fli = fstrans_get_lwp_info(mp, true);
971 KASSERT(fli != NULL);
972 fmi = fli->fli_mountinfo;
973
974 /*
975 * On non-recursed run check if other threads
976 * want to change the list.
977 */
978 if (fli->fli_cow_cnt == 0) {
979 s = pserialize_read_enter();
980 if (__predict_false(fmi->fmi_cow_change)) {
981 pserialize_read_exit(s);
982 mutex_enter(&fstrans_lock);
983 while (fmi->fmi_cow_change)
984 cv_wait(&fstrans_state_cv, &fstrans_lock);
985 fli->fli_cow_cnt = 1;
986 mutex_exit(&fstrans_lock);
987 } else {
988 fli->fli_cow_cnt = 1;
989 pserialize_read_exit(s);
990 }
991 } else
992 fli->fli_cow_cnt += 1;
993
994 /*
995 * Run all copy-on-write handlers, stop on error.
996 */
997 error = 0;
998 LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
999 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
1000 break;
1001 if (error == 0)
1002 bp->b_flags |= B_COWDONE;
1003
1004 /*
1005 * Check if other threads want to change the list.
1006 */
1007 if (fli->fli_cow_cnt > 1) {
1008 fli->fli_cow_cnt -= 1;
1009 } else {
1010 s = pserialize_read_enter();
1011 if (__predict_false(fmi->fmi_cow_change)) {
1012 pserialize_read_exit(s);
1013 mutex_enter(&fstrans_lock);
1014 fli->fli_cow_cnt = 0;
1015 cv_signal(&fstrans_count_cv);
1016 mutex_exit(&fstrans_lock);
1017 } else {
1018 fli->fli_cow_cnt = 0;
1019 pserialize_read_exit(s);
1020 }
1021 }
1022
1023 return error;
1024 }
1025
1026 #if defined(DDB)
1027 void fstrans_dump(int);
1028
1029 static void
1030 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
1031 {
1032 char prefix[9];
1033 struct fstrans_lwp_info *fli;
1034
1035 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
1036 LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
1037 if (fli->fli_self != l)
1038 continue;
1039 if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
1040 if (! verbose)
1041 continue;
1042 }
1043 printf("%-8s", prefix);
1044 if (verbose)
1045 printf(" @%p", fli);
1046 if (fli->fli_mount == dead_rootmount)
1047 printf(" <dead>");
1048 else if (fli->fli_mount != NULL)
1049 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
1050 else
1051 printf(" NULL");
1052 if (fli->fli_alias != NULL) {
1053 struct mount *amp = fli->fli_alias->fli_mount;
1054
1055 printf(" alias");
1056 if (verbose)
1057 printf(" @%p", fli->fli_alias);
1058 if (amp == NULL)
1059 printf(" NULL");
1060 else
1061 printf(" (%s)", amp->mnt_stat.f_mntonname);
1062 }
1063 if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
1064 printf(" gone");
1065 if (fli->fli_trans_cnt == 0) {
1066 printf(" -");
1067 } else {
1068 switch (fli->fli_lock_type) {
1069 case FSTRANS_LAZY:
1070 printf(" lazy");
1071 break;
1072 case FSTRANS_SHARED:
1073 printf(" shared");
1074 break;
1075 default:
1076 printf(" %#x", fli->fli_lock_type);
1077 break;
1078 }
1079 }
1080 printf(" %d cow %d alias %d\n",
1081 fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
1082 prefix[0] = '\0';
1083 }
1084 }
1085
1086 static void
1087 fstrans_print_mount(struct mount *mp, int verbose)
1088 {
1089 uint32_t indx;
1090 struct fstrans_mount_info *fmi;
1091
1092 indx = fstrans_mount_hash(mp);
1093 SLIST_FOREACH(fmi, &fstrans_mount_hashtab[indx], fmi_hash)
1094 if (fmi->fmi_mount == mp)
1095 break;
1096
1097 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
1098 return;
1099
1100 printf("%-16s ", mp->mnt_stat.f_mntonname);
1101 if (fmi == NULL) {
1102 printf("(null)\n");
1103 return;
1104 }
1105 printf("owner %p ", fmi->fmi_owner);
1106 switch (fmi->fmi_state) {
1107 case FSTRANS_NORMAL:
1108 printf("state normal\n");
1109 break;
1110 case FSTRANS_SUSPENDING:
1111 printf("state suspending\n");
1112 break;
1113 case FSTRANS_SUSPENDED:
1114 printf("state suspended\n");
1115 break;
1116 default:
1117 printf("state %#x\n", fmi->fmi_state);
1118 break;
1119 }
1120 }
1121
1122 void
1123 fstrans_dump(int full)
1124 {
1125 const struct proclist_desc *pd;
1126 struct proc *p;
1127 struct lwp *l;
1128 struct mount *mp;
1129
1130 printf("Fstrans locks by lwp:\n");
1131 for (pd = proclists; pd->pd_list != NULL; pd++)
1132 PROCLIST_FOREACH(p, pd->pd_list)
1133 LIST_FOREACH(l, &p->p_lwps, l_sibling)
1134 fstrans_print_lwp(p, l, full == 1);
1135
1136 printf("Fstrans state by mount:\n");
1137 for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1138 fstrans_print_mount(mp, full == 1);
1139 }
1140 #endif /* defined(DDB) */
1141