vfs_trans.c revision 1.11.4.2 1 /* $NetBSD: vfs_trans.c,v 1.11.4.2 2007/12/03 16:15:00 joerg Exp $ */
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.11.4.2 2007/12/03 16:15:00 joerg Exp $");
41
42 /*
43 * File system transaction operations.
44 */
45
46 #include "opt_ddb.h"
47
48 #if defined(DDB)
49 #define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */
50 #endif
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/malloc.h>
55 #include <sys/kmem.h>
56 #include <sys/mount.h>
57 #include <sys/rwlock.h>
58 #include <sys/vnode.h>
59 #define _FSTRANS_API_PRIVATE
60 #include <sys/fstrans.h>
61 #include <sys/proc.h>
62
63 #include <miscfs/specfs/specdev.h>
64 #include <miscfs/syncfs/syncfs.h>
65
66 struct fstrans_lwp_info {
67 struct fstrans_lwp_info *fli_succ;
68 struct mount *fli_mount;
69 int fli_count;
70 enum fstrans_lock_type fli_lock_type;
71 };
72 struct fstrans_mount_info {
73 enum fstrans_state fmi_state;
74 krwlock_t fmi_shared_lock;
75 krwlock_t fmi_lazy_lock;
76 };
77
78 static specificdata_key_t lwp_data_key;
79 static specificdata_key_t mount_data_key;
80 static specificdata_key_t mount_cow_key;
81 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */
82 static kmutex_t fstrans_init_lock;
83
84 POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0,
85 "fstrans", NULL, IPL_NONE);
86
87 static void fstrans_lwp_dtor(void *);
88 static void fstrans_mount_dtor(void *);
89 static void fscow_mount_dtor(void *);
90 static struct fstrans_mount_info *fstrans_mount_init(struct mount *);
91
92 /*
93 * Initialize
94 */
95 void
96 fstrans_init(void)
97 {
98 int error;
99
100 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
101 KASSERT(error == 0);
102 error = mount_specific_key_create(&mount_data_key, fstrans_mount_dtor);
103 KASSERT(error == 0);
104 error = mount_specific_key_create(&mount_cow_key, fscow_mount_dtor);
105 KASSERT(error == 0);
106
107 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
108 mutex_init(&fstrans_init_lock, MUTEX_DEFAULT, IPL_NONE);
109 }
110
111 /*
112 * Deallocate lwp state
113 */
114 static void
115 fstrans_lwp_dtor(void *arg)
116 {
117 struct fstrans_lwp_info *fli, *fli_next;
118
119 for (fli = arg; fli; fli = fli_next) {
120 KASSERT(fli->fli_mount == NULL);
121 KASSERT(fli->fli_count == 0);
122 fli_next = fli->fli_succ;
123 pool_put(&fstrans_pl, fli);
124 }
125 }
126
127 /*
128 * Deallocate mount state
129 */
130 static void
131 fstrans_mount_dtor(void *arg)
132 {
133 struct fstrans_mount_info *fmi = arg;
134
135 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
136 rw_destroy(&fmi->fmi_lazy_lock);
137 rw_destroy(&fmi->fmi_shared_lock);
138 free(fmi, M_MOUNT);
139 }
140
141 /*
142 * Create mount info for this mount
143 */
144 static struct fstrans_mount_info *
145 fstrans_mount_init(struct mount *mp)
146 {
147 struct fstrans_mount_info *new;
148
149 mutex_enter(&fstrans_init_lock);
150
151 if ((new = mount_getspecific(mp, mount_data_key)) != NULL) {
152 mutex_exit(&fstrans_init_lock);
153 return new;
154 }
155
156 new = malloc(sizeof(*new), M_MOUNT, M_WAITOK);
157 new->fmi_state = FSTRANS_NORMAL;
158 rw_init(&new->fmi_lazy_lock);
159 rw_init(&new->fmi_shared_lock);
160
161 mount_setspecific(mp, mount_data_key, new);
162 mutex_exit(&fstrans_init_lock);
163
164 return new;
165 }
166
167 /*
168 * Start a transaction. If this thread already has a transaction on this
169 * file system increment the reference counter.
170 * A thread with an exclusive transaction lock may get a shared or lazy one.
171 * A thread with a shared or lazy transaction lock cannot upgrade to an
172 * exclusive one yet.
173 */
174 int
175 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
176 {
177 krwlock_t *lock_p;
178 krw_t lock_op;
179 struct fstrans_lwp_info *fli, *new_fli;
180 struct fstrans_mount_info *fmi;
181
182 ASSERT_SLEEPABLE(NULL, __func__);
183
184 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
185 return 0;
186
187 new_fli = NULL;
188 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
189 if (fli->fli_mount == NULL && new_fli == NULL)
190 new_fli = fli;
191 if (fli->fli_mount == mp) {
192 KASSERT(fli->fli_count > 0);
193 if (fli->fli_lock_type != FSTRANS_EXCL &&
194 lock_type == FSTRANS_EXCL)
195 panic("fstrans_start: cannot upgrade lock");
196 fli->fli_count += 1;
197 return 0;
198 }
199 }
200
201 if (new_fli == NULL) {
202 new_fli = pool_get(&fstrans_pl, PR_WAITOK);
203 new_fli->fli_mount = NULL;
204 new_fli->fli_count = 0;
205 new_fli->fli_succ = lwp_getspecific(lwp_data_key);
206 lwp_setspecific(lwp_data_key, new_fli);
207 }
208
209 KASSERT(new_fli->fli_mount == NULL);
210 KASSERT(new_fli->fli_count == 0);
211
212 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
213 fmi = fstrans_mount_init(mp);
214
215 if (lock_type == FSTRANS_LAZY)
216 lock_p = &fmi->fmi_lazy_lock;
217 else
218 lock_p = &fmi->fmi_shared_lock;
219 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER);
220
221 if (wait)
222 rw_enter(lock_p, lock_op);
223 else if (rw_tryenter(lock_p, lock_op) == 0)
224 return EBUSY;
225
226 new_fli->fli_mount = mp;
227 new_fli->fli_count = 1;
228 new_fli->fli_lock_type = lock_type;
229
230 return 0;
231 }
232
233 /*
234 * Finish a transaction.
235 */
236 void
237 fstrans_done(struct mount *mp)
238 {
239 struct fstrans_lwp_info *fli;
240 struct fstrans_mount_info *fmi;
241
242 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
243 return;
244
245 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
246 if (fli->fli_mount == mp) {
247 fli->fli_count -= 1;
248 if (fli->fli_count > 0)
249 return;
250 break;
251 }
252 }
253
254 KASSERT(fli != NULL);
255 KASSERT(fli->fli_mount == mp);
256 KASSERT(fli->fli_count == 0);
257 fli->fli_mount = NULL;
258 fmi = mount_getspecific(mp, mount_data_key);
259 KASSERT(fmi != NULL);
260 if (fli->fli_lock_type == FSTRANS_LAZY)
261 rw_exit(&fmi->fmi_lazy_lock);
262 else
263 rw_exit(&fmi->fmi_shared_lock);
264 }
265
266 /*
267 * Check if this thread has an exclusive lock.
268 */
269 int
270 fstrans_is_owner(struct mount *mp)
271 {
272 struct fstrans_lwp_info *fli;
273
274 if (mp == NULL)
275 return 0;
276 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
277 return 0;
278
279 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ)
280 if (fli->fli_mount == mp)
281 break;
282
283 if (fli == NULL)
284 return 0;
285
286 KASSERT(fli->fli_mount == mp);
287 KASSERT(fli->fli_count > 0);
288 return (fli->fli_lock_type == FSTRANS_EXCL);
289 }
290
291 /*
292 * Set new file system state.
293 */
294 int
295 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
296 {
297 struct fstrans_mount_info *fmi;
298
299 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
300 fmi = fstrans_mount_init(mp);
301
302 switch (new_state) {
303 case FSTRANS_SUSPENDING:
304 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
305 fstrans_start(mp, FSTRANS_EXCL);
306 fmi->fmi_state = FSTRANS_SUSPENDING;
307 break;
308
309 case FSTRANS_SUSPENDED:
310 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
311 fmi->fmi_state == FSTRANS_SUSPENDING);
312 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
313 fstrans_is_owner(mp));
314 if (fmi->fmi_state == FSTRANS_NORMAL)
315 fstrans_start(mp, FSTRANS_EXCL);
316 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER);
317 fmi->fmi_state = FSTRANS_SUSPENDED;
318 break;
319
320 case FSTRANS_NORMAL:
321 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
322 fstrans_is_owner(mp));
323 if (fmi->fmi_state == FSTRANS_SUSPENDED)
324 rw_exit(&fmi->fmi_lazy_lock);
325 if (fmi->fmi_state == FSTRANS_SUSPENDING ||
326 fmi->fmi_state == FSTRANS_SUSPENDED) {
327 fmi->fmi_state = FSTRANS_NORMAL;
328 fstrans_done(mp);
329 }
330 break;
331
332 default:
333 panic("%s: illegal state %d", __func__, new_state);
334 }
335
336 return 0;
337 }
338
339 /*
340 * Get current file system state
341 */
342 enum fstrans_state
343 fstrans_getstate(struct mount *mp)
344 {
345 struct fstrans_mount_info *fmi;
346
347 if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
348 return FSTRANS_NORMAL;
349
350 return fmi->fmi_state;
351 }
352
353 /*
354 * Request a filesystem to suspend all operations.
355 */
356 int
357 vfs_suspend(struct mount *mp, int nowait)
358 {
359 int error;
360
361 if (nowait) {
362 if (!mutex_tryenter(&vfs_suspend_lock))
363 return EWOULDBLOCK;
364 } else
365 mutex_enter(&vfs_suspend_lock);
366
367 mutex_enter(&syncer_mutex);
368
369 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) {
370 mutex_exit(&syncer_mutex);
371 mutex_exit(&vfs_suspend_lock);
372 }
373
374 return error;
375 }
376
377 /*
378 * Request a filesystem to resume all operations.
379 */
380 void
381 vfs_resume(struct mount *mp)
382 {
383
384 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
385 mutex_exit(&syncer_mutex);
386 mutex_exit(&vfs_suspend_lock);
387 }
388
389 #if defined(DDB)
390 void fstrans_dump(int);
391
392 static void
393 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
394 {
395 char prefix[9];
396 struct fstrans_lwp_info *fli;
397
398 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
399 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key);
400 fli;
401 fli = fli->fli_succ) {
402 if (!verbose && fli->fli_count == 0)
403 continue;
404 printf("%-8s", prefix);
405 if (verbose)
406 printf(" @%p", fli);
407 if (fli->fli_mount != NULL)
408 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
409 else
410 printf(" NULL");
411 switch (fli->fli_lock_type) {
412 case FSTRANS_LAZY:
413 printf(" lazy");
414 break;
415 case FSTRANS_SHARED:
416 printf(" shared");
417 break;
418 case FSTRANS_EXCL:
419 printf(" excl");
420 break;
421 default:
422 printf(" %#x", fli->fli_lock_type);
423 break;
424 }
425 printf(" %d\n", fli->fli_count);
426 prefix[0] = '\0';
427 }
428 }
429
430 static void
431 fstrans_print_mount(struct mount *mp, int verbose)
432 {
433 struct fstrans_mount_info *fmi;
434
435 fmi = mount_getspecific(mp, mount_data_key);
436 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
437 return;
438
439 printf("%-16s ", mp->mnt_stat.f_mntonname);
440 if (fmi == NULL) {
441 printf("(null)\n");
442 return;
443 }
444 switch (fmi->fmi_state) {
445 case FSTRANS_NORMAL:
446 printf("state normal\n");
447 break;
448 case FSTRANS_SUSPENDING:
449 printf("state suspending\n");
450 break;
451 case FSTRANS_SUSPENDED:
452 printf("state suspended\n");
453 break;
454 default:
455 printf("state %#x\n", fmi->fmi_state);
456 break;
457 }
458 printf("%16s r=%d w=%d\n", "lock_lazy:",
459 rw_read_held(&fmi->fmi_lazy_lock),
460 rw_write_held(&fmi->fmi_lazy_lock));
461 printf("%16s r=%d w=%d\n", "lock_shared:",
462 rw_read_held(&fmi->fmi_shared_lock),
463 rw_write_held(&fmi->fmi_shared_lock));
464 }
465
466 void
467 fstrans_dump(int full)
468 {
469 const struct proclist_desc *pd;
470 struct proc *p;
471 struct lwp *l;
472 struct mount *mp;
473
474 printf("Fstrans locks by lwp:\n");
475 for (pd = proclists; pd->pd_list != NULL; pd++)
476 LIST_FOREACH(p, pd->pd_list, p_list)
477 LIST_FOREACH(l, &p->p_lwps, l_sibling)
478 fstrans_print_lwp(p, l, full == 1);
479
480 printf("Fstrans state by mount:\n");
481 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
482 fstrans_print_mount(mp, full == 1);
483 }
484 #endif /* defined(DDB) */
485
486
487 struct fscow_handler {
488 SLIST_ENTRY(fscow_handler) ch_list;
489 int (*ch_func)(void *, struct buf *, bool);
490 void *ch_arg;
491 };
492
493 struct fscow_mount_info {
494 krwlock_t cmi_lock;
495 SLIST_HEAD(, fscow_handler) cmi_handler;
496 };
497
498 /*
499 * Deallocate mount state
500 */
501 static void
502 fscow_mount_dtor(void *arg)
503 {
504 struct fscow_mount_info *cmi = arg;
505
506 KASSERT(SLIST_EMPTY(&cmi->cmi_handler));
507 rw_destroy(&cmi->cmi_lock);
508 kmem_free(cmi, sizeof(*cmi));
509 }
510
511 /*
512 * Create mount info for this mount
513 */
514 static struct fscow_mount_info *
515 fscow_mount_init(struct mount *mp)
516 {
517 struct fscow_mount_info *new;
518
519 mutex_enter(&fstrans_init_lock);
520
521 if ((new = mount_getspecific(mp, mount_cow_key)) != NULL) {
522 mutex_exit(&fstrans_init_lock);
523 return new;
524 }
525
526 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) != NULL) {
527 SLIST_INIT(&new->cmi_handler);
528 rw_init(&new->cmi_lock);
529 mount_setspecific(mp, mount_cow_key, new);
530 }
531
532 mutex_exit(&fstrans_init_lock);
533
534 return new;
535 }
536
537 int
538 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
539 void *arg)
540 {
541 struct fscow_mount_info *cmi;
542 struct fscow_handler *new;
543
544 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
545 cmi = fscow_mount_init(mp);
546 if (cmi == NULL)
547 return ENOMEM;
548
549 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
550 return ENOMEM;
551 new->ch_func = func;
552 new->ch_arg = arg;
553 rw_enter(&cmi->cmi_lock, RW_WRITER);
554 SLIST_INSERT_HEAD(&cmi->cmi_handler, new, ch_list);
555 rw_exit(&cmi->cmi_lock);
556
557 return 0;
558 }
559
560 int
561 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
562 void *arg)
563 {
564 struct fscow_mount_info *cmi;
565 struct fscow_handler *hp = NULL;
566
567 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
568 return EINVAL;
569
570 rw_enter(&cmi->cmi_lock, RW_WRITER);
571 SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list)
572 if (hp->ch_func == func && hp->ch_arg == arg)
573 break;
574 if (hp != NULL) {
575 SLIST_REMOVE(&cmi->cmi_handler, hp, fscow_handler, ch_list);
576 kmem_free(hp, sizeof(*hp));
577 }
578 rw_exit(&cmi->cmi_lock);
579
580 return hp ? 0 : EINVAL;
581 }
582
583 int
584 fscow_run(struct buf *bp, bool data_valid)
585 {
586 int error = 0;
587 struct mount *mp;
588 struct fscow_mount_info *cmi;
589 struct fscow_handler *hp;
590
591 if ((bp->b_flags & B_COWDONE))
592 goto done;
593 if (bp->b_vp == NULL)
594 goto done;
595 if (bp->b_vp->v_type == VBLK)
596 mp = bp->b_vp->v_specmountpoint;
597 else
598 mp = bp->b_vp->v_mount;
599 if (mp == NULL)
600 goto done;
601
602 if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
603 goto done;
604
605 rw_enter(&cmi->cmi_lock, RW_READER);
606 SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list)
607 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
608 break;
609 rw_exit(&cmi->cmi_lock);
610
611 done:
612 if (error == 0)
613 bp->b_flags |= B_COWDONE;
614
615 return error;
616 }
617