/*	$NetBSD: vfs_trans.c,v 1.17 2008/02/02 16:51:34 hannken Exp $	*/
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.17 2008/02/02 16:51:34 hannken Exp $");
41
42 /*
43 * File system transaction operations.
44 */
45
46 #include "opt_ddb.h"
47
48 #if defined(DDB)
49 #define _LWP_API_PRIVATE /* Need _lwp_getspecific_by_lwp() */
50 #endif
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kmem.h>
55 #include <sys/mount.h>
56 #include <sys/rwlock.h>
57 #include <sys/vnode.h>
58 #define _FSTRANS_API_PRIVATE
59 #include <sys/fstrans.h>
60 #include <sys/proc.h>
61
62 #include <miscfs/specfs/specdev.h>
63 #include <miscfs/syncfs/syncfs.h>
64
65 struct fscow_handler {
66 SLIST_ENTRY(fscow_handler) ch_list;
67 int (*ch_func)(void *, struct buf *, bool);
68 void *ch_arg;
69 };
70 struct fstrans_lwp_info {
71 struct fstrans_lwp_info *fli_succ;
72 struct mount *fli_mount;
73 int fli_count;
74 enum fstrans_lock_type fli_lock_type;
75 };
76 struct fstrans_mount_info {
77 enum fstrans_state fmi_state;
78 krwlock_t fmi_shared_lock;
79 krwlock_t fmi_lazy_lock;
80 krwlock_t fmi_cow_lock;
81 SLIST_HEAD(, fscow_handler) fmi_cow_handler;
82 };
83
84 static specificdata_key_t lwp_data_key;
85 static kmutex_t vfs_suspend_lock; /* Serialize suspensions. */
86
87 POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0,
88 "fstrans", NULL, IPL_NONE);
89
90 static void fstrans_lwp_dtor(void *);
91
92 /*
93 * Initialize
94 */
95 void
96 fstrans_init(void)
97 {
98 int error;
99
100 error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
101 KASSERT(error == 0);
102
103 mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
104 }
105
106 /*
107 * Deallocate lwp state
108 */
109 static void
110 fstrans_lwp_dtor(void *arg)
111 {
112 struct fstrans_lwp_info *fli, *fli_next;
113
114 for (fli = arg; fli; fli = fli_next) {
115 KASSERT(fli->fli_mount == NULL);
116 KASSERT(fli->fli_count == 0);
117 fli_next = fli->fli_succ;
118 pool_put(&fstrans_pl, fli);
119 }
120 }
121
122 /*
123 * Allocate mount state
124 */
125 int
126 fstrans_mount(struct mount *mp)
127 {
128 struct fstrans_mount_info *new;
129
130 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
131 return ENOMEM;
132 new->fmi_state = FSTRANS_NORMAL;
133 rw_init(&new->fmi_lazy_lock);
134 rw_init(&new->fmi_shared_lock);
135 SLIST_INIT(&new->fmi_cow_handler);
136 rw_init(&new->fmi_cow_lock);
137
138 mp->mnt_transinfo = new;
139 mp->mnt_iflag |= IMNT_HAS_TRANS;
140
141 return 0;
142 }
143
144 /*
145 * Deallocate mount state
146 */
147 void
148 fstrans_unmount(struct mount *mp)
149 {
150 struct fstrans_mount_info *fmi;
151
152 if ((fmi = mp->mnt_transinfo) == NULL)
153 return;
154
155 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
156 rw_destroy(&fmi->fmi_lazy_lock);
157 rw_destroy(&fmi->fmi_shared_lock);
158 KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler));
159 rw_destroy(&fmi->fmi_cow_lock);
160 kmem_free(fmi, sizeof(*fmi));
161 mp->mnt_iflag &= ~IMNT_HAS_TRANS;
162 mp->mnt_transinfo = NULL;
163 }
164
165 /*
166 * Start a transaction. If this thread already has a transaction on this
167 * file system increment the reference counter.
168 * A thread with an exclusive transaction lock may get a shared or lazy one.
169 * A thread with a shared or lazy transaction lock cannot upgrade to an
170 * exclusive one yet.
171 */
172 int
173 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
174 {
175 krwlock_t *lock_p;
176 krw_t lock_op;
177 struct fstrans_lwp_info *fli, *new_fli;
178 struct fstrans_mount_info *fmi;
179
180 ASSERT_SLEEPABLE(NULL, __func__);
181
182 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
183 return 0;
184
185 new_fli = NULL;
186 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
187 if (fli->fli_mount == NULL && new_fli == NULL)
188 new_fli = fli;
189 if (fli->fli_mount == mp) {
190 KASSERT(fli->fli_count > 0);
191 if (fli->fli_lock_type != FSTRANS_EXCL &&
192 lock_type == FSTRANS_EXCL)
193 panic("fstrans_start: cannot upgrade lock");
194 fli->fli_count += 1;
195 return 0;
196 }
197 }
198
199 if (new_fli == NULL) {
200 new_fli = pool_get(&fstrans_pl, PR_WAITOK);
201 new_fli->fli_mount = NULL;
202 new_fli->fli_count = 0;
203 new_fli->fli_succ = lwp_getspecific(lwp_data_key);
204 lwp_setspecific(lwp_data_key, new_fli);
205 }
206
207 KASSERT(new_fli->fli_mount == NULL);
208 KASSERT(new_fli->fli_count == 0);
209
210 fmi = mp->mnt_transinfo;
211
212 if (lock_type == FSTRANS_LAZY)
213 lock_p = &fmi->fmi_lazy_lock;
214 else
215 lock_p = &fmi->fmi_shared_lock;
216 lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER);
217
218 if (wait)
219 rw_enter(lock_p, lock_op);
220 else if (rw_tryenter(lock_p, lock_op) == 0)
221 return EBUSY;
222
223 new_fli->fli_mount = mp;
224 new_fli->fli_count = 1;
225 new_fli->fli_lock_type = lock_type;
226
227 return 0;
228 }
229
230 /*
231 * Finish a transaction.
232 */
233 void
234 fstrans_done(struct mount *mp)
235 {
236 struct fstrans_lwp_info *fli;
237 struct fstrans_mount_info *fmi;
238
239 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
240 return;
241
242 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
243 if (fli->fli_mount == mp) {
244 fli->fli_count -= 1;
245 if (fli->fli_count > 0)
246 return;
247 break;
248 }
249 }
250
251 KASSERT(fli != NULL);
252 KASSERT(fli->fli_mount == mp);
253 KASSERT(fli->fli_count == 0);
254 fli->fli_mount = NULL;
255 fmi = mp->mnt_transinfo;
256 KASSERT(fmi != NULL);
257 if (fli->fli_lock_type == FSTRANS_LAZY)
258 rw_exit(&fmi->fmi_lazy_lock);
259 else
260 rw_exit(&fmi->fmi_shared_lock);
261 }
262
263 /*
264 * Check if this thread has an exclusive lock.
265 */
266 int
267 fstrans_is_owner(struct mount *mp)
268 {
269 struct fstrans_lwp_info *fli;
270
271 if (mp == NULL)
272 return 0;
273 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
274 return 0;
275
276 for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ)
277 if (fli->fli_mount == mp)
278 break;
279
280 if (fli == NULL)
281 return 0;
282
283 KASSERT(fli->fli_mount == mp);
284 KASSERT(fli->fli_count > 0);
285 return (fli->fli_lock_type == FSTRANS_EXCL);
286 }
287
288 /*
289 * Set new file system state.
290 */
291 int
292 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
293 {
294 struct fstrans_mount_info *fmi;
295
296 fmi = mp->mnt_transinfo;
297
298 switch (new_state) {
299 case FSTRANS_SUSPENDING:
300 KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
301 fstrans_start(mp, FSTRANS_EXCL);
302 fmi->fmi_state = FSTRANS_SUSPENDING;
303 break;
304
305 case FSTRANS_SUSPENDED:
306 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
307 fmi->fmi_state == FSTRANS_SUSPENDING);
308 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
309 fstrans_is_owner(mp));
310 if (fmi->fmi_state == FSTRANS_NORMAL)
311 fstrans_start(mp, FSTRANS_EXCL);
312 rw_enter(&fmi->fmi_lazy_lock, RW_WRITER);
313 fmi->fmi_state = FSTRANS_SUSPENDED;
314 break;
315
316 case FSTRANS_NORMAL:
317 KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
318 fstrans_is_owner(mp));
319 if (fmi->fmi_state == FSTRANS_SUSPENDED)
320 rw_exit(&fmi->fmi_lazy_lock);
321 if (fmi->fmi_state == FSTRANS_SUSPENDING ||
322 fmi->fmi_state == FSTRANS_SUSPENDED) {
323 fmi->fmi_state = FSTRANS_NORMAL;
324 fstrans_done(mp);
325 }
326 break;
327
328 default:
329 panic("%s: illegal state %d", __func__, new_state);
330 }
331
332 return 0;
333 }
334
335 /*
336 * Get current file system state
337 */
338 enum fstrans_state
339 fstrans_getstate(struct mount *mp)
340 {
341 struct fstrans_mount_info *fmi;
342
343 fmi = mp->mnt_transinfo;
344
345 return fmi->fmi_state;
346 }
347
348 /*
349 * Request a filesystem to suspend all operations.
350 */
351 int
352 vfs_suspend(struct mount *mp, int nowait)
353 {
354 int error;
355
356 if (nowait) {
357 if (!mutex_tryenter(&vfs_suspend_lock))
358 return EWOULDBLOCK;
359 } else
360 mutex_enter(&vfs_suspend_lock);
361
362 mutex_enter(&syncer_mutex);
363
364 if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) {
365 mutex_exit(&syncer_mutex);
366 mutex_exit(&vfs_suspend_lock);
367 }
368
369 return error;
370 }
371
372 /*
373 * Request a filesystem to resume all operations.
374 */
375 void
376 vfs_resume(struct mount *mp)
377 {
378
379 VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
380 mutex_exit(&syncer_mutex);
381 mutex_exit(&vfs_suspend_lock);
382 }
383
384 #if defined(DDB)
/* Print all fstrans state; non-static, defined further down in this file. */
void fstrans_dump(int full);
386
387 static void
388 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
389 {
390 char prefix[9];
391 struct fstrans_lwp_info *fli;
392
393 snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
394 for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key);
395 fli;
396 fli = fli->fli_succ) {
397 if (!verbose && fli->fli_count == 0)
398 continue;
399 printf("%-8s", prefix);
400 if (verbose)
401 printf(" @%p", fli);
402 if (fli->fli_mount != NULL)
403 printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
404 else
405 printf(" NULL");
406 switch (fli->fli_lock_type) {
407 case FSTRANS_LAZY:
408 printf(" lazy");
409 break;
410 case FSTRANS_SHARED:
411 printf(" shared");
412 break;
413 case FSTRANS_EXCL:
414 printf(" excl");
415 break;
416 default:
417 printf(" %#x", fli->fli_lock_type);
418 break;
419 }
420 printf(" %d\n", fli->fli_count);
421 prefix[0] = '\0';
422 }
423 }
424
425 static void
426 fstrans_print_mount(struct mount *mp, int verbose)
427 {
428 struct fstrans_mount_info *fmi;
429
430 fmi = mp->mnt_transinfo;
431 if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
432 return;
433
434 printf("%-16s ", mp->mnt_stat.f_mntonname);
435 if (fmi == NULL) {
436 printf("(null)\n");
437 return;
438 }
439 switch (fmi->fmi_state) {
440 case FSTRANS_NORMAL:
441 printf("state normal\n");
442 break;
443 case FSTRANS_SUSPENDING:
444 printf("state suspending\n");
445 break;
446 case FSTRANS_SUSPENDED:
447 printf("state suspended\n");
448 break;
449 default:
450 printf("state %#x\n", fmi->fmi_state);
451 break;
452 }
453 printf("%16s r=%d w=%d\n", "lock_lazy:",
454 rw_read_held(&fmi->fmi_lazy_lock),
455 rw_write_held(&fmi->fmi_lazy_lock));
456 printf("%16s r=%d w=%d\n", "lock_shared:",
457 rw_read_held(&fmi->fmi_shared_lock),
458 rw_write_held(&fmi->fmi_shared_lock));
459 }
460
461 void
462 fstrans_dump(int full)
463 {
464 const struct proclist_desc *pd;
465 struct proc *p;
466 struct lwp *l;
467 struct mount *mp;
468
469 printf("Fstrans locks by lwp:\n");
470 for (pd = proclists; pd->pd_list != NULL; pd++)
471 LIST_FOREACH(p, pd->pd_list, p_list)
472 LIST_FOREACH(l, &p->p_lwps, l_sibling)
473 fstrans_print_lwp(p, l, full == 1);
474
475 printf("Fstrans state by mount:\n");
476 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
477 fstrans_print_mount(mp, full == 1);
478 }
479 #endif /* defined(DDB) */
480
481 int
482 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
483 void *arg)
484 {
485 struct fstrans_mount_info *fmi;
486 struct fscow_handler *new;
487
488 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
489 return EINVAL;
490
491 fmi = mp->mnt_transinfo;
492
493 if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
494 return ENOMEM;
495 new->ch_func = func;
496 new->ch_arg = arg;
497 rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
498 SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list);
499 rw_exit(&fmi->fmi_cow_lock);
500
501 return 0;
502 }
503
504 int
505 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
506 void *arg)
507 {
508 struct fstrans_mount_info *fmi;
509 struct fscow_handler *hp = NULL;
510
511 if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
512 return EINVAL;
513
514 fmi = mp->mnt_transinfo;
515
516 rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
517 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
518 if (hp->ch_func == func && hp->ch_arg == arg)
519 break;
520 if (hp != NULL) {
521 SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list);
522 kmem_free(hp, sizeof(*hp));
523 }
524 rw_exit(&fmi->fmi_cow_lock);
525
526 return hp ? 0 : EINVAL;
527 }
528
529 int
530 fscow_run(struct buf *bp, bool data_valid)
531 {
532 int error = 0;
533 struct mount *mp;
534 struct fstrans_mount_info *fmi;
535 struct fscow_handler *hp;
536
537 if ((bp->b_flags & B_COWDONE))
538 goto done;
539 if (bp->b_vp == NULL)
540 goto done;
541 if (bp->b_vp->v_type == VBLK)
542 mp = bp->b_vp->v_specmountpoint;
543 else
544 mp = bp->b_vp->v_mount;
545 if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
546 goto done;
547
548 fmi = mp->mnt_transinfo;
549
550 rw_enter(&fmi->fmi_cow_lock, RW_READER);
551 SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
552 if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
553 break;
554 rw_exit(&fmi->fmi_cow_lock);
555
556 done:
557 if (error == 0)
558 bp->b_flags |= B_COWDONE;
559
560 return error;
561 }
562