/*	$NetBSD: lfs_bio.c,v 1.68 2003/07/02 13:41:38 yamt Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_bio.c	8.10 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.68 2003/07/02 13:41:38 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#include <uvm/uvm.h>

/* Macros to clear/set/test flags. */
#define	SET(t, f)	(t) |= (f)
#define	CLR(t, f)	(t) &= ~(f)
#define	ISSET(t, f)	((t) & (f))
/*
 * LFS block write function.
 *
 * XXX
 * No write cost accounting is done.
 * This is almost certainly wrong for synchronous operations and NFS.
 */
int	locked_queue_count = 0;		/* Count of locked-down buffers. */
long	locked_queue_bytes = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages = 0;		/* Total number of LFS-written pages */
int	lfs_writing = 0;		/* Set if already kicked off a writer
					   because of buffer space */
/* Lock for lfs_subsys_pages */
struct simplelock lfs_subsys_lock = SIMPLELOCK_INITIALIZER;

extern int lfs_dostats;
extern int lfs_do_flush;

/*
 * Number and bytes of locked buffers reserved, but not yet consumed,
 * by lfs_reservebuf().
 */
int	locked_queue_rcount = 0;
long	locked_queue_rbytes = 0L;

int lfs_fits_buf(struct lfs *, int, int);
int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2, int);

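/*
 * Return nonzero iff n more buffers totalling "bytes" bytes would keep
 * both the locked-buffer count and byte total, including outstanding
 * reservations, below the LFS_WAIT_* watermarks.
 */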
int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit =
	    (locked_queue_count + locked_queue_rcount + n < LFS_WAIT_BUFS);
	int bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes < LFS_WAIT_BYTES);

#ifdef DEBUG_LFS
	if (!count_fit) {
		printf("lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		    locked_queue_count, locked_queue_rcount,
		    n, LFS_WAIT_BUFS);
	}
	if (!bytes_fit) {
		printf("lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %d\n",
		    locked_queue_bytes, locked_queue_rbytes,
		    bytes, LFS_WAIT_BYTES);
	}
#endif /* DEBUG_LFS */

	return (count_fit && bytes_fit);
}

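/*
 * Reserve locked-queue headroom for n buffers of "bytes" bytes total,
 * flushing and sleeping until the reservation fits; negative arguments
 * release a previously made reservation.  The vnode arguments are
 * currently unused.
 */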
/* ARGSUSED */
int
lfs_reservebuf(struct lfs *fs, struct vnode *vp, struct vnode *vp2,
    int n, int bytes)
{
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	while (n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		lfs_flush(fs, 0);

		error = tsleep(&locked_queue_count, PCATCH | PUSER,
		    "lfsresbuf", hz * LFS_BUFWAIT);
		if (error && error != EWOULDBLOCK)
			return error;
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.  If there is not enough space, give
 * up the vnode lock temporarily and wait for the space to become available.
 *
 * Called with vp locked.  (Note however that if fsb < 0, vp is ignored.)
 *
 * XXX YAMT - it isn't safe to unlock vp here
 * because the node might be modified while we sleep.
 * (eg. cached states like i_offset might be stale,
 *  the vnode might be truncated, etc..)
 * maybe we should have a way to restart the vnodeop (EVOPRESTART?)
 * or rearrange the vnodeop interface to leave vnode locking to file system
 * specific code so that each file system can have its own vnode locking and
 * vnode re-using strategies.
 */
int
lfs_reserveavail(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;

	slept = 0;
	while (fsb > 0 && !lfs_fits(fs, fsb + fs->lfs_ravail)) {
#if 0
		/*
		 * XXX ideally, we should unlock vnodes here
		 * because we might sleep for a very long time.
		 */
		VOP_UNLOCK(vp, 0);
		if (vp2 != NULL) {
			VOP_UNLOCK(vp2, 0);
		}
#else
		/*
		 * XXX since we'll sleep waiting for the cleaner while
		 * holding the vnode lock, deadlock will occur if the
		 * cleaner tries to lock the vnode.
		 * (eg. lfs_markv -> lfs_fastvget -> getnewvnode -> vclean)
		 */
#endif

		if (!slept) {
#ifdef DEBUG
			printf("lfs_reserve: waiting for %ld (bfree = %d,"
			    " est_bfree = %d)\n",
			    fsb + fs->lfs_ravail, fs->lfs_bfree,
			    LFS_EST_BFREE(fs));
#endif
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		wakeup(&lfs_allclean_wakeup);
		wakeup(&fs->lfs_nextseg);

		error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve",
		    0);
#if 0
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
		vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */
#endif
		if (error)
			return error;
	}
#ifdef DEBUG
	if (slept)
		printf("lfs_reserve: woke up\n");
#endif
	fs->lfs_ravail += fsb;

	return 0;
}

#ifdef DIAGNOSTIC
int	lfs_rescount;
int	lfs_rescountdirop;
#endif

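/*
 * Reserve fsb blocks (or release them, if fsb is negative) ahead of an
 * operation that cannot tolerate blocking on the cleaner mid-way.
 * Usage sketch (hypothetical caller):
 *
 *	lfs_reserve(fs, vp, NULL, nblocks);
 *	... operation that must not wait for the cleaner ...
 *	lfs_reserve(fs, vp, NULL, -nblocks);
 *
 * Active dirops and fs->lfs_unlockvp are exempt, since they cannot
 * safely wait here.
 */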
int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;
	int cantwait;

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || !(VTOI(vp2)->i_flag & IN_ADIROP));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
#ifdef DIAGNOSTIC
	if (cantwait) {
		if (fsb > 0)
			lfs_rescountdirop++;
		else if (fsb < 0)
			lfs_rescountdirop--;
		if (lfs_rescountdirop < 0)
			panic("lfs_rescountdirop");
	} else {
		if (fsb > 0)
			lfs_rescount++;
		else if (fsb < 0)
			lfs_rescount--;
		if (lfs_rescount < 0)
			panic("lfs_rescount");
	}
#endif
	if (cantwait)
		return 0;

	/*
	 * XXX
	 * vref vnodes here so that cleaner doesn't try to reuse them.
	 * (see XXX comment in lfs_reserveavail)
	 */
	lfs_vref(vp);
	if (vp2 != NULL) {
		lfs_vref(vp2);
	}

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		goto done;

	/*
	 * XXX just a guess. should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2,
	    fragstoblks(fs, fsb), fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

done:
	lfs_vunref(vp);
	if (vp2 != NULL) {
		lfs_vunref(vp2);
	}

	return error;
}

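/*
 * VOP_BWRITE for LFS.  All writes are delayed; asynchronously writing
 * an LFS buffer on a read-write file system is a diagnostic error.
 */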
int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

#ifdef DIAGNOSTIC
	if (VTOI(bp->b_vp)->i_lfs->lfs_ronly == 0 && (bp->b_flags & B_ASYNC)) {
		panic("bawrite LFS buffer");
	}
#endif /* DIAGNOSTIC */
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int needed;

	needed = fsb + btofsb(fs, fs->lfs_sumsize) +
	    ((howmany(fs->lfs_uinodes + 1, INOPB(fs)) + fs->lfs_segtabsz +
	      1) << (fs->lfs_blktodb - fs->lfs_fsbtodb));

	if (needed >= fs->lfs_avail) {
#ifdef DEBUG
		printf("lfs_fits: no fit: fsb = %d, uinodes = %d, "
		    "needed = %d, avail = %d\n",
		    fsb, fs->lfs_uinodes, needed, fs->lfs_avail);
#endif
		return 0;
	}
	return 1;
}

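/*
 * Sleep until fsb blocks fit on disk, waking the cleaner as necessary.
 * Writes made on behalf of the cleaner (or a forced checkpoint) are
 * exempt, since the cleaner is what generates free space.
 */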
int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	/* Push cleaner blocks through regardless */
	simple_lock(&fs->lfs_interlock);
	if (fs->lfs_seglock &&
	    fs->lfs_lockpid == curproc->p_pid &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		simple_unlock(&fs->lfs_interlock);
		return 0;
	}
	simple_unlock(&fs->lfs_interlock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

		printf("lfs_availwait: out of available space, "
		    "waiting on cleaner\n");

		wakeup(&lfs_allclean_wakeup);
		wakeup(&fs->lfs_nextseg);
#ifdef DIAGNOSTIC
		if (fs->lfs_seglock && fs->lfs_lockpid == curproc->p_pid)
			panic("lfs_availwait: deadlock");
#endif
		error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

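/*
 * Guts of lfs_bwrite: charge the buffer's blocks against lfs_avail,
 * mark it dirty and locked, and flag the inode IN_CLEANING (for
 * cleaner writes) or IN_MODIFIED so the segment writer picks it up.
 */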
int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	int fsb, s;

	KASSERT(bp->b_flags & B_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));

	/*
	 * Don't write *any* blocks if we're mounted read-only.
	 * In particular the cleaner can't write blocks either.
	 */
	if (VTOI(bp->b_vp)->i_lfs->lfs_ronly) {
		bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_flags &= ~B_BUSY;
		else
			brelse(bp);
		return EROFS;
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if (!(bp->b_flags & B_LOCKED)) {
		fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
		fsb = fragstofsb(fs, numfrags(fs, bp->b_bcount));

		ip = VTOI(bp->b_vp);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		fs->lfs_avail -= fsb;
		bp->b_flags |= B_DELWRI;

		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
		s = splbio();
		reassignbuf(bp, bp->b_vp);
		splx(s);
	}

	if (bp->b_flags & B_CALL)
		bp->b_flags &= ~B_BUSY;
	else
		brelse(bp);

	return (0);
}

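/*
 * Flush dirty data for a single file system: take the LFS writer lock
 * and write out its segments.
 */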
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	if (fs->lfs_ronly)
		return;

	lfs_writer_enter(fs, "fldirop");

	if (lfs_dostats)
		++lfs_stats.flush_invoked;
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);

	lfs_writer_leave(fs);
}

/*
 * XXX
 * This routine flushes buffers out of the B_LOCKED queue when LFS has too
 * many locked down.  Eventually the pageout daemon will simply call LFS
 * when pages need to be reclaimed.  Note, we have one static count of locked
 * buffers, so we can't have more than a single file system.  To make this
 * work for multiple file systems, put the count into the mount structure.
 */
void
lfs_flush(struct lfs *fs, int flags)
{
	struct mount *mp, *nmp;

	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	if (lfs_writing && flags == 0) {	/* XXX flags */
#ifdef DEBUG_LFS
		printf("lfs_flush: not flushing because another flush is active\n");
#endif
		return;
	}
	/* XXX MP */
	while (lfs_writing && (flags & SEGM_WRITERD))
		ltsleep(&lfs_writing, PRIBIO + 1, "lfsflush", 0, 0);
	lfs_writing = 1;

	simple_lock(&lfs_subsys_lock);
	lfs_subsys_pages = 0; /* XXXUBC need a better way to count this */
	simple_unlock(&lfs_subsys_lock);
	wakeup(&lfs_subsys_pages);

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
		    MFSNAMELEN) == 0)
			lfs_flush_fs(VFSTOUFS(mp)->um_lfs, flags);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
	LFS_DEBUG_COUNTLOCKED("flush");

	lfs_writing = 0;
	wakeup(&lfs_writing);
}

#define	INOCOUNT(fs)	howmany((fs)->lfs_uinodes, INOPB(fs))
#define	INOBYTES(fs)	((fs)->lfs_uinodes * sizeof (struct ufs1_dinode))

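/*
 * Called before adding more dirty blocks: if too many buffers, pages,
 * or dirop vnodes are outstanding, flush and/or sleep until the totals
 * drop back below the LFS_WAIT_* watermarks.  The Ifile (i.e. probably
 * the cleaner) and active dirops are never made to wait here.
 */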
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern int lfs_dirvcount;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_flag & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
	 */
	simple_lock(&lfs_subsys_lock);
	while (fs->lfs_dirops > 0 &&
	       (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		lfs_subsys_pages > LFS_MAX_PAGES ||
		lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		ltsleep(&fs->lfs_writer, PRIBIO + 1, "bufdirop", 0,
		    &lfs_subsys_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG_LFS_FLUSH
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		printf("lqc = %d, max %d\n", locked_queue_count + INOCOUNT(fs),
		    LFS_MAX_BUFS);
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		printf("lqb = %ld, max %d\n", locked_queue_bytes + INOBYTES(fs),
		    LFS_MAX_BYTES);
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		printf("lssp = %d, max %d\n", lfs_subsys_pages, LFS_MAX_PAGES);
	if (lfs_dirvcount > LFS_MAX_DIROP)
		printf("ldvc = %d, max %d\n", lfs_dirvcount, LFS_MAX_DIROP);
	if (fs->lfs_diropwait > 0)
		printf("ldvw = %d\n", fs->lfs_diropwait);
#endif

	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
	    locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
	    lfs_subsys_pages > LFS_MAX_PAGES ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0)
	{
		simple_unlock(&lfs_subsys_lock);
		lfs_flush(fs, flags);
		simple_lock(&lfs_subsys_lock);
	}

	while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS ||
	       locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES ||
	       lfs_subsys_pages > LFS_WAIT_PAGES ||
	       lfs_dirvcount > LFS_MAX_DIROP)
	{
		simple_unlock(&lfs_subsys_lock);
		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
#ifdef DEBUG_LFS
		printf("lfs_check: waiting: count=%d, bytes=%ld\n",
		    locked_queue_count, locked_queue_bytes);
#endif
		error = tsleep(&locked_queue_count, PCATCH | PUSER,
		    "buffers", hz * LFS_BUFWAIT);
		if (error != EWOULDBLOCK) {
			simple_lock(&lfs_subsys_lock);
			break;
		}
		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		{
			lfs_flush(fs, flags | SEGM_CKP);
		}
		simple_lock(&lfs_subsys_lock);
	}
	simple_unlock(&lfs_subsys_lock);
	return (error);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size,
    int type)
{
	struct buf *bp;
	size_t nbytes;
	int s;

	nbytes = roundup(size, fsbtob(fs, 1));

	s = splbio();
	bp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	memset(bp, 0, sizeof(struct buf));
	BUF_INIT(bp);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vp is NULL in lfs_newbuf");
	if (bp == NULL)
		panic("bp is NULL after malloc in lfs_newbuf");
#endif
	s = splbio();
	bgetvp(vp, bp);
	splx(s);

	bp->b_saveaddr = (caddr_t)fs;
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;

	return (bp);
}

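/*
 * Release a buffer obtained from lfs_newbuf; B_INVAL marks a "fake"
 * buffer whose data area must not be freed here.
 */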
void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	int s;

	s = splbio();
	if (bp->b_vp)
		brelvp(bp);
	if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	pool_put(&bufpool, bp);
	splx(s);
}

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];

/*
 * Return a count of buffers on the "locked" queue.
 * Don't count malloced buffers, since they don't detract from the total.
 */
void
lfs_countlocked(int *count, long *bytes, char *msg)
{
	struct buf *bp;
	int n = 0;
	long int size = 0L;

	for (bp = bufqueues[BQ_LOCKED].tqh_first; bp;
	     bp = bp->b_freelist.tqe_next) {
		if (bp->b_flags & B_CALL)
			continue;
		n++;
		size += bp->b_bufsize;
#ifdef DEBUG_LOCKED_LIST
		if (n > nbuf)
			panic("lfs_countlocked: this can't happen: more"
			    " buffers locked than exist");
#endif
	}
#ifdef DEBUG_LOCKED_LIST
	/* Theoretically this function never really does anything */
	if (n != *count)
		printf("lfs_countlocked: %s: adjusted buf count from %d to %d\n",
		    msg, *count, n);
	if (size != *bytes)
		printf("lfs_countlocked: %s: adjusted byte count from %ld to %ld\n",
		    msg, *bytes, size);
#endif
	*count = n;
	*bytes = size;
	return;
}