/*	$NetBSD: lfs_bio.c,v 1.150 2025/09/15 03:55:24 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_bio.c	8.10 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.150 2025/09/15 03:55:24 perseant Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_extern.h>
#include <ufs/lfs/lfs_kernel.h>

#include <uvm/uvm_extern.h>

/*
 * LFS block write function.
 *
 * XXX
 * No write cost accounting is done.
 * This is almost certainly wrong for synchronous operations and NFS.
 *
 * protected by lfs_lock.
 */
int	locked_queue_count   = 0;	/* Count of locked-down buffers. */
long	locked_queue_bytes   = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages     = 0L;	/* Total number of LFS-written pages */
int	lfs_fs_pagetrip      = 0;	/* # of pages to trip per-fs write */
int	lfs_writing          = 0;	/* Set if already kicked off a writer
					   because of buffer space */
int	locked_queue_waiters = 0;	/* Number of processes waiting on lq */

/* Lock and condition variables for above. */
kcondvar_t	locked_queue_cv;
kcondvar_t	lfs_writing_cv;
kmutex_t	lfs_lock;
extern kcondvar_t lfs_writerd_cv;

extern int lfs_dostats;

/*
 * reserved number/bytes of locked buffers
 */
int locked_queue_rcount = 0;
long locked_queue_rbytes = 0L;

static int lfs_fits_buf(struct lfs *, int, int);
static int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
static int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int);

static int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit, bytes_fit;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));

	count_fit =
	    (locked_queue_count + locked_queue_rcount + n <= LFS_WAIT_BUFS);
	bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes <= LFS_WAIT_BYTES);

#ifdef DEBUG
	if (!count_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		      locked_queue_count, locked_queue_rcount,
		      n, LFS_WAIT_BUFS));
	}
	if (!bytes_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %ld\n",
		      locked_queue_bytes, locked_queue_rbytes,
		      bytes, LFS_WAIT_BYTES));
	}
#endif /* DEBUG */

	return (count_fit && bytes_fit);
}

/* ARGSUSED */
static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int n, int bytes)
{
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	cantwait = (VTOI(vp)->i_state & IN_ADIROP) || fs->lfs_unlockvp == vp;
	mutex_enter(&lfs_lock);
	while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		DLOG((DLOG_FLUSH, "lfs_reservebuf: flush filesystem %p with checkpoint\n", fs));
		lfs_flush(fs, SEGM_CKP, 0);

		DLOG((DLOG_AVAIL, "lfs_reservebuf: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		cv_broadcast(&lfs_writerd_cv);
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error && error != EWOULDBLOCK) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	if (n < 0 && locked_queue_waiters > 0) {
		DLOG((DLOG_AVAIL, "lfs_reservebuf: broadcast: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		cv_broadcast(&locked_queue_cv);
	}

	mutex_exit(&lfs_lock);

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.  If there is not enough space, wait
 * for the space to become available.
 *
 * Called with vp locked.  (Note however that if fsb < 0, vp is ignored.)
 */
static int
lfs_reserveavail(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	slept = 0;
	mutex_enter(&lfs_lock);
	cantwait = (VTOI(vp)->i_state & IN_ADIROP) || fs->lfs_unlockvp == vp;
	while (!cantwait && fsb > 0 &&
	       !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
		mutex_exit(&lfs_lock);

		if (!slept) {
			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %jd,"
			      " est_bfree = %jd)\n",
			      fsb + fs->lfs_ravail + fs->lfs_favail,
			      (intmax_t)lfs_sb_getbfree(fs),
			      (intmax_t)LFS_EST_BFREE(fs)));
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		lfs_wakeup_cleaner(fs);

		mutex_enter(&lfs_lock);
		/* Cleaner might have run while we were reading, check again */
		if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail))
			break;

		error = mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "lfs_reserve", 0, &lfs_lock);
		if (error) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}
#ifdef DEBUG
	if (slept) {
		DLOG((DLOG_AVAIL, "lfs_reserve: woke up\n"));
	}
#endif
	fs->lfs_ravail += fsb;
	mutex_exit(&lfs_lock);

	return 0;
}

#ifdef DIAGNOSTIC
int lfs_rescount;
int lfs_rescountdirop;
#endif

int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;

	ASSERT_MAYBE_SEGLOCK(fs);
	if (vp2) {
		/* Make sure we're not in the process of reclaiming vp2 */
		mutex_enter(&lfs_lock);
		while(fs->lfs_flags & LFS_UNDIROP) {
			mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0,
			    &lfs_lock);
		}
		mutex_exit(&lfs_lock);
	}

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

#ifdef DIAGNOSTIC
	mutex_enter(&lfs_lock);
	if (fsb > 0)
		lfs_rescount++;
	else if (fsb < 0)
		lfs_rescount--;
	if (lfs_rescount < 0)
		panic("lfs_rescount");
	mutex_exit(&lfs_lock);
#endif

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		return error;

	/*
	 * XXX just a guess. should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2, fsb, lfs_fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

	return error;
}
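
/*
 * Illustrative call pattern for lfs_reserve() (a sketch only, not copied
 * from any particular caller; "nblocks" is a hypothetical estimate of the
 * number of fsb-sized blocks the operation may dirty).  A positive count
 * reserves space before an operation that will dirty metadata while vnode
 * locks are held; the matching negative count releases the reservation
 * afterwards:
 *
 *	error = lfs_reserve(fs, vp, NULL, nblocks);
 *	if (error)
 *		return error;
 *	... perform the operation that may dirty up to nblocks blocks ...
 *	lfs_reserve(fs, vp, NULL, -nblocks);
 */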

int
lfs_max_bufs(void)
{

	return LFS_MAX_RESOURCE(buf_nbuf(), 1);
}

int
lfs_wait_bufs(void)
{

	return LFS_WAIT_RESOURCE(buf_nbuf(), 1);
}

int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

	KASSERTMSG((VTOI(bp->b_vp)->i_lfs->lfs_ronly ||
	    !(bp->b_flags & B_ASYNC)),
	    "bawrite LFS buffer");
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int64_t needed;

	ASSERT_NO_SEGLOCK(fs);
	needed = fsb + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
		 ((howmany(lfs_sb_getuinodes(fs) + 1, LFS_INOPB(fs)) +
		   lfs_sb_getsegtabsz(fs) +
		   1) << (lfs_sb_getbshift(fs) - lfs_sb_getffshift(fs)));

	if (needed >= lfs_sb_getavail(fs)) {
#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
		      "needed = %jd, avail = %jd\n",
		      (long)fsb, (long)lfs_sb_getuinodes(fs), (intmax_t)needed,
		      (intmax_t)lfs_sb_getavail(fs)));
#endif
		return 0;
	}
	return 1;
}
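
/*
 * Worked example of the calculation above, with purely illustrative numbers
 * (not taken from any real superblock): suppose the caller asks for
 * fsb = 16 frags, the summary block occupies 1 frag, there are 100 dirty
 * inodes with LFS_INOPB = 64 (so howmany(100 + 1, 64) = 2 inode blocks, the
 * "+ 1" covering the ifile inode), segtabsz = 2 blocks, plus 1 ifile block,
 * and bshift - ffshift = 3 (8 frags per block).  Then
 * needed = 16 + 1 + ((2 + 2 + 1) << 3) = 57 frags, which must be strictly
 * less than lfs_avail for the write to fit.
 */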

int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	ASSERT_NO_SEGLOCK(fs);
	/* Push cleaner blocks through regardless */
	mutex_enter(&lfs_lock);
	if (LFS_SEGLOCK_HELD(fs) &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		mutex_exit(&lfs_lock);
		return 0;
	}
	mutex_exit(&lfs_lock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_availwait: out of available space, "
		      "waiting on cleaner\n"));
#endif

		lfs_wakeup_cleaner(fs);
		KASSERTMSG(!LFS_SEGLOCK_HELD(fs), "lfs_availwait: deadlock");
		error = tsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	struct vnode *vp;
	int fsb;

	vp = bp->b_vp;
	fs = VFSTOULFS(vp->v_mount)->um_lfs;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
	KASSERT((bp->b_flags & B_LOCKED) || !(bp->b_oflags & BO_DELWRI));

	/*
	 * Don't write *any* blocks if we're mounted read-only, or
	 * if we are "already unmounted".
	 *
	 * In particular the cleaner can't write blocks either.
	 */
	if (fs->lfs_ronly || (lfs_sb_getpflags(fs) & LFS_PF_CLEAN)) {
		bp->b_oflags &= ~BO_DELWRI;
		bp->b_flags |= B_READ; /* XXX is this right? --ks */
		bp->b_error = 0;
		mutex_enter(&bufcache_lock);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_cflags &= ~BC_BUSY;
		else
			brelsel(bp, 0);
		mutex_exit(&bufcache_lock);
		return (fs->lfs_ronly ? EROFS : 0);
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if ((bp->b_flags & B_LOCKED) == 0) {
		fsb = lfs_numfrags(fs, bp->b_bcount);

		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		mutex_exit(&lfs_lock);
		lfs_sb_subavail(fs, fsb);

		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE;
		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~B_READ;
		bp->b_error = 0;
		reassignbuf(bp, bp->b_vp);
		mutex_exit(vp->v_interlock);
	} else {
		mutex_enter(&bufcache_lock);
	}

	if (bp->b_iodone != NULL)
		bp->b_cflags &= ~BC_BUSY;
	else
		brelsel(bp, 0);
	mutex_exit(&bufcache_lock);

	return (0);
}

/*
 * Called and returns with the lfs_lock held.
 */
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));
	if (fs->lfs_ronly)
		return;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	fs->lfs_pdflush = 0;
	mutex_exit(&lfs_lock);
	lfs_writer_enter(fs, "fldirop");
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	lfs_writer_leave(fs);
	mutex_enter(&lfs_lock);
	fs->lfs_favail = 0; /* XXX */
}
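
/*
 * A minimal sketch of how lfs_flush_fs() is meant to be invoked, matching
 * the calls made from lfs_flush() below: the caller takes lfs_lock, and the
 * routine drops and re-acquires it internally around the segment write, so
 * the lock is held again on return:
 *
 *	mutex_enter(&lfs_lock);
 *	lfs_flush_fs(fs, flags);
 *	mutex_exit(&lfs_lock);
 */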

/*
 * This routine initiates segment writes when LFS is consuming too many
 * resources.  Ideally the pageout daemon would be able to direct LFS
 * more subtly.
 * XXX We have one static count of locked buffers;
 * XXX need to think more about the multiple filesystem case.
 *
 * Called and returns with lfs_lock held.
 * If fs != NULL, we hold the segment lock for fs.
 */
void
lfs_flush(struct lfs *fs, int flags, int only_onefs)
{
	extern u_int64_t locked_fakequeue_count;
	mount_iterator_t *iter;
	struct mount *mp;
	struct lfs *tfs;

	KASSERT(mutex_owned(&lfs_lock));
	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));
	KASSERT(!(fs == NULL && only_onefs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	if (lfs_writing && !(flags & (SEGM_SYNC|SEGM_CKP))) {
		DLOG((DLOG_FLUSH, "lfs_flush: not flushing because another flush is active\n"));
		return;
	}
	while (lfs_writing)
		cv_wait(&lfs_writing_cv, &lfs_lock);
	lfs_writing = 1;

	mutex_exit(&lfs_lock);

	if (fs != NULL) {
		if (!(fs->lfs_flags & LFS_NOTYET)
		    && vfs_busy(fs->lfs_ivnode->v_mount))
			goto errout;
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, flags);
		mutex_exit(&lfs_lock);
		if (!(fs->lfs_flags & LFS_NOTYET))
			vfs_unbusy(fs->lfs_ivnode->v_mount);
	}
	if (!only_onefs) {
		locked_fakequeue_count = 0;
		mountlist_iterator_init(&iter);
		while ((mp = mountlist_iterator_next(iter)) != NULL) {
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
				tfs = VFSTOULFS(mp)->um_lfs;
				if (tfs == fs)
					continue;
				mutex_enter(&lfs_lock);
				lfs_flush_fs(tfs, flags);
				mutex_exit(&lfs_lock);
			}
		}
		mountlist_iterator_destroy(iter);
	}
	wakeup(&lfs_subsys_pages);

 errout:
	mutex_enter(&lfs_lock);
	KASSERT(lfs_writing);
	lfs_writing = 0;
	wakeup(&lfs_writing);
}

#define INOCOUNT(fs) howmany(lfs_sb_getuinodes(fs), LFS_INOPB(fs))
#define INOBYTES(fs) (lfs_sb_getuinodes(fs) * DINOSIZE(fs))
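
/*
 * INOCOUNT and INOBYTES estimate, respectively, how many inode blocks and
 * how many bytes would be needed to write out the inodes that are currently
 * dirty (lfs_uinodes).  lfs_check() below adds these to the locked-queue
 * totals when comparing against the LFS_MAX_* and LFS_WAIT_* thresholds.
 * As a purely illustrative example, with 128 dirty inodes and 64 inodes per
 * block, INOCOUNT(fs) would be howmany(128, 64) = 2 buffers.
 */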

/*
 * make sure that we don't have too many locked buffers.
 * flush buffers if needed.
 */
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern kcondvar_t lfs_writerd_cv;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_state & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	ASSERT_NO_SEGLOCK(fs);

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_dirops > 0 &&
	       (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS ||
		locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES ||
		lfs_subsys_pages > LFS_WAIT_PAGES ||
		fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0,
		    &lfs_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		DLOG((DLOG_FLUSH, "lfs_check: lqc = %d, max %d\n",
		      locked_queue_count + INOCOUNT(fs), LFS_MAX_BUFS));
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		DLOG((DLOG_FLUSH, "lfs_check: lqb = %ld, max %ld\n",
		      locked_queue_bytes + INOBYTES(fs), LFS_MAX_BYTES));
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		DLOG((DLOG_FLUSH, "lfs_check: lssp = %d, max %d\n",
		      lfs_subsys_pages, LFS_MAX_PAGES));
	if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip)
		DLOG((DLOG_FLUSH, "lfs_check: fssp = %d, trip at %d\n",
		      fs->lfs_pages, lfs_fs_pagetrip));
	if (lfs_dirvcount > LFS_MAX_DIROP)
		DLOG((DLOG_FLUSH, "lfs_check: ldvc = %d, max %d\n",
		      lfs_dirvcount, LFS_MAX_DIROP));
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs))
		DLOG((DLOG_FLUSH, "lfs_check: lfdvc = %d, max %d\n",
		      fs->lfs_dirvcount, LFS_MAX_FSDIROP(fs)));
	if (fs->lfs_diropwait > 0)
		DLOG((DLOG_FLUSH, "lfs_check: ldvw = %d\n",
		      fs->lfs_diropwait));
#endif

	/* If there are too many pending dirops, we have to flush them. */
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		KASSERT(fs->lfs_dirops == 0);
		fs->lfs_writer++;
		mutex_exit(&lfs_lock);
		lfs_flush_dirops(fs);
		mutex_enter(&lfs_lock);
		if (--fs->lfs_writer == 0)
			cv_broadcast(&fs->lfs_diropscv);
		KASSERT(fs->lfs_dirops == 0);
	} else if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		   locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		   lfs_subsys_pages > LFS_MAX_PAGES ||
		   fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		   lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		lfs_flush(fs, flags, 0);
	} else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) {
		/*
		 * If we didn't flush the whole thing, some filesystems
		 * still might want to be flushed.
		 */
		++fs->lfs_pdflush;
		cv_broadcast(&lfs_writerd_cv);
	}

	while (locked_queue_count + INOCOUNT(fs) >= LFS_WAIT_BUFS ||
	       locked_queue_bytes + INOBYTES(fs) >= LFS_WAIT_BYTES ||
	       lfs_subsys_pages > LFS_WAIT_PAGES ||
	       fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	       lfs_dirvcount > LFS_MAX_DIROP) {

		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
		DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		cv_broadcast(&lfs_writerd_cv);
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error != EWOULDBLOCK)
			break;

		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) >= LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) >= LFS_MAX_BYTES) {
			lfs_flush(fs, flags | SEGM_CKP, 0);
		}
	}
	mutex_exit(&lfs_lock);
	return (error);
}
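
/*
 * A sketch of how callers in the write path typically use lfs_check()
 * (illustrative only; the parameter names here are hypothetical).  It is
 * called before dirtying more buffers or pages, may sleep until enough
 * resources have been freed, and returns nonzero if interrupted:
 *
 *	error = lfs_check(vp, lbn, 0);
 *	if (error)
 *		return error;
 */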

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
	struct buf *bp;
	size_t nbytes;

	ASSERT_MAYBE_SEGLOCK(fs);
	nbytes = roundup(size, lfs_fsbtob(fs, 1));

	bp = getiobuf(NULL, true);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
	KASSERT(vp != NULL);
	KASSERT(bp != NULL);

	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_free_aiodone;
	bp->b_cflags |= BC_BUSY | BC_NOCACHE;
	bp->b_private = fs;

	mutex_enter(&bufcache_lock);
	mutex_enter(vp->v_interlock);
	bgetvp(vp, bp);
	mutex_exit(vp->v_interlock);
	mutex_exit(&bufcache_lock);

	return (bp);
}

void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		brelvp(bp);
		mutex_exit(vp->v_interlock);
		mutex_exit(&bufcache_lock);
	}
	if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	putiobuf(bp);
}

int
lfs_wait_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_WAIT_RESOURCE(active + inactive + uvm_availmem(false), 1);
}

int
lfs_max_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_MAX_RESOURCE(active + inactive + uvm_availmem(false), 1);
}