1 1.150 perseant /* $NetBSD: lfs_bio.c,v 1.150 2025/09/15 03:55:24 perseant Exp $ */ 2 1.2 cgd 3 1.7 perseant /*- 4 1.108 ad * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc. 5 1.7 perseant * All rights reserved. 6 1.7 perseant * 7 1.7 perseant * This code is derived from software contributed to The NetBSD Foundation 8 1.7 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 1.7 perseant * 10 1.7 perseant * Redistribution and use in source and binary forms, with or without 11 1.7 perseant * modification, are permitted provided that the following conditions 12 1.7 perseant * are met: 13 1.7 perseant * 1. Redistributions of source code must retain the above copyright 14 1.7 perseant * notice, this list of conditions and the following disclaimer. 15 1.7 perseant * 2. Redistributions in binary form must reproduce the above copyright 16 1.7 perseant * notice, this list of conditions and the following disclaimer in the 17 1.7 perseant * documentation and/or other materials provided with the distribution. 18 1.7 perseant * 19 1.7 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.7 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.7 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.7 perseant * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.7 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.7 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.7 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.7 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.7 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.7 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.7 perseant * POSSIBILITY OF SUCH DAMAGE. 30 1.7 perseant */ 31 1.1 mycroft /* 32 1.1 mycroft * Copyright (c) 1991, 1993 33 1.1 mycroft * The Regents of the University of California. All rights reserved. 34 1.1 mycroft * 35 1.1 mycroft * Redistribution and use in source and binary forms, with or without 36 1.1 mycroft * modification, are permitted provided that the following conditions 37 1.1 mycroft * are met: 38 1.1 mycroft * 1. Redistributions of source code must retain the above copyright 39 1.1 mycroft * notice, this list of conditions and the following disclaimer. 40 1.1 mycroft * 2. Redistributions in binary form must reproduce the above copyright 41 1.1 mycroft * notice, this list of conditions and the following disclaimer in the 42 1.1 mycroft * documentation and/or other materials provided with the distribution. 43 1.72 agc * 3. Neither the name of the University nor the names of its contributors 44 1.1 mycroft * may be used to endorse or promote products derived from this software 45 1.1 mycroft * without specific prior written permission. 46 1.1 mycroft * 47 1.1 mycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 1.1 mycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 1.1 mycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 1.1 mycroft * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 1.1 mycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 1.1 mycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 1.1 mycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 1.1 mycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 1.1 mycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 1.1 mycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 1.1 mycroft * SUCH DAMAGE. 58 1.1 mycroft * 59 1.6 fvdl * @(#)lfs_bio.c 8.10 (Berkeley) 6/10/95 60 1.1 mycroft */ 61 1.39 lukem 62 1.39 lukem #include <sys/cdefs.h> 63 1.150 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.150 2025/09/15 03:55:24 perseant Exp $"); 64 1.1 mycroft 65 1.1 mycroft #include <sys/param.h> 66 1.5 christos #include <sys/systm.h> 67 1.1 mycroft #include <sys/proc.h> 68 1.1 mycroft #include <sys/buf.h> 69 1.1 mycroft #include <sys/vnode.h> 70 1.1 mycroft #include <sys/resourcevar.h> 71 1.1 mycroft #include <sys/mount.h> 72 1.1 mycroft #include <sys/kernel.h> 73 1.93 elad #include <sys/kauth.h> 74 1.1 mycroft 75 1.123 dholland #include <ufs/lfs/ulfs_inode.h> 76 1.123 dholland #include <ufs/lfs/ulfsmount.h> 77 1.123 dholland #include <ufs/lfs/ulfs_extern.h> 78 1.1 mycroft 79 1.1 mycroft #include <ufs/lfs/lfs.h> 80 1.132 dholland #include <ufs/lfs/lfs_accessors.h> 81 1.1 mycroft #include <ufs/lfs/lfs_extern.h> 82 1.126 dholland #include <ufs/lfs/lfs_kernel.h> 83 1.1 mycroft 84 1.149 riastrad #include <uvm/uvm_extern.h> 85 1.58 perseant 86 1.1 mycroft /* 87 1.1 mycroft * LFS block write function. 88 1.1 mycroft * 89 1.1 mycroft * XXX 90 1.1 mycroft * No write cost accounting is done. 91 1.1 mycroft * This is almost certainly wrong for synchronous operations and NFS. 92 1.71 yamt * 93 1.107 ad * protected by lfs_lock. 
 */
int	locked_queue_count   = 0;	/* Count of locked-down buffers. */
long	locked_queue_bytes   = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages     = 0L;	/* Total number LFS-written pages */
int	lfs_fs_pagetrip	     = 0;	/* # of pages to trip per-fs write */
int	lfs_writing	     = 0;	/* Set if already kicked off a writer
					   because of buffer space */
int	locked_queue_waiters = 0;	/* Number of processes waiting on lq */

/* Lock and condition variables for above. */
kcondvar_t	locked_queue_cv;
kcondvar_t	lfs_writing_cv;
kmutex_t	lfs_lock;
extern kcondvar_t lfs_writerd_cv;

extern int lfs_dostats;

/*
 * reserved number/bytes of locked buffers
 */
int locked_queue_rcount = 0;
long locked_queue_rbytes = 0L;

static int lfs_fits_buf(struct lfs *, int, int);
static int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
static int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int);

/*
 * Return nonzero iff "n" more locked buffers totalling "bytes" more bytes
 * would still fit under the locked-queue high-water marks (LFS_WAIT_BUFS /
 * LFS_WAIT_BYTES), counting both the actual queue and outstanding
 * reservations.  Called with lfs_lock held (asserted below).
 */
static int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit, bytes_fit;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));

	count_fit =
	    (locked_queue_count + locked_queue_rcount + n <= LFS_WAIT_BUFS);
	bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes <= LFS_WAIT_BYTES);

#ifdef DEBUG
	if (!count_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		      locked_queue_count, locked_queue_rcount,
		      n, LFS_WAIT_BUFS));
	}
	if (!bytes_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %ld\n",
		      locked_queue_bytes, locked_queue_rbytes,
		      bytes, LFS_WAIT_BYTES));
	}
#endif /* DEBUG */

	return (count_fit && bytes_fit);
}

/*
 * Reserve (n > 0) or release (n < 0) room for "n" locked buffers totalling
 * "bytes" bytes.  While the reservation does not fit, checkpoint-flush the
 * filesystem and sleep (interruptibly, bounded by LFS_BUFWAIT) waiting for
 * the locked queue to drain.  Callers inside a dirop (IN_ADIROP) or working
 * on fs->lfs_unlockvp must not wait, and are granted the reservation
 * unconditionally.  On release, waiters on the queue are woken.
 */
/* ARGSUSED */
static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int n, int bytes)
{
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	cantwait = (VTOI(vp)->i_state & IN_ADIROP) || fs->lfs_unlockvp == vp;
	mutex_enter(&lfs_lock);
	while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		DLOG((DLOG_FLUSH, "lfs_reservebuf: flush filesystem %p with checkpoint\n", fs));
		lfs_flush(fs, SEGM_CKP, 0);

		DLOG((DLOG_AVAIL, "lfs_reservebuf: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		cv_broadcast(&lfs_writerd_cv);
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error && error != EWOULDBLOCK) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	/* A release may have made room for someone else; wake them. */
	if (n < 0 && locked_queue_waiters > 0) {
		DLOG((DLOG_AVAIL, "lfs_reservebuf: broadcast: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		cv_broadcast(&locked_queue_cv);
	}

	mutex_exit(&lfs_lock);

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.  If there is not enough space, wait
 * for the space to become available.
 *
 * Called with vp locked.  (Note nowever that if fsb < 0, vp is ignored.)
 */
static int
lfs_reserveavail(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	slept = 0;
	mutex_enter(&lfs_lock);
	cantwait = (VTOI(vp)->i_state & IN_ADIROP) || fs->lfs_unlockvp == vp;
	while (!cantwait && fsb > 0 &&
	       !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
		mutex_exit(&lfs_lock);

		if (!slept) {
			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %jd,"
			      " est_bfree = %jd)\n",
			      fsb + fs->lfs_ravail + fs->lfs_favail,
			      (intmax_t)lfs_sb_getbfree(fs),
			      (intmax_t)LFS_EST_BFREE(fs)));
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		lfs_wakeup_cleaner(fs);

		mutex_enter(&lfs_lock);
		/* Cleaner might have run while we were reading, check again */
		if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail))
			break;

		error = mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "lfs_reserve", 0, &lfs_lock);
		if (error) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}
#ifdef DEBUG
	if (slept) {
		DLOG((DLOG_AVAIL, "lfs_reserve: woke up\n"));
	}
#endif
	fs->lfs_ravail += fsb;
	mutex_exit(&lfs_lock);

	return 0;
}

#ifdef DIAGNOSTIC
int lfs_rescount;
int lfs_rescountdirop;
#endif

/*
 * Public entry point: reserve (fsb > 0) or release (fsb < 0) both disk
 * space and buffer-header room for a sensitive operation on vp (and
 * optionally vp2).  If the buffer reservation fails after the disk-space
 * reservation succeeded, the latter is backed out before returning the
 * error.  Waits out any in-progress LFS_UNDIROP reclaim of vp2 first.
 */
int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;

	ASSERT_MAYBE_SEGLOCK(fs);
	if (vp2) {
		/* Make sure we're not in the process of reclaiming vp2 */
		mutex_enter(&lfs_lock);
		while(fs->lfs_flags & LFS_UNDIROP) {
			mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0,
			    &lfs_lock);
		}
		mutex_exit(&lfs_lock);
	}

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

#ifdef DIAGNOSTIC
	mutex_enter(&lfs_lock);
	if (fsb > 0)
		lfs_rescount++;
	else if (fsb < 0)
		lfs_rescount--;
	if (lfs_rescount < 0)
		panic("lfs_rescount");
	mutex_exit(&lfs_lock);
#endif

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		return error;

	/*
	 * XXX just a guess. should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2, fsb, lfs_fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

	return error;
}

/* Maximum locked-buffer resource, derived from the system buffer count. */
int
lfs_max_bufs(void)
{

	return LFS_MAX_RESOURCE(buf_nbuf(), 1);
}

/* Locked-buffer level at which writers start waiting; see lfs_max_bufs(). */
int
lfs_wait_bufs(void)
{

	return LFS_WAIT_RESOURCE(buf_nbuf(), 1);
}

/*
 * VOP_BWRITE entry point.  Asserts that async writes only happen on a
 * read-only filesystem, then defers to lfs_bwrite_ext() with no flags.
 */
int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

	KASSERTMSG((VTOI(bp->b_vp)->i_lfs->lfs_ronly ||
	    !(bp->b_flags & B_ASYNC)),
	    "bawrite LFS buffer");
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int64_t needed;

	ASSERT_NO_SEGLOCK(fs);
	needed = fsb + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
		((howmany(lfs_sb_getuinodes(fs) + 1, LFS_INOPB(fs)) +
		  lfs_sb_getsegtabsz(fs) +
		  1) << (lfs_sb_getbshift(fs) - lfs_sb_getffshift(fs)));

	if (needed >= lfs_sb_getavail(fs)) {
#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
		      "needed = %jd, avail = %jd\n",
		      (long)fsb, (long)lfs_sb_getuinodes(fs), (intmax_t)needed,
		      (intmax_t)lfs_sb_getavail(fs)));
#endif
		return 0;
	}
	return 1;
}

/*
 * Block (interruptibly) until fsb blocks fit on disk, repeatedly syncing
 * the cleanerinfo block and waking the cleaner to make space.  Cleaner
 * and forced-checkpoint writes under the segment lock are exempt and
 * return immediately, since blocking them would deadlock.
 */
int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	ASSERT_NO_SEGLOCK(fs);
	/* Push cleaner blocks through regardless */
	mutex_enter(&lfs_lock);
	if (LFS_SEGLOCK_HELD(fs) &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		mutex_exit(&lfs_lock);
		return 0;
	}
	mutex_exit(&lfs_lock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_availwait: out of available space, "
		      "waiting on cleaner\n"));
#endif

		lfs_wakeup_cleaner(fs);
		KASSERTMSG(!LFS_SEGLOCK_HELD(fs), "lfs_availwait: deadlock");
		error = tsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

/*
 * Write a buffer the LFS way: mark the owning inode dirty (IN_CLEANING
 * for BW_CLEAN buffers, else IN_MODIFIED), charge the block against
 * lfs_avail, and park the buffer on the LOCKED queue for the segment
 * writer instead of writing it now.  On a read-only or "cleaned"
 * (LFS_PF_CLEAN) filesystem the write is discarded instead.
 */
int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	struct vnode *vp;
	int fsb;

	vp = bp->b_vp;
	fs = VFSTOULFS(vp->v_mount)->um_lfs;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
	KASSERT((bp->b_flags & B_LOCKED) || !(bp->b_oflags & BO_DELWRI));

	/*
	 * Don't write *any* blocks if we're mounted read-only, or
	 * if we are "already unmounted".
	 *
	 * In particular the cleaner can't write blocks either.
	 */
	if (fs->lfs_ronly || (lfs_sb_getpflags(fs) & LFS_PF_CLEAN)) {
		bp->b_oflags &= ~BO_DELWRI;
		bp->b_flags |= B_READ; /* XXX is this right? --ks */
		bp->b_error = 0;
		mutex_enter(&bufcache_lock);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_cflags &= ~BC_BUSY;
		else
			brelsel(bp, 0);
		mutex_exit(&bufcache_lock);
		return (fs->lfs_ronly ? EROFS : 0);
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if ((bp->b_flags & B_LOCKED) == 0) {
		fsb = lfs_numfrags(fs, bp->b_bcount);

		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		mutex_exit(&lfs_lock);
		lfs_sb_subavail(fs, fsb);

		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE;
		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~B_READ;
		bp->b_error = 0;
		reassignbuf(bp, bp->b_vp);
		mutex_exit(vp->v_interlock);
	} else {
		mutex_enter(&bufcache_lock);
	}

	if (bp->b_iodone != NULL)
		bp->b_cflags &= ~BC_BUSY;
	else
		brelsel(bp, 0);
	mutex_exit(&bufcache_lock);

	return (0);
}

/*
 * Called and return with the lfs_lock held.
 */
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));
	if (fs->lfs_ronly)
		return;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	fs->lfs_pdflush = 0;
	/* Drop lfs_lock across the segment write; retaken before return. */
	mutex_exit(&lfs_lock);
	lfs_writer_enter(fs, "fldirop");
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	lfs_writer_leave(fs);
	mutex_enter(&lfs_lock);
	fs->lfs_favail = 0; /* XXX */
}

/*
 * This routine initiates segment writes when LFS is consuming too many
 * resources.  Ideally the pageout daemon would be able to direct LFS
 * more subtly.
 * XXX We have one static count of locked buffers;
 * XXX need to think more about the multiple filesystem case.
 *
 * Called and return with lfs_lock held.
 * If fs != NULL, we hold the segment lock for fs.
 */
void
lfs_flush(struct lfs *fs, int flags, int only_onefs)
{
	extern u_int64_t locked_fakequeue_count;
	mount_iterator_t *iter;
	struct mount *mp;
	struct lfs *tfs;

	KASSERT(mutex_owned(&lfs_lock));
	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));
	KASSERT(!(fs == NULL && only_onefs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	/* Non-sync, non-checkpoint flushes piggyback on an active flush. */
	if (lfs_writing && !(flags & (SEGM_SYNC|SEGM_CKP))) {
		DLOG((DLOG_FLUSH, "lfs_flush: not flushing because another flush is active\n"));
		return;
	}
	while (lfs_writing)
		cv_wait(&lfs_writing_cv, &lfs_lock);
	lfs_writing = 1;

	mutex_exit(&lfs_lock);

	if (fs != NULL) {
		if (!(fs->lfs_flags & LFS_NOTYET)
		    && vfs_busy(fs->lfs_ivnode->v_mount))
			goto errout;
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, flags);
		mutex_exit(&lfs_lock);
		if (!(fs->lfs_flags & LFS_NOTYET))
			vfs_unbusy(fs->lfs_ivnode->v_mount);
	}
	if (!only_onefs) {
		locked_fakequeue_count = 0;
		mountlist_iterator_init(&iter);
		while ((mp = mountlist_iterator_next(iter)) != NULL) {
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
				tfs = VFSTOULFS(mp)->um_lfs;
				if (tfs == fs)
					continue; /* already flushed above */
				mutex_enter(&lfs_lock);
				lfs_flush_fs(tfs, flags);
				mutex_exit(&lfs_lock);
			}
		}
		mountlist_iterator_destroy(iter);
	}
	wakeup(&lfs_subsys_pages);

 errout:
	mutex_enter(&lfs_lock);
	KASSERT(lfs_writing);
	lfs_writing = 0;
	wakeup(&lfs_writing);
}

/* Inode blocks, and bytes, needed to write this fs's dirty inodes. */
#define INOCOUNT(fs) howmany(lfs_sb_getuinodes(fs), LFS_INOPB(fs))
#define INOBYTES(fs) (lfs_sb_getuinodes(fs) * DINOSIZE(fs))

/*
 * make sure that we don't have too many locked buffers.
 * flush buffers if needed.
 */
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern kcondvar_t lfs_writerd_cv;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_state & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	ASSERT_NO_SEGLOCK(fs);

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_dirops > 0 &&
	       (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS ||
		locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES ||
		lfs_subsys_pages > LFS_WAIT_PAGES ||
		fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0,
			&lfs_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		DLOG((DLOG_FLUSH, "lfs_check: lqc = %d, max %d\n",
		      locked_queue_count + INOCOUNT(fs), LFS_MAX_BUFS));
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		DLOG((DLOG_FLUSH, "lfs_check: lqb = %ld, max %ld\n",
		      locked_queue_bytes + INOBYTES(fs), LFS_MAX_BYTES));
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		DLOG((DLOG_FLUSH, "lfs_check: lssp = %d, max %d\n",
		      lfs_subsys_pages, LFS_MAX_PAGES));
	if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip)
		DLOG((DLOG_FLUSH, "lfs_check: fssp = %d, trip at %d\n",
		      fs->lfs_pages, lfs_fs_pagetrip));
	if (lfs_dirvcount > LFS_MAX_DIROP)
		DLOG((DLOG_FLUSH, "lfs_check: ldvc = %d, max %d\n",
		      lfs_dirvcount, LFS_MAX_DIROP));
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs))
		DLOG((DLOG_FLUSH, "lfs_check: lfdvc = %d, max %d\n",
		      fs->lfs_dirvcount, LFS_MAX_FSDIROP(fs)));
	if (fs->lfs_diropwait > 0)
		DLOG((DLOG_FLUSH, "lfs_check: ldvw = %d\n",
		      fs->lfs_diropwait));
#endif

	/* If there are too many pending dirops, we have to flush them. */
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		KASSERT(fs->lfs_dirops == 0);
		fs->lfs_writer++;
		mutex_exit(&lfs_lock);
		lfs_flush_dirops(fs);
		mutex_enter(&lfs_lock);
		if (--fs->lfs_writer == 0)
			cv_broadcast(&fs->lfs_diropscv);
		KASSERT(fs->lfs_dirops == 0);
	} else if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
		   locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
		   lfs_subsys_pages > LFS_MAX_PAGES ||
		   fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
		   lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		lfs_flush(fs, flags, 0);
	} else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) {
		/*
		 * If we didn't flush the whole thing, some filesystems
		 * still might want to be flushed.
		 */
		++fs->lfs_pdflush;
		cv_broadcast(&lfs_writerd_cv);
	}

	while (locked_queue_count + INOCOUNT(fs) >= LFS_WAIT_BUFS ||
	       locked_queue_bytes + INOBYTES(fs) >= LFS_WAIT_BYTES ||
	       lfs_subsys_pages > LFS_WAIT_PAGES ||
	       fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	       lfs_dirvcount > LFS_MAX_DIROP) {

		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
		DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		cv_broadcast(&lfs_writerd_cv);
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error != EWOULDBLOCK)
			break;

		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) >= LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) >= LFS_MAX_BYTES) {
			lfs_flush(fs, flags | SEGM_CKP, 0);
		}
	}
	mutex_exit(&lfs_lock);
	return (error);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
	struct buf *bp;
	size_t nbytes;

	ASSERT_MAYBE_SEGLOCK(fs);
	nbytes = roundup(size, lfs_fsbtob(fs, 1));

	bp = getiobuf(NULL, true);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
	KASSERT(vp != NULL);
	KASSERT(bp != NULL);

	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_free_aiodone;
	bp->b_cflags |= BC_BUSY | BC_NOCACHE;
	bp->b_private = fs;

	mutex_enter(&bufcache_lock);
	mutex_enter(vp->v_interlock);
	bgetvp(vp, bp);
	mutex_exit(vp->v_interlock);
	mutex_exit(&bufcache_lock);

	return (bp);
}

/*
 * Release a buffer obtained from lfs_newbuf(): detach it from its vnode,
 * free its data area (unless BC_INVAL marks it as a "fake" buffer whose
 * data is owned elsewhere), and return the header to the iobuf pool.
 */
void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		brelvp(bp);
		mutex_exit(vp->v_interlock);
		mutex_exit(&bufcache_lock);
	}
	if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	putiobuf(bp);
}

777 1.95 yamt int 778 1.95 yamt lfs_wait_pages(void) 779 1.95 yamt { 780 1.95 yamt int active, inactive; 781 1.95 yamt 782 1.95 yamt uvm_estimatepageable(&active, &inactive); 783 1.148 ad return LFS_WAIT_RESOURCE(active + inactive + uvm_availmem(false), 1); 784 1.95 yamt } 785 1.95 yamt 786 1.95 yamt int 787 1.95 yamt lfs_max_pages(void) 788 1.95 yamt { 789 1.95 yamt int active, inactive; 790 1.95 yamt 791 1.95 yamt uvm_estimatepageable(&active, &inactive); 792 1.148 ad return LFS_MAX_RESOURCE(active + inactive + uvm_availmem(false), 1); 793 1.95 yamt } 794