1 1.96 riastrad /* $NetBSD: lfs_balloc.c,v 1.96 2020/09/05 16:30:13 riastradh Exp $ */ 2 1.2 cgd 3 1.11 perseant /*- 4 1.36 perseant * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 1.11 perseant * All rights reserved. 6 1.11 perseant * 7 1.11 perseant * This code is derived from software contributed to The NetBSD Foundation 8 1.11 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 1.11 perseant * 10 1.11 perseant * Redistribution and use in source and binary forms, with or without 11 1.11 perseant * modification, are permitted provided that the following conditions 12 1.11 perseant * are met: 13 1.11 perseant * 1. Redistributions of source code must retain the above copyright 14 1.11 perseant * notice, this list of conditions and the following disclaimer. 15 1.11 perseant * 2. Redistributions in binary form must reproduce the above copyright 16 1.11 perseant * notice, this list of conditions and the following disclaimer in the 17 1.11 perseant * documentation and/or other materials provided with the distribution. 18 1.11 perseant * 19 1.11 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.11 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.11 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.11 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.11 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.11 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.11 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.11 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.11 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.11 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.11 perseant * POSSIBILITY OF SUCH DAMAGE. 30 1.11 perseant */ 31 1.1 mycroft /* 32 1.1 mycroft * Copyright (c) 1989, 1991, 1993 33 1.1 mycroft * The Regents of the University of California. All rights reserved. 34 1.1 mycroft * 35 1.1 mycroft * Redistribution and use in source and binary forms, with or without 36 1.1 mycroft * modification, are permitted provided that the following conditions 37 1.1 mycroft * are met: 38 1.1 mycroft * 1. Redistributions of source code must retain the above copyright 39 1.1 mycroft * notice, this list of conditions and the following disclaimer. 40 1.1 mycroft * 2. Redistributions in binary form must reproduce the above copyright 41 1.1 mycroft * notice, this list of conditions and the following disclaimer in the 42 1.1 mycroft * documentation and/or other materials provided with the distribution. 43 1.43 agc * 3. Neither the name of the University nor the names of its contributors 44 1.1 mycroft * may be used to endorse or promote products derived from this software 45 1.1 mycroft * without specific prior written permission. 46 1.1 mycroft * 47 1.1 mycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 1.1 mycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 1.1 mycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 1.1 mycroft * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 1.1 mycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 1.1 mycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 1.1 mycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 1.1 mycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 1.1 mycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 1.1 mycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 1.1 mycroft * SUCH DAMAGE. 58 1.1 mycroft * 59 1.5 fvdl * @(#)lfs_balloc.c 8.4 (Berkeley) 5/8/95 60 1.1 mycroft */ 61 1.30 lukem 62 1.30 lukem #include <sys/cdefs.h> 63 1.96 riastrad __KERNEL_RCSID(0, "$NetBSD: lfs_balloc.c,v 1.96 2020/09/05 16:30:13 riastradh Exp $"); 64 1.8 scottr 65 1.28 mrg #if defined(_KERNEL_OPT) 66 1.8 scottr #include "opt_quota.h" 67 1.9 scottr #endif 68 1.8 scottr 69 1.1 mycroft #include <sys/param.h> 70 1.3 christos #include <sys/systm.h> 71 1.1 mycroft #include <sys/buf.h> 72 1.1 mycroft #include <sys/proc.h> 73 1.1 mycroft #include <sys/vnode.h> 74 1.1 mycroft #include <sys/mount.h> 75 1.1 mycroft #include <sys/resourcevar.h> 76 1.55 perseant #include <sys/tree.h> 77 1.1 mycroft #include <sys/trace.h> 78 1.61 elad #include <sys/kauth.h> 79 1.1 mycroft 80 1.1 mycroft #include <miscfs/specfs/specdev.h> 81 1.1 mycroft 82 1.73 dholland #include <ufs/lfs/ulfs_quotacommon.h> 83 1.73 dholland #include <ufs/lfs/ulfs_inode.h> 84 1.73 dholland #include <ufs/lfs/ulfsmount.h> 85 1.73 dholland #include <ufs/lfs/ulfs_extern.h> 86 1.1 mycroft 87 1.1 mycroft #include <ufs/lfs/lfs.h> 88 1.84 dholland #include <ufs/lfs/lfs_accessors.h> 89 1.1 mycroft #include <ufs/lfs/lfs_extern.h> 90 1.78 dholland #include <ufs/lfs/lfs_kernel.h> 91 1.5 fvdl 92 1.96 riastrad #include <uvm/uvm_extern.h> 93 1.36 perseant 94 1.89 dholland static int lfs_fragextend(struct vnode *, int, int, daddr_t, struct buf **, 95 1.89 dholland kauth_cred_t); 96 1.5 fvdl 97 1.49 perseant u_int64_t locked_fakequeue_count; 98 1.49 perseant 99 1.16 perseant /* 100 1.90 dholland * Allocate a block, and do inode and filesystem block accounting for 101 1.90 dholland * it and for any indirect blocks that may need to be created in order 102 1.90 dholland * to handle this block. 103 1.90 dholland * 104 1.90 dholland * Blocks which have never been accounted for (i.e., which "do not 105 1.90 dholland * exist") have disk address 0, which is translated by ulfs_bmap to 106 1.90 dholland * the special value UNASSIGNED == -1, as in historical FFS-related 107 1.90 dholland * code. 108 1.90 dholland * 109 1.90 dholland * Blocks which have been accounted for but which have not yet been 110 1.90 dholland * written to disk are given the new special disk address UNWRITTEN == 111 1.90 dholland * -2, so that they can be differentiated from completely new blocks. 112 1.90 dholland * 113 1.90 dholland * Note: it seems that bpp is passed as NULL for blocks that are file 114 1.90 dholland * pages that will be handled by UVM and not the buffer cache. 115 1.90 dholland * 116 1.90 dholland * XXX: locking? 117 1.16 perseant */ 118 1.75 dholland /* VOP_BWRITE ULFS_NIADDR+2 times */ 119 1.1 mycroft int 120 1.61 elad lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, 121 1.57 yamt int flags, struct buf **bpp) 122 1.14 fvdl { 123 1.5 fvdl int offset; 124 1.46 mycroft daddr_t daddr, idaddr; 125 1.57 yamt struct buf *ibp, *bp; 126 1.1 mycroft struct inode *ip; 127 1.1 mycroft struct lfs *fs; 128 1.75 dholland struct indir indirs[ULFS_NIADDR+2], *idp; 129 1.35 fvdl daddr_t lbn, lastblock; 130 1.69 mlelstv int bcount; 131 1.11 perseant int error, frags, i, nsize, osize, num; 132 1.14 fvdl 133 1.1 mycroft ip = VTOI(vp); 134 1.1 mycroft fs = ip->i_lfs; 135 1.90 dholland 136 1.90 dholland /* Declare to humans that we might have the seglock here */ 137 1.90 dholland ASSERT_MAYBE_SEGLOCK(fs); 138 1.90 dholland 139 1.90 dholland 140 1.90 dholland /* offset within block */ 141 1.77 christos offset = lfs_blkoff(fs, startoffset); 142 1.90 dholland 143 1.90 dholland /* This is usually but not always exactly the block size */ 144 1.82 dholland KASSERT(iosize <= lfs_sb_getbsize(fs)); 145 1.90 dholland 146 1.90 dholland /* block number (within file) */ 147 1.77 christos lbn = lfs_lblkno(fs, startoffset); 148 1.90 dholland 149 1.90 dholland /* 150 1.90 dholland * This checks for whether pending stuff needs to be flushed 151 1.90 dholland * out and potentially waits. It's been disabled since UBC 152 1.90 dholland * support was added to LFS in 2003. -- dholland 20160806 153 1.90 dholland */ 154 1.36 perseant /* (void)lfs_check(vp, lbn, 0); */ 155 1.36 perseant 156 1.52 perseant 157 1.50 perry /* 158 1.1 mycroft * Three cases: it's a block beyond the end of file, it's a block in 159 1.1 mycroft * the file that may or may not have been assigned a disk address or 160 1.19 perseant * we're writing an entire block. 161 1.19 perseant * 162 1.19 perseant * Note, if the daddr is UNWRITTEN, the block already exists in 163 1.37 perseant * the cache (it was read or written earlier). If so, make sure 164 1.19 perseant * we don't count it as a new block or zero out its contents. If 165 1.19 perseant * it did not, make sure we allocate any necessary indirect 166 1.19 perseant * blocks. 167 1.19 perseant * 168 1.5 fvdl * If we are writing a block beyond the end of the file, we need to 169 1.11 perseant * check if the old last block was a fragment. If it was, we need 170 1.5 fvdl * to rewrite it. 171 1.1 mycroft */ 172 1.50 perry 173 1.36 perseant if (bpp) 174 1.36 perseant *bpp = NULL; 175 1.50 perry 176 1.90 dholland /* Last block number in file */ 177 1.77 christos lastblock = lfs_lblkno(fs, ip->i_size); 178 1.90 dholland 179 1.75 dholland if (lastblock < ULFS_NDADDR && lastblock < lbn) { 180 1.90 dholland /* 181 1.90 dholland * The file is small enough to have fragments, and we're 182 1.90 dholland * allocating past EOF. 183 1.90 dholland * 184 1.90 dholland * If the last block was a fragment we need to rewrite it 185 1.90 dholland * as a full block. 186 1.90 dholland */ 187 1.77 christos osize = lfs_blksize(fs, ip, lastblock); 188 1.82 dholland if (osize < lfs_sb_getbsize(fs) && osize > 0) { 189 1.82 dholland if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs), 190 1.36 perseant lastblock, 191 1.57 yamt (bpp ? &bp : NULL), cred))) 192 1.31 chs return (error); 193 1.90 dholland /* Update the file size with what we just did (only) */ 194 1.87 dholland ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs); 195 1.87 dholland lfs_dino_setsize(fs, ip->i_din, ip->i_size); 196 1.40 fvdl uvm_vnp_setsize(vp, ip->i_size); 197 1.94 maya ip->i_state |= IN_CHANGE | IN_UPDATE; 198 1.90 dholland /* if we got a buffer for this, write it out now */ 199 1.36 perseant if (bpp) 200 1.70 hannken (void) VOP_BWRITE(bp->b_vp, bp); 201 1.5 fvdl } 202 1.5 fvdl } 203 1.5 fvdl 204 1.5 fvdl /* 205 1.5 fvdl * If the block we are writing is a direct block, it's the last 206 1.5 fvdl * block in the file, and offset + iosize is less than a full 207 1.5 fvdl * block, we can write one or more fragments. There are two cases: 208 1.5 fvdl * the block is brand new and we should allocate it the correct 209 1.5 fvdl * size or it already exists and contains some fragments and 210 1.5 fvdl * may need to extend it. 211 1.5 fvdl */ 212 1.77 christos if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) { 213 1.77 christos osize = lfs_blksize(fs, ip, lbn); 214 1.77 christos nsize = lfs_fragroundup(fs, offset + iosize); 215 1.77 christos if (lfs_lblktosize(fs, lbn) >= ip->i_size) { 216 1.5 fvdl /* Brand new block or fragment */ 217 1.77 christos frags = lfs_numfrags(fs, nsize); 218 1.69 mlelstv if (!ISSPACE(fs, frags, cred)) 219 1.51 perseant return ENOSPC; 220 1.36 perseant if (bpp) { 221 1.57 yamt *bpp = bp = getblk(vp, lbn, nsize, 0, 0); 222 1.36 perseant bp->b_blkno = UNWRITTEN; 223 1.57 yamt if (flags & B_CLRBUF) 224 1.38 perseant clrbuf(bp); 225 1.36 perseant } 226 1.90 dholland 227 1.90 dholland /* 228 1.90 dholland * Update the effective block count (this count 229 1.90 dholland * includes blocks that don't have an on-disk 230 1.90 dholland * presence or location yet) 231 1.90 dholland */ 232 1.69 mlelstv ip->i_lfs_effnblks += frags; 233 1.90 dholland 234 1.90 dholland /* account for the space we're taking */ 235 1.64 ad mutex_enter(&lfs_lock); 236 1.85 dholland lfs_sb_subbfree(fs, frags); 237 1.64 ad mutex_exit(&lfs_lock); 238 1.90 dholland 239 1.90 dholland /* update the inode */ 240 1.87 dholland lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); 241 1.19 perseant } else { 242 1.90 dholland /* extending a block that already has fragments */ 243 1.90 dholland 244 1.11 perseant if (nsize <= osize) { 245 1.11 perseant /* No need to extend */ 246 1.67 hannken if (bpp && (error = bread(vp, lbn, osize, 247 1.81 maxv 0, &bp))) 248 1.11 perseant return error; 249 1.11 perseant } else { 250 1.11 perseant /* Extend existing block */ 251 1.11 perseant if ((error = 252 1.36 perseant lfs_fragextend(vp, osize, nsize, lbn, 253 1.57 yamt (bpp ? &bp : NULL), cred))) 254 1.19 perseant return error; 255 1.11 perseant } 256 1.36 perseant if (bpp) 257 1.36 perseant *bpp = bp; 258 1.5 fvdl } 259 1.19 perseant return 0; 260 1.19 perseant } 261 1.19 perseant 262 1.90 dholland /* 263 1.90 dholland * Look up what's already here. 264 1.90 dholland */ 265 1.90 dholland 266 1.75 dholland error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL); 267 1.19 perseant if (error) 268 1.19 perseant return (error); 269 1.49 perseant 270 1.86 dholland KASSERT(daddr <= LFS_MAX_DADDR(fs)); 271 1.49 perseant 272 1.19 perseant /* 273 1.19 perseant * Do byte accounting all at once, so we can gracefully fail *before* 274 1.19 perseant * we start assigning blocks. 275 1.19 perseant */ 276 1.79 dholland frags = fs->um_seqinc; 277 1.90 dholland bcount = 0; /* number of frags we need */ 278 1.19 perseant if (daddr == UNASSIGNED) { 279 1.90 dholland /* no block yet, going to need a whole block */ 280 1.69 mlelstv bcount = frags; 281 1.19 perseant } 282 1.19 perseant for (i = 1; i < num; ++i) { 283 1.19 perseant if (!indirs[i].in_exists) { 284 1.90 dholland /* need an indirect block at this level */ 285 1.69 mlelstv bcount += frags; 286 1.19 perseant } 287 1.19 perseant } 288 1.57 yamt if (ISSPACE(fs, bcount, cred)) { 289 1.90 dholland /* update the superblock's free block count */ 290 1.64 ad mutex_enter(&lfs_lock); 291 1.85 dholland lfs_sb_subbfree(fs, bcount); 292 1.64 ad mutex_exit(&lfs_lock); 293 1.90 dholland /* update the file's effective block count */ 294 1.21 perseant ip->i_lfs_effnblks += bcount; 295 1.5 fvdl } else { 296 1.90 dholland /* whoops, no can do */ 297 1.19 perseant return ENOSPC; 298 1.19 perseant } 299 1.19 perseant 300 1.19 perseant if (daddr == UNASSIGNED) { 301 1.90 dholland /* 302 1.90 dholland * There is nothing here yet. 303 1.90 dholland */ 304 1.90 dholland 305 1.90 dholland /* 306 1.90 dholland * If there's no indirect block in the inode, change it 307 1.90 dholland * to UNWRITTEN to indicate that it exists but doesn't 308 1.90 dholland * have an on-disk address yet. 309 1.90 dholland * 310 1.90 dholland * (Question: where's the block data initialized?) 311 1.90 dholland */ 312 1.87 dholland if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) { 313 1.87 dholland lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); 314 1.19 perseant } 315 1.19 perseant 316 1.5 fvdl /* 317 1.90 dholland * If we need more layers of indirect blocks, create what 318 1.90 dholland * we need. 319 1.5 fvdl */ 320 1.46 mycroft if (num > 1) { 321 1.90 dholland /* 322 1.90 dholland * The outermost indirect block address is the one 323 1.90 dholland * in the inode, so fetch that. 324 1.90 dholland */ 325 1.87 dholland idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off); 326 1.90 dholland /* 327 1.90 dholland * For each layer of indirection... 328 1.90 dholland */ 329 1.46 mycroft for (i = 1; i < num; ++i) { 330 1.90 dholland /* 331 1.90 dholland * Get a buffer for the indirect block data. 332 1.90 dholland * 333 1.90 dholland * (XXX: the logic here seems twisted. What's 334 1.90 dholland * wrong with testing in_exists first and then 335 1.90 dholland * doing either bread or getblk to get a 336 1.90 dholland * buffer?) 337 1.90 dholland */ 338 1.46 mycroft ibp = getblk(vp, indirs[i].in_lbn, 339 1.82 dholland lfs_sb_getbsize(fs), 0,0); 340 1.46 mycroft if (!indirs[i].in_exists) { 341 1.90 dholland /* 342 1.90 dholland * There isn't actually a block here, 343 1.90 dholland * so clear the buffer data and mark 344 1.90 dholland * the address of the block as 345 1.90 dholland * UNWRITTEN. 346 1.90 dholland */ 347 1.46 mycroft clrbuf(ibp); 348 1.46 mycroft ibp->b_blkno = UNWRITTEN; 349 1.64 ad } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { 350 1.90 dholland /* 351 1.90 dholland * Otherwise read it in. 352 1.90 dholland */ 353 1.77 christos ibp->b_blkno = LFS_FSBTODB(fs, idaddr); 354 1.46 mycroft ibp->b_flags |= B_READ; 355 1.48 hannken VOP_STRATEGY(vp, ibp); 356 1.46 mycroft biowait(ibp); 357 1.46 mycroft } 358 1.90 dholland 359 1.46 mycroft /* 360 1.90 dholland * Now this indirect block exists, but 361 1.90 dholland * the next one down may not yet. If 362 1.90 dholland * so, set it to UNWRITTEN. This keeps 363 1.46 mycroft * the accounting straight. 364 1.46 mycroft */ 365 1.88 dholland if (lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off) == 0) 366 1.88 dholland lfs_iblock_set(fs, ibp->b_data, indirs[i].in_off, 367 1.88 dholland UNWRITTEN); 368 1.90 dholland 369 1.90 dholland /* get the block for the next iteration */ 370 1.88 dholland idaddr = lfs_iblock_get(fs, ibp->b_data, indirs[i].in_off); 371 1.92 maya 372 1.52 perseant if (vp == fs->lfs_ivnode) { 373 1.52 perseant LFS_ENTER_LOG("balloc", __FILE__, 374 1.52 perseant __LINE__, indirs[i].in_lbn, 375 1.52 perseant ibp->b_flags, curproc->p_pid); 376 1.52 perseant } 377 1.90 dholland /* 378 1.90 dholland * Write out the updated indirect block. Note 379 1.90 dholland * that this writes it out even if we didn't 380 1.90 dholland * modify it - ultimately because the final 381 1.90 dholland * block didn't exist we'll need to write a 382 1.90 dholland * new version of all the blocks that lead to 383 1.90 dholland * it. Hopefully all that gets in before any 384 1.90 dholland * actual disk I/O so we don't end up writing 385 1.90 dholland * any of them twice... this is currently not 386 1.90 dholland * very clear. 387 1.90 dholland */ 388 1.70 hannken if ((error = VOP_BWRITE(ibp->b_vp, ibp))) 389 1.46 mycroft return error; 390 1.19 perseant } 391 1.19 perseant } 392 1.50 perry } 393 1.19 perseant 394 1.19 perseant 395 1.19 perseant /* 396 1.36 perseant * Get the existing block from the cache, if requested. 397 1.19 perseant */ 398 1.36 perseant if (bpp) 399 1.77 christos *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0); 400 1.50 perry 401 1.49 perseant /* 402 1.49 perseant * Do accounting on blocks that represent pages. 403 1.49 perseant */ 404 1.49 perseant if (!bpp) 405 1.49 perseant lfs_register_block(vp, lbn); 406 1.49 perseant 407 1.50 perry /* 408 1.5 fvdl * The block we are writing may be a brand new block 409 1.19 perseant * in which case we need to do accounting. 410 1.15 perseant * 411 1.75 dholland * We can tell a truly new block because ulfs_bmaparray will say 412 1.90 dholland * it is UNASSIGNED. Once we allocate it we will assign it the 413 1.19 perseant * disk address UNWRITTEN. 414 1.5 fvdl */ 415 1.16 perseant if (daddr == UNASSIGNED) { 416 1.36 perseant if (bpp) { 417 1.57 yamt if (flags & B_CLRBUF) 418 1.36 perseant clrbuf(bp); 419 1.50 perry 420 1.36 perseant /* Note the new address */ 421 1.36 perseant bp->b_blkno = UNWRITTEN; 422 1.36 perseant } 423 1.50 perry 424 1.19 perseant switch (num) { 425 1.19 perseant case 0: 426 1.90 dholland /* direct block - update the inode */ 427 1.87 dholland lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); 428 1.19 perseant break; 429 1.19 perseant case 1: 430 1.90 dholland /* 431 1.90 dholland * using a single indirect block - update the inode 432 1.90 dholland * 433 1.90 dholland * XXX: is this right? We already set this block 434 1.90 dholland * pointer above. I think we want to be writing *in* 435 1.90 dholland * the single indirect block and this case shouldn't 436 1.90 dholland * exist. (just case 0 and default) 437 1.90 dholland * -- dholland 20160806 438 1.90 dholland */ 439 1.87 dholland lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); 440 1.19 perseant break; 441 1.19 perseant default: 442 1.90 dholland /* 443 1.90 dholland * using multiple indirect blocks - update the 444 1.90 dholland * innermost one 445 1.90 dholland */ 446 1.19 perseant idp = &indirs[num - 1]; 447 1.82 dholland if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs), 448 1.67 hannken B_MODIFY, &ibp)) 449 1.35 fvdl panic("lfs_balloc: bread bno %lld", 450 1.35 fvdl (long long)idp->in_lbn); 451 1.88 dholland lfs_iblock_set(fs, ibp->b_data, idp->in_off, UNWRITTEN); 452 1.92 maya 453 1.52 perseant if (vp == fs->lfs_ivnode) { 454 1.52 perseant LFS_ENTER_LOG("balloc", __FILE__, 455 1.52 perseant __LINE__, idp->in_lbn, 456 1.52 perseant ibp->b_flags, curproc->p_pid); 457 1.52 perseant } 458 1.92 maya 459 1.70 hannken VOP_BWRITE(ibp->b_vp, ibp); 460 1.15 perseant } 461 1.64 ad } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { 462 1.15 perseant /* 463 1.15 perseant * Not a brand new block, also not in the cache; 464 1.15 perseant * read it in from disk. 465 1.15 perseant */ 466 1.82 dholland if (iosize == lfs_sb_getbsize(fs)) 467 1.5 fvdl /* Optimization: I/O is unnecessary. */ 468 1.5 fvdl bp->b_blkno = daddr; 469 1.15 perseant else { 470 1.5 fvdl /* 471 1.5 fvdl * We need to read the block to preserve the 472 1.5 fvdl * existing bytes. 473 1.5 fvdl */ 474 1.1 mycroft bp->b_blkno = daddr; 475 1.1 mycroft bp->b_flags |= B_READ; 476 1.48 hannken VOP_STRATEGY(vp, bp); 477 1.31 chs return (biowait(bp)); 478 1.1 mycroft } 479 1.1 mycroft } 480 1.50 perry 481 1.5 fvdl return (0); 482 1.5 fvdl } 483 1.5 fvdl 484 1.90 dholland /* 485 1.90 dholland * Extend a file that uses fragments with more fragments. 486 1.90 dholland * 487 1.90 dholland * XXX: locking? 488 1.90 dholland */ 489 1.25 perseant /* VOP_BWRITE 1 time */ 490 1.89 dholland static int 491 1.89 dholland lfs_fragextend(struct vnode *vp, int osize, int nsize, daddr_t lbn, 492 1.89 dholland struct buf **bpp, kauth_cred_t cred) 493 1.5 fvdl { 494 1.5 fvdl struct inode *ip; 495 1.5 fvdl struct lfs *fs; 496 1.69 mlelstv long frags; 497 1.5 fvdl int error; 498 1.90 dholland size_t obufsize; 499 1.90 dholland 500 1.90 dholland /* XXX move this to a header file */ 501 1.90 dholland /* (XXX: except it's not clear what purpose it serves) */ 502 1.11 perseant extern long locked_queue_bytes; 503 1.90 dholland 504 1.91 dholland ip = VTOI(vp); 505 1.91 dholland fs = ip->i_lfs; 506 1.91 dholland 507 1.90 dholland /* 508 1.90 dholland * XXX: is there some reason we know more about the seglock 509 1.90 dholland * state here than at the top of lfs_balloc? 510 1.90 dholland */ 511 1.90 dholland ASSERT_NO_SEGLOCK(fs); 512 1.5 fvdl 513 1.90 dholland /* number of frags we're adding */ 514 1.77 christos frags = (long)lfs_numfrags(fs, nsize - osize); 515 1.90 dholland 516 1.18 perseant error = 0; 517 1.18 perseant 518 1.18 perseant /* 519 1.36 perseant * Get the seglock so we don't enlarge blocks while a segment 520 1.36 perseant * is being written. If we're called with bpp==NULL, though, 521 1.36 perseant * we are only pretending to change a buffer, so we don't have to 522 1.36 perseant * lock. 523 1.90 dholland * 524 1.90 dholland * XXX: the above comment is lying, as fs->lfs_fraglock is not 525 1.90 dholland * the segment lock. 526 1.18 perseant */ 527 1.26 perseant top: 528 1.36 perseant if (bpp) { 529 1.62 ad rw_enter(&fs->lfs_fraglock, RW_READER); 530 1.36 perseant } 531 1.36 perseant 532 1.90 dholland /* check if we actually have enough frags available */ 533 1.69 mlelstv if (!ISSPACE(fs, frags, cred)) { 534 1.18 perseant error = ENOSPC; 535 1.18 perseant goto out; 536 1.5 fvdl } 537 1.36 perseant 538 1.36 perseant /* 539 1.36 perseant * If we are not asked to actually return the block, all we need 540 1.36 perseant * to do is allocate space for it. UBC will handle dirtying the 541 1.36 perseant * appropriate things and making sure it all goes to disk. 542 1.36 perseant * Don't bother to read in that case. 543 1.36 perseant */ 544 1.81 maxv if (bpp && (error = bread(vp, lbn, osize, 0, bpp))) { 545 1.18 perseant goto out; 546 1.5 fvdl } 547 1.80 dholland #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) 548 1.76 dholland if ((error = lfs_chkdq(ip, frags, cred, 0))) { 549 1.36 perseant if (bpp) 550 1.63 ad brelse(*bpp, 0); 551 1.19 perseant goto out; 552 1.19 perseant } 553 1.19 perseant #endif 554 1.13 perseant /* 555 1.26 perseant * Adjust accounting for lfs_avail. If there's not enough room, 556 1.26 perseant * we will have to wait for the cleaner, which we can't do while 557 1.26 perseant * holding a block busy or while holding the seglock. In that case, 558 1.26 perseant * release both and start over after waiting. 559 1.26 perseant */ 560 1.33 perseant 561 1.64 ad if (bpp && ((*bpp)->b_oflags & BO_DELWRI)) { 562 1.69 mlelstv if (!lfs_fits(fs, frags)) { 563 1.36 perseant if (bpp) 564 1.63 ad brelse(*bpp, 0); 565 1.80 dholland #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) 566 1.76 dholland lfs_chkdq(ip, -frags, cred, 0); 567 1.26 perseant #endif 568 1.62 ad rw_exit(&fs->lfs_fraglock); 569 1.69 mlelstv lfs_availwait(fs, frags); 570 1.26 perseant goto top; 571 1.26 perseant } 572 1.82 dholland lfs_sb_subavail(fs, frags); 573 1.26 perseant } 574 1.26 perseant 575 1.90 dholland /* decrease the free block count in the superblock */ 576 1.64 ad mutex_enter(&lfs_lock); 577 1.85 dholland lfs_sb_subbfree(fs, frags); 578 1.64 ad mutex_exit(&lfs_lock); 579 1.90 dholland /* increase the file's effective block count */ 580 1.69 mlelstv ip->i_lfs_effnblks += frags; 581 1.90 dholland /* mark the inode dirty */ 582 1.94 maya ip->i_state |= IN_CHANGE | IN_UPDATE; 583 1.26 perseant 584 1.36 perseant if (bpp) { 585 1.36 perseant obufsize = (*bpp)->b_bufsize; 586 1.47 pk allocbuf(*bpp, nsize, 1); 587 1.26 perseant 588 1.36 perseant /* Adjust locked-list accounting */ 589 1.65 ad if (((*bpp)->b_flags & B_LOCKED) != 0 && 590 1.64 ad (*bpp)->b_iodone == NULL) { 591 1.64 ad mutex_enter(&lfs_lock); 592 1.36 perseant locked_queue_bytes += (*bpp)->b_bufsize - obufsize; 593 1.64 ad mutex_exit(&lfs_lock); 594 1.52 perseant } 595 1.26 perseant 596 1.90 dholland /* zero the new space */ 597 1.68 cegger memset((char *)((*bpp)->b_data) + osize, 0, (u_int)(nsize - osize)); 598 1.36 perseant } 599 1.18 perseant 600 1.18 perseant out: 601 1.36 perseant if (bpp) { 602 1.62 ad rw_exit(&fs->lfs_fraglock); 603 1.36 perseant } 604 1.18 perseant return (error); 605 1.1 mycroft } 606 1.49 perseant 607 1.59 perry static inline int 608 1.55 perseant lge(struct lbnentry *a, struct lbnentry *b) 609 1.53 perseant { 610 1.55 perseant return a->lbn - b->lbn; 611 1.53 perseant } 612 1.53 perseant 613 1.55 perseant SPLAY_PROTOTYPE(lfs_splay, lbnentry, entry, lge); 614 1.55 perseant 615 1.55 perseant SPLAY_GENERATE(lfs_splay, lbnentry, entry, lge); 616 1.53 perseant 617 1.49 perseant /* 618 1.49 perseant * Record this lbn as being "write pending". We used to have this information 619 1.49 perseant * on the buffer headers, but since pages don't have buffer headers we 620 1.49 perseant * record it here instead. 621 1.49 perseant */ 622 1.49 perseant void 623 1.49 perseant lfs_register_block(struct vnode *vp, daddr_t lbn) 624 1.49 perseant { 625 1.49 perseant struct lfs *fs; 626 1.49 perseant struct inode *ip; 627 1.49 perseant struct lbnentry *lbp; 628 1.53 perseant 629 1.53 perseant ip = VTOI(vp); 630 1.49 perseant 631 1.49 perseant /* Don't count metadata */ 632 1.53 perseant if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) 633 1.49 perseant return; 634 1.49 perseant 635 1.49 perseant fs = ip->i_lfs; 636 1.49 perseant 637 1.52 perseant ASSERT_NO_SEGLOCK(fs); 638 1.52 perseant 639 1.49 perseant /* If no space, wait for the cleaner */ 640 1.83 dholland lfs_availwait(fs, lfs_btofsb(fs, 1 << lfs_sb_getbshift(fs))); 641 1.49 perseant 642 1.49 perseant lbp = (struct lbnentry *)pool_get(&lfs_lbnentry_pool, PR_WAITOK); 643 1.49 perseant lbp->lbn = lbn; 644 1.64 ad mutex_enter(&lfs_lock); 645 1.55 perseant if (SPLAY_INSERT(lfs_splay, &ip->i_lfs_lbtree, lbp) != NULL) { 646 1.64 ad mutex_exit(&lfs_lock); 647 1.55 perseant /* Already there */ 648 1.55 perseant pool_put(&lfs_lbnentry_pool, lbp); 649 1.55 perseant return; 650 1.55 perseant } 651 1.52 perseant 652 1.56 perseant ++ip->i_lfs_nbtree; 653 1.83 dholland fs->lfs_favail += lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); 654 1.82 dholland fs->lfs_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; 655 1.49 perseant ++locked_fakequeue_count; 656 1.82 dholland lfs_subsys_pages += lfs_sb_getbsize(fs) >> PAGE_SHIFT; 657 1.64 ad mutex_exit(&lfs_lock); 658 1.49 perseant } 659 1.49 perseant 660 1.49 perseant static void 661 1.53 perseant lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp) 662 1.49 perseant { 663 1.95 riastrad 664 1.95 riastrad KASSERT(mutex_owned(&lfs_lock)); 665 1.52 perseant ASSERT_MAYBE_SEGLOCK(fs); 666 1.52 perseant 667 1.56 perseant --ip->i_lfs_nbtree; 668 1.55 perseant SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); 669 1.83 dholland if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)))) 670 1.83 dholland fs->lfs_favail -= lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs))); 671 1.82 dholland fs->lfs_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; 672 1.49 perseant if (locked_fakequeue_count > 0) 673 1.49 perseant --locked_fakequeue_count; 674 1.82 dholland lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; 675 1.95 riastrad 676 1.64 ad mutex_exit(&lfs_lock); 677 1.95 riastrad pool_put(&lfs_lbnentry_pool, lbp); 678 1.95 riastrad mutex_enter(&lfs_lock); 679 1.64 ad 680 1.95 riastrad KASSERT(mutex_owned(&lfs_lock)); 681 1.49 perseant } 682 1.49 perseant 683 1.49 perseant void 684 1.49 perseant lfs_deregister_block(struct vnode *vp, daddr_t lbn) 685 1.49 perseant { 686 1.49 perseant struct lfs *fs; 687 1.49 perseant struct inode *ip; 688 1.49 perseant struct lbnentry *lbp; 689 1.55 perseant struct lbnentry tmp; 690 1.53 perseant 691 1.53 perseant ip = VTOI(vp); 692 1.49 perseant 693 1.49 perseant /* Don't count metadata */ 694 1.53 perseant if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) 695 1.49 perseant return; 696 1.49 perseant 697 1.95 riastrad mutex_enter(&lfs_lock); 698 1.49 perseant fs = ip->i_lfs; 699 1.55 perseant tmp.lbn = lbn; 700 1.95 riastrad if ((lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp)) != NULL) 701 1.95 riastrad lfs_do_deregister(fs, ip, lbp); 702 1.95 riastrad mutex_exit(&lfs_lock); 703 1.49 perseant } 704 1.55 perseant 705 1.55 perseant void 706 1.55 perseant lfs_deregister_all(struct vnode *vp) 707 1.55 perseant { 708 1.95 riastrad struct lbnentry *lbp; 709 1.55 perseant struct lfs_splay *hd; 710 1.55 perseant struct lfs *fs; 711 1.55 perseant struct inode *ip; 712 1.55 perseant 713 1.55 perseant ip = VTOI(vp); 714 1.55 perseant fs = ip->i_lfs; 715 1.55 perseant hd = &ip->i_lfs_lbtree; 716 1.55 perseant 717 1.95 riastrad mutex_enter(&lfs_lock); 718 1.95 riastrad while ((lbp = SPLAY_MIN(lfs_splay, hd)) != NULL) 719 1.55 perseant lfs_do_deregister(fs, ip, lbp); 720 1.95 riastrad mutex_exit(&lfs_lock); 721 1.55 perseant } 722