Home | History | Annotate | Line # | Download | only in ffs
      1 /*	$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.66 2022/11/17 06:40:40 chs Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #include "opt_uvmhist.h"
     49 #endif
     50 
     51 #include <sys/param.h>
     52 #include <sys/systm.h>
     53 #include <sys/buf.h>
     54 #include <sys/file.h>
     55 #include <sys/mount.h>
     56 #include <sys/vnode.h>
     57 #include <sys/kauth.h>
     58 #include <sys/fstrans.h>
     59 
     60 #include <ufs/ufs/quota.h>
     61 #include <ufs/ufs/ufsmount.h>
     62 #include <ufs/ufs/inode.h>
     63 #include <ufs/ufs/ufs_extern.h>
     64 #include <ufs/ufs/ufs_bswap.h>
     65 
     66 #include <ufs/ffs/fs.h>
     67 #include <ufs/ffs/ffs_extern.h>
     68 
     69 #ifdef UVMHIST
     70 #include <uvm/uvm.h>
     71 #endif
     72 #include <uvm/uvm_extern.h>
     73 #include <uvm/uvm_stat.h>
     74 
     75 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     76     struct buf **);
     77 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     78     struct buf **);
     79 
/*
 * Return external-attribute block pointer "nb" from the UFS2 dinode,
 * converted from on-disk byte order to host order when the file
 * system is byte-swapped (UFS_FSNEEDSWAP).
 */
static daddr_t
ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
{
	return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
}
     85 
     86 /*
     87  * Balloc defines the structure of file system storage
     88  * by allocating the physical blocks on a device given
     89  * the inode and the logical block number in a file.
     90  */
     91 
     92 int
     93 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     94     struct buf **bpp)
     95 {
     96 	int error;
     97 
     98 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     99 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
    100 	else
    101 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
    102 
    103 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
    104 		brelse(*bpp, 0);
    105 
    106 	return error;
    107 }
    108 
/*
 * UFS1 flavour of ffs_balloc(): make sure a physical block backs the
 * logical block containing byte offset "off" of vp, for a write of
 * "size" bytes starting there.
 *
 * Responsibilities, in order:
 *   1. If the file currently ends in a fragment and this write goes
 *      past it, grow that fragment to a full block.
 *   2. Direct blocks: read, grow, or allocate as needed and return.
 *   3. Otherwise walk (and allocate) the chain of indirect blocks,
 *      then allocate the data block itself.
 *
 * flags carries B_SYNC/B_CLRBUF/B_METAONLY etc.; with B_METAONLY the
 * buffer returned through *bpp is the last indirect block rather than
 * the data block.  Returns 0 or an errno; on failure every block
 * allocated along the way is unwound (see "fail:").
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here so "fail:" can free them */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	/* From here on "size" is the byte count from the start of block lbn. */
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* um_lock is taken here and released by ffs_realloccg. */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, flags, cred, bpp,
				    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Record the new direct block and mark the inode dirty. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		/* Hook the new indirect block into the inode only after it hit disk. */
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* Loop exits here with bp = last-level indirect block held. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block, not the data block. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already existed: release the indirect block and read/get it. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			/* unwindidx 0: clear the inode's indirect pointer itself. */
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached copies of the now-freed indirect blocks. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Give back every block this call allocated. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
    526 
    527 static int
    528 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    529     int flags, struct buf **bpp)
    530 {
    531 	daddr_t lbn, lastlbn;
    532 	struct buf *bp, *nbp;
    533 	struct inode *ip = VTOI(vp);
    534 	struct fs *fs = ip->i_fs;
    535 	struct ufsmount *ump = ip->i_ump;
    536 	struct indir indirs[UFS_NIADDR + 2];
    537 	daddr_t newb, pref, nb;
    538 	int64_t *bap;
    539 	int deallocated, osize, nsize, num, i, error;
    540 	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
    541 	int64_t *allocib;
    542 	int unwindidx = -1;
    543 	const int needswap = UFS_FSNEEDSWAP(fs);
    544 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    545 
    546 	KASSERT((ump->um_flags & UFS_EA) != 0 || (flags & IO_EXT) == 0);
    547 
    548 	lbn = ffs_lblkno(fs, off);
    549 	size = ffs_blkoff(fs, off) + size;
    550 	if (size > fs->fs_bsize)
    551 		panic("ffs_balloc: blk too big");
    552 	if (bpp != NULL) {
    553 		*bpp = NULL;
    554 	}
    555 	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
    556 	    lbn, size, 0);
    557 
    558 	if (lbn < 0)
    559 		return (EFBIG);
    560 
    561 	/*
    562 	 * Check for allocating external data.
    563 	 */
    564 	if (flags & IO_EXT) {
    565 		struct ufs2_dinode *dp = ip->i_din.ffs2_din;
    566 		if (lbn >= UFS_NXADDR)
    567 			return (EFBIG);
    568 		/*
    569 		 * If the next write will extend the data into a new block,
    570 		 * and the data is currently composed of a fragment
    571 		 * this fragment has to be extended to be a full block.
    572 		 */
    573 		lastlbn = ffs_lblkno(fs, dp->di_extsize);
    574 		if (lastlbn < lbn) {
    575 			nb = lastlbn;
    576 			osize = ffs_sblksize(fs, dp->di_extsize, nb);
    577 			if (osize < fs->fs_bsize && osize > 0) {
    578 				mutex_enter(&ump->um_lock);
    579 				error = ffs_realloccg(ip, -1 - nb,
    580 				    ffs_extb(fs, dp, nb),
    581 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    582 					flags, &dp->di_extb[0]),
    583 				    osize, (int)fs->fs_bsize, flags, cred,
    584 				    &bp, &newb);
    585 				if (error)
    586 					return (error);
    587 				dp->di_extsize = ffs_lblktosize(fs, nb + 1);
    588 				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
    589 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    590 				if (flags & IO_SYNC)
    591 					bwrite(bp);
    592 				else
    593 					bawrite(bp);
    594 			}
    595 		}
    596 		/*
    597 		 * All blocks are direct blocks
    598 		 */
    599 		nb = dp->di_extb[lbn];
    600 		if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
    601 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    602 			    0, &bp);
    603 			if (error) {
    604 				return (error);
    605 			}
    606 			mutex_enter(bp->b_objlock);
    607 			bp->b_blkno = FFS_FSBTODB(fs, nb);
    608 			mutex_exit(bp->b_objlock);
    609 			*bpp = bp;
    610 			return (0);
    611 		}
    612 		if (nb != 0) {
    613 			/*
    614 			 * Consider need to reallocate a fragment.
    615 			 */
    616 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
    617 			nsize = ffs_fragroundup(fs, size);
    618 			if (nsize <= osize) {
    619 				error = bread(vp, -1 - lbn, osize,
    620 				    0, &bp);
    621 				if (error) {
    622 					return (error);
    623 				}
    624 				mutex_enter(bp->b_objlock);
    625 				bp->b_blkno = FFS_FSBTODB(fs, nb);
    626 				mutex_exit(bp->b_objlock);
    627 			} else {
    628 				mutex_enter(&ump->um_lock);
    629 				error = ffs_realloccg(ip, -1 - lbn,
    630 				    ffs_extb(fs, dp, lbn),
    631 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    632 				        &dp->di_extb[0]),
    633 				    osize, nsize, flags, cred, &bp, &newb);
    634 				if (error)
    635 					return (error);
    636 			}
    637 		} else {
    638 			if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
    639 				nsize = ffs_fragroundup(fs, size);
    640 			else
    641 				nsize = fs->fs_bsize;
    642 			mutex_enter(&ump->um_lock);
    643 			error = ffs_alloc(ip, lbn,
    644 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    645 			       &dp->di_extb[0]),
    646 			   nsize, flags, cred, &newb);
    647 			if (error)
    648 				return (error);
    649 			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
    650 			    nsize, (flags & B_CLRBUF) != 0, &bp);
    651 			if (error)
    652 				return error;
    653 		}
    654 		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
    655 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    656 		*bpp = bp;
    657 		return (0);
    658 	}
    659 	/*
    660 	 * If the next write will extend the file into a new block,
    661 	 * and the file is currently composed of a fragment
    662 	 * this fragment has to be extended to be a full block.
    663 	 */
    664 
    665 	lastlbn = ffs_lblkno(fs, ip->i_size);
    666 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
    667 		nb = lastlbn;
    668 		osize = ffs_blksize(fs, ip, nb);
    669 		if (osize < fs->fs_bsize && osize > 0) {
    670 			mutex_enter(&ump->um_lock);
    671 			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn),
    672 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    673 					&ip->i_ffs2_db[0]),
    674 				    osize, (int)fs->fs_bsize, flags, cred, bpp,
    675 				    &newb);
    676 			if (error)
    677 				return (error);
    678 			ip->i_size = ffs_lblktosize(fs, nb + 1);
    679 			ip->i_ffs2_size = ip->i_size;
    680 			uvm_vnp_setsize(vp, ip->i_size);
    681 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    682 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    683 			if (bpp) {
    684 				if (flags & B_SYNC)
    685 					bwrite(*bpp);
    686 				else
    687 					bawrite(*bpp);
    688 			}
    689 		}
    690 	}
    691 
    692 	/*
    693 	 * The first UFS_NDADDR blocks are direct blocks
    694 	 */
    695 
    696 	if (lbn < UFS_NDADDR) {
    697 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    698 		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
    699 
    700 			/*
    701 			 * The block is an already-allocated direct block
    702 			 * and the file already extends past this block,
    703 			 * thus this must be a whole block.
    704 			 * Just read the block (if requested).
    705 			 */
    706 
    707 			if (bpp != NULL) {
    708 				error = bread(vp, lbn, fs->fs_bsize,
    709 					      B_MODIFY, bpp);
    710 				if (error) {
    711 					return (error);
    712 				}
    713 			}
    714 			return (0);
    715 		}
    716 		if (nb != 0) {
    717 
    718 			/*
    719 			 * Consider need to reallocate a fragment.
    720 			 */
    721 
    722 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
    723 			nsize = ffs_fragroundup(fs, size);
    724 			if (nsize <= osize) {
    725 
    726 				/*
    727 				 * The existing block is already
    728 				 * at least as big as we want.
    729 				 * Just read the block (if requested).
    730 				 */
    731 
    732 				if (bpp != NULL) {
    733 					error = bread(vp, lbn, osize,
    734 						      B_MODIFY, bpp);
    735 					if (error) {
    736 						return (error);
    737 					}
    738 				}
    739 				return 0;
    740 			} else {
    741 
    742 				/*
    743 				 * The existing block is smaller than we want,
    744 				 * grow it.
    745 				 */
    746 				mutex_enter(&ump->um_lock);
    747 				error = ffs_realloccg(ip, lbn,
    748 				    ffs_getdb(fs, ip, lbn),
    749 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    750 					&ip->i_ffs2_db[0]),
    751 				    osize, nsize, flags, cred, bpp, &newb);
    752 				if (error)
    753 					return (error);
    754 			}
    755 		} else {
    756 
    757 			/*
    758 			 * the block was not previously allocated,
    759 			 * allocate a new block or fragment.
    760 			 */
    761 
    762 			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
    763 				nsize = ffs_fragroundup(fs, size);
    764 			else
    765 				nsize = fs->fs_bsize;
    766 			mutex_enter(&ump->um_lock);
    767 			error = ffs_alloc(ip, lbn,
    768 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    769 				&ip->i_ffs2_db[0]),
    770 			    nsize, flags, cred, &newb);
    771 			if (error)
    772 				return (error);
    773 			if (bpp != NULL) {
    774 				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
    775 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    776 				if (error)
    777 					return error;
    778 			}
    779 		}
    780 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    781 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    782 		return (0);
    783 	}
    784 
    785 	/*
    786 	 * Determine the number of levels of indirection.
    787 	 */
    788 
    789 	pref = 0;
    790 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    791 		return (error);
    792 
    793 	/*
    794 	 * Fetch the first indirect block allocating if necessary.
    795 	 */
    796 
    797 	--num;
    798 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    799 	allocib = NULL;
    800 	allocblk = allociblk;
    801 	if (nb == 0) {
    802 		mutex_enter(&ump->um_lock);
    803 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    804 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    805 		    flags | B_METAONLY, cred, &newb);
    806 		if (error)
    807 			goto fail;
    808 		nb = newb;
    809 		*allocblk++ = nb;
    810 		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
    811 		    fs->fs_bsize, true, &bp);
    812 		if (error)
    813 			goto fail;
    814 		/*
    815 		 * Write synchronously so that indirect blocks
    816 		 * never point at garbage.
    817 		 */
    818 		if ((error = bwrite(bp)) != 0)
    819 			goto fail;
    820 		unwindidx = 0;
    821 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    822 		*allocib = ufs_rw64(nb, needswap);
    823 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    824 	}
    825 
    826 	/*
    827 	 * Fetch through the indirect blocks, allocating as necessary.
    828 	 */
    829 
    830 	for (i = 1;;) {
    831 		error = bread(vp,
    832 		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
    833 		if (error) {
    834 			goto fail;
    835 		}
    836 		bap = (int64_t *)bp->b_data;
    837 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    838 		if (i == num)
    839 			break;
    840 		i++;
    841 		if (nb != 0) {
    842 			brelse(bp, 0);
    843 			continue;
    844 		}
    845 		if (fscow_run(bp, true) != 0) {
    846 			brelse(bp, 0);
    847 			goto fail;
    848 		}
    849 		mutex_enter(&ump->um_lock);
    850 		/* Try to keep snapshot indirect blocks contiguous. */
    851 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
    852 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
    853 			    flags | B_METAONLY, &bap[0]);
    854 		if (pref == 0)
    855 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    856 			    NULL);
    857 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    858 		    flags | B_METAONLY, cred, &newb);
    859 		if (error) {
    860 			brelse(bp, 0);
    861 			goto fail;
    862 		}
    863 		nb = newb;
    864 		*allocblk++ = nb;
    865 		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
    866 		    fs->fs_bsize, true, &nbp);
    867 		if (error) {
    868 			brelse(bp, 0);
    869 			goto fail;
    870 		}
    871 		/*
    872 		 * Write synchronously so that indirect blocks
    873 		 * never point at garbage.
    874 		 */
    875 		if ((error = bwrite(nbp)) != 0) {
    876 			brelse(bp, 0);
    877 			goto fail;
    878 		}
    879 		if (unwindidx < 0)
    880 			unwindidx = i - 1;
    881 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    882 
    883 		/*
    884 		 * If required, write synchronously, otherwise use
    885 		 * delayed write.
    886 		 */
    887 
    888 		if (flags & B_SYNC) {
    889 			bwrite(bp);
    890 		} else {
    891 			bdwrite(bp);
    892 		}
    893 	}
    894 
    895 	if (flags & B_METAONLY) {
    896 		KASSERT(bpp != NULL);
    897 		*bpp = bp;
    898 		return (0);
    899 	}
    900 
    901 	/*
    902 	 * Get the data block, allocating if necessary.
    903 	 */
    904 
    905 	if (nb == 0) {
    906 		if (fscow_run(bp, true) != 0) {
    907 			brelse(bp, 0);
    908 			goto fail;
    909 		}
    910 		mutex_enter(&ump->um_lock);
    911 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
    912 		    &bap[0]);
    913 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    914 		    &newb);
    915 		if (error) {
    916 			brelse(bp, 0);
    917 			goto fail;
    918 		}
    919 		nb = newb;
    920 		*allocblk++ = nb;
    921 		if (bpp != NULL) {
    922 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    923 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    924 			if (error) {
    925 				brelse(bp, 0);
    926 				goto fail;
    927 			}
    928 		}
    929 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    930 		if (allocib == NULL && unwindidx < 0) {
    931 			unwindidx = i - 1;
    932 		}
    933 
    934 		/*
    935 		 * If required, write synchronously, otherwise use
    936 		 * delayed write.
    937 		 */
    938 
    939 		if (flags & B_SYNC) {
    940 			bwrite(bp);
    941 		} else {
    942 			bdwrite(bp);
    943 		}
    944 		return (0);
    945 	}
    946 	brelse(bp, 0);
    947 	if (bpp != NULL) {
    948 		if (flags & B_CLRBUF) {
    949 			error = bread(vp, lbn, (int)fs->fs_bsize,
    950 			    B_MODIFY, &nbp);
    951 			if (error) {
    952 				goto fail;
    953 			}
    954 		} else {
    955 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    956 			    fs->fs_bsize, true, &nbp);
    957 			if (error)
    958 				goto fail;
    959 		}
    960 		*bpp = nbp;
    961 	}
    962 	return (0);
    963 
    964 fail:
    965 	/*
    966 	 * If we have failed part way through block allocation, we
    967 	 * have to deallocate any indirect blocks that we have allocated.
    968 	 */
    969 
    970 	if (unwindidx >= 0) {
    971 
    972 		/*
    973 		 * First write out any buffers we've created to resolve their
    974 		 * softdeps.  This must be done in reverse order of creation
    975 		 * so that we resolve the dependencies in one pass.
    976 		 * Write the cylinder group buffers for these buffers too.
    977 		 */
    978 
    979 		for (i = num; i >= unwindidx; i--) {
    980 			if (i == 0) {
    981 				break;
    982 			}
    983 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    984 			    fs->fs_bsize, false, &bp) != 0)
    985 				continue;
    986 			if (bp->b_oflags & BO_DELWRI) {
    987 				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
    988 				    FFS_DBTOFSB(fs, bp->b_blkno))));
    989 				bwrite(bp);
    990 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    991 				    fs->fs_cgsize, false, &bp) != 0)
    992 					continue;
    993 				if (bp->b_oflags & BO_DELWRI) {
    994 					bwrite(bp);
    995 				} else {
    996 					brelse(bp, BC_INVAL);
    997 				}
    998 			} else {
    999 				brelse(bp, BC_INVAL);
   1000 			}
   1001 		}
   1002 
   1003 		/*
   1004 		 * Now that any dependencies that we created have been
   1005 		 * resolved, we can undo the partial allocation.
   1006 		 */
   1007 
   1008 		if (unwindidx == 0) {
   1009 			*allocib = 0;
   1010 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1011 		} else {
   1012 			int r;
   1013 
   1014 			r = bread(vp, indirs[unwindidx].in_lbn,
   1015 			    (int)fs->fs_bsize, 0, &bp);
   1016 			if (r) {
   1017 				panic("Could not unwind indirect block, error %d", r);
   1018 			} else {
   1019 				bap = (int64_t *)bp->b_data;
   1020 				bap[indirs[unwindidx].in_off] = 0;
   1021 				bwrite(bp);
   1022 			}
   1023 		}
   1024 		for (i = unwindidx + 1; i <= num; i++) {
   1025 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1026 			    fs->fs_bsize, false, &bp) == 0)
   1027 				brelse(bp, BC_INVAL);
   1028 		}
   1029 	}
   1030 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1031 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1032 		deallocated += fs->fs_bsize;
   1033 	}
   1034 	if (deallocated) {
   1035 #if defined(QUOTA) || defined(QUOTA2)
   1036 		/*
   1037 		 * Restore user's disk quota because allocation failed.
   1038 		 */
   1039 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1040 #endif
   1041 		ip->i_ffs2_blocks -= btodb(deallocated);
   1042 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1043 	}
   1044 
   1045 	return (error);
   1046 }
   1047