Home | History | Annotate | Line # | Download | only in ffs
ffs_balloc.c revision 1.63
      1 /*	$NetBSD: ffs_balloc.c,v 1.63 2017/10/28 00:37:13 pgoyette Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.63 2017/10/28 00:37:13 pgoyette Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
     80 
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
     97 
/*
 * UFS1 worker for ffs_balloc(): allocate the physical storage backing
 * the byte range [off, off + size) of the file, growing a trailing
 * fragment to a full block and allocating direct and indirect blocks
 * as needed.
 *
 * vp    - vnode of the file
 * off   - starting byte offset in the file
 * size  - number of bytes; off + size must fall within one fs block
 * cred  - credentials charged for the allocation (quota)
 * flags - B_SYNC / B_METAONLY / B_CLRBUF etc. allocation modifiers
 * bpp   - if non-NULL, a buffer covering the range is returned here
 *
 * Returns 0 or an errno.  On failure, every block allocated during
 * this call is freed again and any indirect-block pointer that was
 * published on disk is zeroed (see the "fail:" unwind code below).
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for unwinding. */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	/* From here on "size" is the allocation size within block lbn. */
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * um_lock is handed to the allocator;
			 * NOTE(review): ffs_realloccg appears to release
			 * it on all paths — confirm against ffs_alloc.c.
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Publish the new direct-block pointer in the inode. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		/* The inode's indirect pointer is now live: remember it
		 * (allocib/unwindidx) so "fail:" can retract it. */
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* At i == num, bap/nb refer to the data-block slot. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Only now link the child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* B_METAONLY: caller wants the last indirect block, not data. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists: release the indirect buffer and
	 * fetch the data block itself if the caller asked for it. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached buffers for the now-freed blocks. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block this call allocated (recorded in allociblk). */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
    513 
    514 static int
    515 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    516     int flags, struct buf **bpp)
    517 {
    518 	daddr_t lbn, lastlbn;
    519 	struct buf *bp, *nbp;
    520 	struct inode *ip = VTOI(vp);
    521 	struct fs *fs = ip->i_fs;
    522 	struct ufsmount *ump = ip->i_ump;
    523 	struct indir indirs[UFS_NIADDR + 2];
    524 	daddr_t newb, pref, nb;
    525 	int64_t *bap;
    526 	int deallocated, osize, nsize, num, i, error;
    527 	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
    528 	int64_t *allocib;
    529 	int unwindidx = -1;
    530 	const int needswap = UFS_FSNEEDSWAP(fs);
    531 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    532 
    533 	lbn = ffs_lblkno(fs, off);
    534 	size = ffs_blkoff(fs, off) + size;
    535 	if (size > fs->fs_bsize)
    536 		panic("ffs_balloc: blk too big");
    537 	if (bpp != NULL) {
    538 		*bpp = NULL;
    539 	}
    540 	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
    541 	    lbn, size, 0);
    542 
    543 	if (lbn < 0)
    544 		return (EFBIG);
    545 
    546 #ifdef notyet
    547 	/*
    548 	 * Check for allocating external data.
    549 	 */
    550 	if (flags & IO_EXT) {
    551 		if (lbn >= UFS_NXADDR)
    552 			return (EFBIG);
    553 		/*
    554 		 * If the next write will extend the data into a new block,
    555 		 * and the data is currently composed of a fragment
    556 		 * this fragment has to be extended to be a full block.
    557 		 */
    558 		lastlbn = ffs_lblkno(fs, dp->di_extsize);
    559 		if (lastlbn < lbn) {
    560 			nb = lastlbn;
    561 			osize = ffs_sblksize(fs, dp->di_extsize, nb);
    562 			if (osize < fs->fs_bsize && osize > 0) {
    563 				mutex_enter(&ump->um_lock);
    564 				error = ffs_realloccg(ip, -1 - nb,
    565 				    dp->di_extb[nb],
    566 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    567 					flags, &dp->di_extb[0]),
    568 				    osize,
    569 				    (int)fs->fs_bsize, cred, &bp);
    570 				if (error)
    571 					return (error);
    572 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    573 				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
    574 				bp->b_xflags |= BX_ALTDATA;
    575 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    576 				if (flags & IO_SYNC)
    577 					bwrite(bp);
    578 				else
    579 					bawrite(bp);
    580 			}
    581 		}
    582 		/*
    583 		 * All blocks are direct blocks
    584 		 */
    585 		if (flags & BA_METAONLY)
    586 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    587 		nb = dp->di_extb[lbn];
    588 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    589 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    590 			    0, &bp);
    591 			if (error) {
    592 				return (error);
    593 			}
    594 			mutex_enter(&bp->b_interlock);
    595 			bp->b_blkno = FFS_FSBTODB(fs, nb);
    596 			bp->b_xflags |= BX_ALTDATA;
    597 			mutex_exit(&bp->b_interlock);
    598 			*bpp = bp;
    599 			return (0);
    600 		}
    601 		if (nb != 0) {
    602 			/*
    603 			 * Consider need to reallocate a fragment.
    604 			 */
    605 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
    606 			nsize = ffs_fragroundup(fs, size);
    607 			if (nsize <= osize) {
    608 				error = bread(vp, -1 - lbn, osize,
    609 				    0, &bp);
    610 				if (error) {
    611 					return (error);
    612 				}
    613 				mutex_enter(&bp->b_interlock);
    614 				bp->b_blkno = FFS_FSBTODB(fs, nb);
    615 				bp->b_xflags |= BX_ALTDATA;
    616 				mutex_exit(&bp->b_interlock);
    617 			} else {
    618 				mutex_enter(&ump->um_lock);
    619 				error = ffs_realloccg(ip, -1 - lbn,
    620 				    dp->di_extb[lbn],
    621 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    622 				        &dp->di_extb[0]),
    623 				    osize, nsize, cred, &bp);
    624 				if (error)
    625 					return (error);
    626 				bp->b_xflags |= BX_ALTDATA;
    627 			}
    628 		} else {
    629 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    630 				nsize = ffs_fragroundup(fs, size);
    631 			else
    632 				nsize = fs->fs_bsize;
    633 			mutex_enter(&ump->um_lock);
    634 			error = ffs_alloc(ip, lbn,
    635 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    636 			       &dp->di_extb[0]),
    637 			   nsize, flags, cred, &newb);
    638 			if (error)
    639 				return (error);
    640 			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
    641 			    nsize, (flags & B_CLRBUF) != 0, &bp);
    642 			if (error)
    643 				return error;
    644 			bp->b_xflags |= BX_ALTDATA;
    645 		}
    646 		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
    647 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    648 		*bpp = bp;
    649 		return (0);
    650 	}
    651 #endif
    652 	/*
    653 	 * If the next write will extend the file into a new block,
    654 	 * and the file is currently composed of a fragment
    655 	 * this fragment has to be extended to be a full block.
    656 	 */
    657 
    658 	lastlbn = ffs_lblkno(fs, ip->i_size);
    659 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
    660 		nb = lastlbn;
    661 		osize = ffs_blksize(fs, ip, nb);
    662 		if (osize < fs->fs_bsize && osize > 0) {
    663 			mutex_enter(&ump->um_lock);
    664 			error = ffs_realloccg(ip, nb,
    665 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    666 					&ip->i_ffs2_db[0]),
    667 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    668 			if (error)
    669 				return (error);
    670 			ip->i_size = ffs_lblktosize(fs, nb + 1);
    671 			ip->i_ffs2_size = ip->i_size;
    672 			uvm_vnp_setsize(vp, ip->i_size);
    673 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    674 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    675 			if (bpp) {
    676 				if (flags & B_SYNC)
    677 					bwrite(*bpp);
    678 				else
    679 					bawrite(*bpp);
    680 			}
    681 		}
    682 	}
    683 
    684 	/*
    685 	 * The first UFS_NDADDR blocks are direct blocks
    686 	 */
    687 
    688 	if (lbn < UFS_NDADDR) {
    689 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    690 		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
    691 
    692 			/*
    693 			 * The block is an already-allocated direct block
    694 			 * and the file already extends past this block,
    695 			 * thus this must be a whole block.
    696 			 * Just read the block (if requested).
    697 			 */
    698 
    699 			if (bpp != NULL) {
    700 				error = bread(vp, lbn, fs->fs_bsize,
    701 					      B_MODIFY, bpp);
    702 				if (error) {
    703 					return (error);
    704 				}
    705 			}
    706 			return (0);
    707 		}
    708 		if (nb != 0) {
    709 
    710 			/*
    711 			 * Consider need to reallocate a fragment.
    712 			 */
    713 
    714 			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
    715 			nsize = ffs_fragroundup(fs, size);
    716 			if (nsize <= osize) {
    717 
    718 				/*
    719 				 * The existing block is already
    720 				 * at least as big as we want.
    721 				 * Just read the block (if requested).
    722 				 */
    723 
    724 				if (bpp != NULL) {
    725 					error = bread(vp, lbn, osize,
    726 						      B_MODIFY, bpp);
    727 					if (error) {
    728 						return (error);
    729 					}
    730 				}
    731 				return 0;
    732 			} else {
    733 
    734 				/*
    735 				 * The existing block is smaller than we want,
    736 				 * grow it.
    737 				 */
    738 				mutex_enter(&ump->um_lock);
    739 				error = ffs_realloccg(ip, lbn,
    740 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    741 					&ip->i_ffs2_db[0]),
    742 				    osize, nsize, cred, bpp, &newb);
    743 				if (error)
    744 					return (error);
    745 			}
    746 		} else {
    747 
    748 			/*
    749 			 * the block was not previously allocated,
    750 			 * allocate a new block or fragment.
    751 			 */
    752 
    753 			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
    754 				nsize = ffs_fragroundup(fs, size);
    755 			else
    756 				nsize = fs->fs_bsize;
    757 			mutex_enter(&ump->um_lock);
    758 			error = ffs_alloc(ip, lbn,
    759 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    760 				&ip->i_ffs2_db[0]),
    761 			    nsize, flags, cred, &newb);
    762 			if (error)
    763 				return (error);
    764 			if (bpp != NULL) {
    765 				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
    766 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    767 				if (error)
    768 					return error;
    769 			}
    770 		}
    771 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    772 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    773 		return (0);
    774 	}
    775 
    776 	/*
    777 	 * Determine the number of levels of indirection.
    778 	 */
    779 
    780 	pref = 0;
    781 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    782 		return (error);
    783 
    784 	/*
    785 	 * Fetch the first indirect block allocating if necessary.
    786 	 */
    787 
    788 	--num;
    789 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    790 	allocib = NULL;
    791 	allocblk = allociblk;
    792 	if (nb == 0) {
    793 		mutex_enter(&ump->um_lock);
    794 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    795 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    796 		    flags | B_METAONLY, cred, &newb);
    797 		if (error)
    798 			goto fail;
    799 		nb = newb;
    800 		*allocblk++ = nb;
    801 		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
    802 		    fs->fs_bsize, true, &bp);
    803 		if (error)
    804 			goto fail;
    805 		/*
    806 		 * Write synchronously so that indirect blocks
    807 		 * never point at garbage.
    808 		 */
    809 		if ((error = bwrite(bp)) != 0)
    810 			goto fail;
    811 		unwindidx = 0;
    812 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    813 		*allocib = ufs_rw64(nb, needswap);
    814 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    815 	}
    816 
    817 	/*
    818 	 * Fetch through the indirect blocks, allocating as necessary.
    819 	 */
    820 
    821 	for (i = 1;;) {
    822 		error = bread(vp,
    823 		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
    824 		if (error) {
    825 			goto fail;
    826 		}
    827 		bap = (int64_t *)bp->b_data;
    828 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    829 		if (i == num)
    830 			break;
    831 		i++;
    832 		if (nb != 0) {
    833 			brelse(bp, 0);
    834 			continue;
    835 		}
    836 		if (fscow_run(bp, true) != 0) {
    837 			brelse(bp, 0);
    838 			goto fail;
    839 		}
    840 		mutex_enter(&ump->um_lock);
    841 		/* Try to keep snapshot indirect blocks contiguous. */
    842 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
    843 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
    844 			    flags | B_METAONLY, &bap[0]);
    845 		if (pref == 0)
    846 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    847 			    NULL);
    848 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    849 		    flags | B_METAONLY, cred, &newb);
    850 		if (error) {
    851 			brelse(bp, 0);
    852 			goto fail;
    853 		}
    854 		nb = newb;
    855 		*allocblk++ = nb;
    856 		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
    857 		    fs->fs_bsize, true, &nbp);
    858 		if (error) {
    859 			brelse(bp, 0);
    860 			goto fail;
    861 		}
    862 		/*
    863 		 * Write synchronously so that indirect blocks
    864 		 * never point at garbage.
    865 		 */
    866 		if ((error = bwrite(nbp)) != 0) {
    867 			brelse(bp, 0);
    868 			goto fail;
    869 		}
    870 		if (unwindidx < 0)
    871 			unwindidx = i - 1;
    872 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    873 
    874 		/*
    875 		 * If required, write synchronously, otherwise use
    876 		 * delayed write.
    877 		 */
    878 
    879 		if (flags & B_SYNC) {
    880 			bwrite(bp);
    881 		} else {
    882 			bdwrite(bp);
    883 		}
    884 	}
    885 
    886 	if (flags & B_METAONLY) {
    887 		KASSERT(bpp != NULL);
    888 		*bpp = bp;
    889 		return (0);
    890 	}
    891 
    892 	/*
    893 	 * Get the data block, allocating if necessary.
    894 	 */
    895 
    896 	if (nb == 0) {
    897 		if (fscow_run(bp, true) != 0) {
    898 			brelse(bp, 0);
    899 			goto fail;
    900 		}
    901 		mutex_enter(&ump->um_lock);
    902 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
    903 		    &bap[0]);
    904 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    905 		    &newb);
    906 		if (error) {
    907 			brelse(bp, 0);
    908 			goto fail;
    909 		}
    910 		nb = newb;
    911 		*allocblk++ = nb;
    912 		if (bpp != NULL) {
    913 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    914 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    915 			if (error) {
    916 				brelse(bp, 0);
    917 				goto fail;
    918 			}
    919 		}
    920 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    921 		if (allocib == NULL && unwindidx < 0) {
    922 			unwindidx = i - 1;
    923 		}
    924 
    925 		/*
    926 		 * If required, write synchronously, otherwise use
    927 		 * delayed write.
    928 		 */
    929 
    930 		if (flags & B_SYNC) {
    931 			bwrite(bp);
    932 		} else {
    933 			bdwrite(bp);
    934 		}
    935 		return (0);
    936 	}
    937 	brelse(bp, 0);
    938 	if (bpp != NULL) {
    939 		if (flags & B_CLRBUF) {
    940 			error = bread(vp, lbn, (int)fs->fs_bsize,
    941 			    B_MODIFY, &nbp);
    942 			if (error) {
    943 				goto fail;
    944 			}
    945 		} else {
    946 			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
    947 			    fs->fs_bsize, true, &nbp);
    948 			if (error)
    949 				goto fail;
    950 		}
    951 		*bpp = nbp;
    952 	}
    953 	return (0);
    954 
    955 fail:
    956 	/*
    957 	 * If we have failed part way through block allocation, we
    958 	 * have to deallocate any indirect blocks that we have allocated.
    959 	 */
    960 
    961 	if (unwindidx >= 0) {
    962 
    963 		/*
    964 		 * First write out any buffers we've created to resolve their
    965 		 * softdeps.  This must be done in reverse order of creation
    966 		 * so that we resolve the dependencies in one pass.
    967 		 * Write the cylinder group buffers for these buffers too.
    968 		 */
    969 
    970 		for (i = num; i >= unwindidx; i--) {
    971 			if (i == 0) {
    972 				break;
    973 			}
    974 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    975 			    fs->fs_bsize, false, &bp) != 0)
    976 				continue;
    977 			if (bp->b_oflags & BO_DELWRI) {
    978 				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
    979 				    FFS_DBTOFSB(fs, bp->b_blkno))));
    980 				bwrite(bp);
    981 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    982 				    fs->fs_cgsize, false, &bp) != 0)
    983 					continue;
    984 				if (bp->b_oflags & BO_DELWRI) {
    985 					bwrite(bp);
    986 				} else {
    987 					brelse(bp, BC_INVAL);
    988 				}
    989 			} else {
    990 				brelse(bp, BC_INVAL);
    991 			}
    992 		}
    993 
    994 		/*
    995 		 * Now that any dependencies that we created have been
    996 		 * resolved, we can undo the partial allocation.
    997 		 */
    998 
    999 		if (unwindidx == 0) {
   1000 			*allocib = 0;
   1001 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1002 		} else {
   1003 			int r;
   1004 
   1005 			r = bread(vp, indirs[unwindidx].in_lbn,
   1006 			    (int)fs->fs_bsize, 0, &bp);
   1007 			if (r) {
   1008 				panic("Could not unwind indirect block, error %d", r);
   1009 			} else {
   1010 				bap = (int64_t *)bp->b_data;
   1011 				bap[indirs[unwindidx].in_off] = 0;
   1012 				bwrite(bp);
   1013 			}
   1014 		}
   1015 		for (i = unwindidx + 1; i <= num; i++) {
   1016 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1017 			    fs->fs_bsize, false, &bp) == 0)
   1018 				brelse(bp, BC_INVAL);
   1019 		}
   1020 	}
   1021 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1022 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1023 		deallocated += fs->fs_bsize;
   1024 	}
   1025 	if (deallocated) {
   1026 #if defined(QUOTA) || defined(QUOTA2)
   1027 		/*
   1028 		 * Restore user's disk quota because allocation failed.
   1029 		 */
   1030 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1031 #endif
   1032 		ip->i_ffs2_blocks -= btodb(deallocated);
   1033 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1034 	}
   1035 
   1036 	return (error);
   1037 }
   1038