Home | History | Annotate | Line # | Download | only in ffs
ffs_balloc.c revision 1.52.6.1
      1 /*	$NetBSD: ffs_balloc.c,v 1.52.6.1 2011/06/06 09:10:15 jruoho Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.52.6.1 2011/06/06 09:10:15 jruoho Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
     80 
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
     97 
/*
 * ffs_balloc_ufs1:
 *
 *	Allocate (or extend to) the physical blocks backing the logical
 *	block that contains byte offset "off" of a UFS1 inode.  "size" is
 *	the number of bytes being written at "off"; the request must fit
 *	within a single file system block (panic otherwise).  On success
 *	*bpp (if bpp != NULL) holds a buffer for the data block, except
 *	with B_METAONLY where it holds the indirect block instead.
 *
 *	Returns 0 on success or an errno.  On failure part-way through
 *	indirect-block allocation, everything allocated here is undone
 *	(see "fail" below).
 *
 *	NOTE(review): ump->um_lock is taken before each ffs_alloc()/
 *	ffs_realloccg() call with no visible release here — presumably
 *	those routines drop it; confirm against ffs_alloc.c.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* Record of every block allocated here, for unwinding on error. */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;	/* first indirect level to undo; -1 = none */
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* Widen the request to cover from the start of the block. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			/* Keep in-core and on-disk sizes in step. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/* A write inside the last block may need only a frag. */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Publish the new direct-block pointer in the inode. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		/* Only now link it into the inode, so a crash is safe. */
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;	/* bap/bp now refer to the last level */
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Link the child into its parent only after the sync write. */
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block: hand it over locked. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Enter the data block into the last-level indirect block. */
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists; release the indirect block buffer. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* Flush the cylinder group block as well. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/* NOTE: panic does not return; brelse below
				 * is unreachable and kept for symmetry. */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached buffers for the levels being freed. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Return every block allocated by this call to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
    519 
/*
 * ffs_balloc_ufs2:
 *
 *	UFS2 counterpart of ffs_balloc_ufs1: allocate the physical blocks
 *	backing the logical block containing byte offset "off".  Identical
 *	in structure, but block pointers are 64-bit (ufs_rw64, int64_t
 *	indirect arrays, i_ffs2_* inode fields).  "size" bytes written at
 *	"off" must fit in one file system block (panic otherwise).  On
 *	success *bpp (if bpp != NULL) holds the data-block buffer, or the
 *	indirect block with B_METAONLY.
 *
 *	Returns 0 or an errno; on failure all blocks allocated by this
 *	call are freed again (see "fail").
 *
 *	NOTE(review): ump->um_lock is taken before each ffs_alloc()/
 *	ffs_realloccg() call with no visible release here — presumably
 *	those routines drop it; confirm against ffs_alloc.c.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* Record of every block allocated here, for unwinding on error. */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;	/* first indirect level to undo; -1 = none */
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* Widen the request to cover from the start of the block. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 * (Disabled: references "dp" which is not declared here; this is
	 * the FreeBSD extended-attribute area code, not yet ported.)
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					flags, &dp->di_extb[0]),
				    osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    NOCRED, 0, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    NOCRED, 0, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				        &dp->di_extb[0]),
				    osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
			       &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
			    nsize, (flags & BA_CLRBUF) != 0, &bp);
			if (error)
				return error;
			bp->b_xflags |= BX_ALTDATA;
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
					&ip->i_ffs2_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			/* Keep in-core and on-disk sizes in step. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&ip->i_ffs2_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/* A write inside the last block may need only a frag. */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&ip->i_ffs2_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Publish the new direct-block pointer in the inode. */
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		/* Only now link it into the inode, so a crash is safe. */
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;	/* bap/bp now refer to the last level */
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Link the child into its parent only after the sync write. */
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block: hand it over locked. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Enter the data block into the last-level indirect block. */
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists; release the indirect block buffer. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* Flush the cylinder group block as well. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/* NOTE: panic does not return; brelse below
				 * is unreachable and kept for symmetry. */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate cached buffers for the levels being freed. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Return every block allocated by this call to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	return (error);
}
   1052