Home | History | Annotate | Line # | Download | only in ffs
ffs_balloc.c revision 1.40.4.1
      1 /*	$NetBSD: ffs_balloc.c,v 1.40.4.1 2006/09/09 03:00:00 rpaulo Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.40.4.1 2006/09/09 03:00:00 rpaulo Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/mount.h>
     57 #include <sys/kauth.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
     80 
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 
     86 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     87 		return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     88 	else
     89 		return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     90 }
     91 
     92 static int
     93 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
     94     int flags, struct buf **bpp)
     95 {
     96 	daddr_t lbn, lastlbn;
     97 	struct buf *bp, *nbp;
     98 	struct inode *ip = VTOI(vp);
     99 	struct fs *fs = ip->i_fs;
    100 	struct indir indirs[NIADDR + 2];
    101 	daddr_t newb, pref, nb;
    102 	int32_t *bap;	/* XXX ondisk32 */
    103 	int deallocated, osize, nsize, num, i, error;
    104 	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
    105 	int32_t *allocib;
    106 	int unwindidx = -1;
    107 #ifdef FFS_EI
    108 	const int needswap = UFS_FSNEEDSWAP(fs);
    109 #endif
    110 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    111 
    112 	lbn = lblkno(fs, off);
    113 	size = blkoff(fs, off) + size;
    114 	if (size > fs->fs_bsize)
    115 		panic("ffs_balloc: blk too big");
    116 	if (bpp != NULL) {
    117 		*bpp = NULL;
    118 	}
    119 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    120 
    121 	KASSERT(size <= fs->fs_bsize);
    122 	if (lbn < 0)
    123 		return (EFBIG);
    124 
    125 	/*
    126 	 * If the next write will extend the file into a new block,
    127 	 * and the file is currently composed of a fragment
    128 	 * this fragment has to be extended to be a full block.
    129 	 */
    130 
    131 	lastlbn = lblkno(fs, ip->i_size);
    132 	if (lastlbn < NDADDR && lastlbn < lbn) {
    133 		nb = lastlbn;
    134 		osize = blksize(fs, ip, nb);
    135 		if (osize < fs->fs_bsize && osize > 0) {
    136 			error = ffs_realloccg(ip, nb,
    137 				    ffs_blkpref_ufs1(ip, lastlbn, nb,
    138 					&ip->i_ffs1_db[0]),
    139 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    140 			if (error)
    141 				return (error);
    142 			if (DOINGSOFTDEP(vp))
    143 				softdep_setup_allocdirect(ip, nb, newb,
    144 				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
    145 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    146 			ip->i_size = lblktosize(fs, nb + 1);
    147 			ip->i_ffs1_size = ip->i_size;
    148 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    149 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    150 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    151 			if (bpp && *bpp) {
    152 				if (flags & B_SYNC)
    153 					bwrite(*bpp);
    154 				else
    155 					bawrite(*bpp);
    156 			}
    157 		}
    158 	}
    159 
    160 	/*
    161 	 * The first NDADDR blocks are direct blocks
    162 	 */
    163 
    164 	if (lbn < NDADDR) {
    165 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    166 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    167 
    168 			/*
    169 			 * The block is an already-allocated direct block
    170 			 * and the file already extends past this block,
    171 			 * thus this must be a whole block.
    172 			 * Just read the block (if requested).
    173 			 */
    174 
    175 			if (bpp != NULL) {
    176 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    177 					      bpp);
    178 				if (error) {
    179 					brelse(*bpp);
    180 					return (error);
    181 				}
    182 			}
    183 			return (0);
    184 		}
    185 		if (nb != 0) {
    186 
    187 			/*
    188 			 * Consider need to reallocate a fragment.
    189 			 */
    190 
    191 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    192 			nsize = fragroundup(fs, size);
    193 			if (nsize <= osize) {
    194 
    195 				/*
    196 				 * The existing block is already
    197 				 * at least as big as we want.
    198 				 * Just read the block (if requested).
    199 				 */
    200 
    201 				if (bpp != NULL) {
    202 					error = bread(vp, lbn, osize, NOCRED,
    203 						      bpp);
    204 					if (error) {
    205 						brelse(*bpp);
    206 						return (error);
    207 					}
    208 				}
    209 				return 0;
    210 			} else {
    211 
    212 				/*
    213 				 * The existing block is smaller than we want,
    214 				 * grow it.
    215 				 */
    216 
    217 				error = ffs_realloccg(ip, lbn,
    218 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    219 					&ip->i_ffs1_db[0]), osize, nsize, cred,
    220 					bpp, &newb);
    221 				if (error)
    222 					return (error);
    223 				if (DOINGSOFTDEP(vp))
    224 					softdep_setup_allocdirect(ip, lbn,
    225 					    newb, nb, nsize, osize,
    226 					    bpp ? *bpp : NULL);
    227 			}
    228 		} else {
    229 
    230 			/*
    231 			 * the block was not previously allocated,
    232 			 * allocate a new block or fragment.
    233 			 */
    234 
    235 			if (ip->i_size < lblktosize(fs, lbn + 1))
    236 				nsize = fragroundup(fs, size);
    237 			else
    238 				nsize = fs->fs_bsize;
    239 			error = ffs_alloc(ip, lbn,
    240 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    241 				&ip->i_ffs1_db[0]),
    242 				nsize, cred, &newb);
    243 			if (error)
    244 				return (error);
    245 			if (bpp != NULL) {
    246 				bp = getblk(vp, lbn, nsize, 0, 0);
    247 				bp->b_blkno = fsbtodb(fs, newb);
    248 				if (flags & B_CLRBUF)
    249 					clrbuf(bp);
    250 				*bpp = bp;
    251 			}
    252 			if (DOINGSOFTDEP(vp)) {
    253 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    254 				    nsize, 0, bpp ? *bpp : NULL);
    255 			}
    256 		}
    257 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    258 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    259 		return (0);
    260 	}
    261 
    262 	/*
    263 	 * Determine the number of levels of indirection.
    264 	 */
    265 
    266 	pref = 0;
    267 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    268 		return (error);
    269 
    270 	/*
    271 	 * Fetch the first indirect block allocating if necessary.
    272 	 */
    273 
    274 	--num;
    275 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
    276 	allocib = NULL;
    277 	allocblk = allociblk;
    278 	if (nb == 0) {
    279 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    280 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    281 		    &newb);
    282 		if (error)
    283 			goto fail;
    284 		nb = newb;
    285 		*allocblk++ = nb;
    286 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
    287 		bp->b_blkno = fsbtodb(fs, nb);
    288 		clrbuf(bp);
    289 		if (DOINGSOFTDEP(vp)) {
    290 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    291 			    newb, 0, fs->fs_bsize, 0, bp);
    292 			bdwrite(bp);
    293 		} else {
    294 
    295 			/*
    296 			 * Write synchronously so that indirect blocks
    297 			 * never point at garbage.
    298 			 */
    299 
    300 			if ((error = bwrite(bp)) != 0)
    301 				goto fail;
    302 		}
    303 		unwindidx = 0;
    304 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    305 		*allocib = ufs_rw32(nb, needswap);
    306 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    307 	}
    308 
    309 	/*
    310 	 * Fetch through the indirect blocks, allocating as necessary.
    311 	 */
    312 
    313 	for (i = 1;;) {
    314 		error = bread(vp,
    315 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
    316 		if (error) {
    317 			brelse(bp);
    318 			goto fail;
    319 		}
    320 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    321 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    322 		if (i == num)
    323 			break;
    324 		i++;
    325 		if (nb != 0) {
    326 			brelse(bp);
    327 			continue;
    328 		}
    329 		if (pref == 0)
    330 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    331 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    332 		    &newb);
    333 		if (error) {
    334 			brelse(bp);
    335 			goto fail;
    336 		}
    337 		nb = newb;
    338 		*allocblk++ = nb;
    339 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
    340 		nbp->b_blkno = fsbtodb(fs, nb);
    341 		clrbuf(nbp);
    342 		if (DOINGSOFTDEP(vp)) {
    343 			softdep_setup_allocindir_meta(nbp, ip, bp,
    344 			    indirs[i - 1].in_off, nb);
    345 			bdwrite(nbp);
    346 		} else {
    347 
    348 			/*
    349 			 * Write synchronously so that indirect blocks
    350 			 * never point at garbage.
    351 			 */
    352 
    353 			if ((error = bwrite(nbp)) != 0) {
    354 				brelse(bp);
    355 				goto fail;
    356 			}
    357 		}
    358 		if (unwindidx < 0)
    359 			unwindidx = i - 1;
    360 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    361 
    362 		/*
    363 		 * If required, write synchronously, otherwise use
    364 		 * delayed write.
    365 		 */
    366 
    367 		if (flags & B_SYNC) {
    368 			bwrite(bp);
    369 		} else {
    370 			bdwrite(bp);
    371 		}
    372 	}
    373 
    374 	if (flags & B_METAONLY) {
    375 		KASSERT(bpp != NULL);
    376 		*bpp = bp;
    377 		return (0);
    378 	}
    379 
    380 	/*
    381 	 * Get the data block, allocating if necessary.
    382 	 */
    383 
    384 	if (nb == 0) {
    385 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
    386 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    387 		    &newb);
    388 		if (error) {
    389 			brelse(bp);
    390 			goto fail;
    391 		}
    392 		nb = newb;
    393 		*allocblk++ = nb;
    394 		if (bpp != NULL) {
    395 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    396 			nbp->b_blkno = fsbtodb(fs, nb);
    397 			if (flags & B_CLRBUF)
    398 				clrbuf(nbp);
    399 			*bpp = nbp;
    400 		}
    401 		if (DOINGSOFTDEP(vp))
    402 			softdep_setup_allocindir_page(ip, lbn, bp,
    403 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    404 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    405 		if (allocib == NULL && unwindidx < 0) {
    406 			unwindidx = i - 1;
    407 		}
    408 
    409 		/*
    410 		 * If required, write synchronously, otherwise use
    411 		 * delayed write.
    412 		 */
    413 
    414 		if (flags & B_SYNC) {
    415 			bwrite(bp);
    416 		} else {
    417 			bdwrite(bp);
    418 		}
    419 		return (0);
    420 	}
    421 	brelse(bp);
    422 	if (bpp != NULL) {
    423 		if (flags & B_CLRBUF) {
    424 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
    425 			if (error) {
    426 				brelse(nbp);
    427 				goto fail;
    428 			}
    429 		} else {
    430 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    431 			nbp->b_blkno = fsbtodb(fs, nb);
    432 			clrbuf(nbp);
    433 		}
    434 		*bpp = nbp;
    435 	}
    436 	return (0);
    437 
    438 fail:
    439 	/*
    440 	 * If we have failed part way through block allocation, we
    441 	 * have to deallocate any indirect blocks that we have allocated.
    442 	 */
    443 
    444 	if (unwindidx >= 0) {
    445 
    446 		/*
    447 		 * First write out any buffers we've created to resolve their
    448 		 * softdeps.  This must be done in reverse order of creation
    449 		 * so that we resolve the dependencies in one pass.
    450 		 * Write the cylinder group buffers for these buffers too.
    451 		 */
    452 
    453 		for (i = num; i >= unwindidx; i--) {
    454 			if (i == 0) {
    455 				break;
    456 			}
    457 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    458 			    0);
    459 			if (bp->b_flags & B_DELWRI) {
    460 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    461 				    dbtofsb(fs, bp->b_blkno))));
    462 				bwrite(bp);
    463 				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
    464 				    0, 0);
    465 				if (bp->b_flags & B_DELWRI) {
    466 					bwrite(bp);
    467 				} else {
    468 					bp->b_flags |= B_INVAL;
    469 					brelse(bp);
    470 				}
    471 			} else {
    472 				bp->b_flags |= B_INVAL;
    473 				brelse(bp);
    474 			}
    475 		}
    476 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
    477 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    478 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    479 		}
    480 
    481 		/*
    482 		 * Now that any dependencies that we created have been
    483 		 * resolved, we can undo the partial allocation.
    484 		 */
    485 
    486 		if (unwindidx == 0) {
    487 			*allocib = 0;
    488 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    489 			if (DOINGSOFTDEP(vp))
    490 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    491 		} else {
    492 			int r;
    493 
    494 			r = bread(vp, indirs[unwindidx].in_lbn,
    495 			    (int)fs->fs_bsize, NOCRED, &bp);
    496 			if (r) {
    497 				panic("Could not unwind indirect block, error %d", r);
    498 				brelse(bp);
    499 			} else {
    500 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    501 				bap[indirs[unwindidx].in_off] = 0;
    502 				bwrite(bp);
    503 			}
    504 		}
    505 		for (i = unwindidx + 1; i <= num; i++) {
    506 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    507 			    0);
    508 			bp->b_flags |= B_INVAL;
    509 			brelse(bp);
    510 		}
    511 	}
    512 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    513 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    514 		deallocated += fs->fs_bsize;
    515 	}
    516 	if (deallocated) {
    517 #ifdef QUOTA
    518 		/*
    519 		 * Restore user's disk quota because allocation failed.
    520 		 */
    521 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    522 #endif
    523 		ip->i_ffs1_blocks -= btodb(deallocated);
    524 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    525 	}
    526 	return (error);
    527 }
    528 
    529 static int
    530 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    531     int flags, struct buf **bpp)
    532 {
    533 	daddr_t lbn, lastlbn;
    534 	struct buf *bp, *nbp;
    535 	struct inode *ip = VTOI(vp);
    536 	struct fs *fs = ip->i_fs;
    537 	struct indir indirs[NIADDR + 2];
    538 	daddr_t newb, pref, nb;
    539 	int64_t *bap;
    540 	int deallocated, osize, nsize, num, i, error;
    541 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
    542 	int64_t *allocib;
    543 	int unwindidx = -1;
    544 #ifdef FFS_EI
    545 	const int needswap = UFS_FSNEEDSWAP(fs);
    546 #endif
    547 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    548 
    549 	lbn = lblkno(fs, off);
    550 	size = blkoff(fs, off) + size;
    551 	if (size > fs->fs_bsize)
    552 		panic("ffs_balloc: blk too big");
    553 	if (bpp != NULL) {
    554 		*bpp = NULL;
    555 	}
    556 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    557 
    558 	KASSERT(size <= fs->fs_bsize);
    559 	if (lbn < 0)
    560 		return (EFBIG);
    561 
    562 #ifdef notyet
    563 	/*
    564 	 * Check for allocating external data.
    565 	 */
    566 	if (flags & IO_EXT) {
    567 		if (lbn >= NXADDR)
    568 			return (EFBIG);
    569 		/*
    570 		 * If the next write will extend the data into a new block,
    571 		 * and the data is currently composed of a fragment
    572 		 * this fragment has to be extended to be a full block.
    573 		 */
    574 		lastlbn = lblkno(fs, dp->di_extsize);
    575 		if (lastlbn < lbn) {
    576 			nb = lastlbn;
    577 			osize = sblksize(fs, dp->di_extsize, nb);
    578 			if (osize < fs->fs_bsize && osize > 0) {
    579 				error = ffs_realloccg(ip, -1 - nb,
    580 				    dp->di_extb[nb],
    581 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    582 				    &dp->di_extb[0]), osize,
    583 				    (int)fs->fs_bsize, cred, &bp);
    584 				if (error)
    585 					return (error);
    586 				if (DOINGSOFTDEP(vp))
    587 					softdep_setup_allocext(ip, nb,
    588 					    dbtofsb(fs, bp->b_blkno),
    589 					    dp->di_extb[nb],
    590 					    fs->fs_bsize, osize, bp);
    591 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    592 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    593 				bp->b_xflags |= BX_ALTDATA;
    594 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    595 				if (flags & IO_SYNC)
    596 					bwrite(bp);
    597 				else
    598 					bawrite(bp);
    599 			}
    600 		}
    601 		/*
    602 		 * All blocks are direct blocks
    603 		 */
    604 		if (flags & BA_METAONLY)
    605 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    606 		nb = dp->di_extb[lbn];
    607 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    608 			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
    609 			if (error) {
    610 				brelse(bp);
    611 				return (error);
    612 			}
    613 			bp->b_blkno = fsbtodb(fs, nb);
    614 			bp->b_xflags |= BX_ALTDATA;
    615 			*bpp = bp;
    616 			return (0);
    617 		}
    618 		if (nb != 0) {
    619 			/*
    620 			 * Consider need to reallocate a fragment.
    621 			 */
    622 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    623 			nsize = fragroundup(fs, size);
    624 			if (nsize <= osize) {
    625 				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
    626 				if (error) {
    627 					brelse(bp);
    628 					return (error);
    629 				}
    630 				bp->b_blkno = fsbtodb(fs, nb);
    631 				bp->b_xflags |= BX_ALTDATA;
    632 			} else {
    633 				error = ffs_realloccg(ip, -1 - lbn,
    634 				    dp->di_extb[lbn],
    635 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    636 				    &dp->di_extb[0]), osize, nsize, cred, &bp);
    637 				if (error)
    638 					return (error);
    639 				bp->b_xflags |= BX_ALTDATA;
    640 				if (DOINGSOFTDEP(vp))
    641 					softdep_setup_allocext(ip, lbn,
    642 					    dbtofsb(fs, bp->b_blkno), nb,
    643 					    nsize, osize, bp);
    644 			}
    645 		} else {
    646 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    647 				nsize = fragroundup(fs, size);
    648 			else
    649 				nsize = fs->fs_bsize;
    650 			error = ffs_alloc(ip, lbn,
    651 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
    652 			   nsize, cred, &newb);
    653 			if (error)
    654 				return (error);
    655 			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
    656 			bp->b_blkno = fsbtodb(fs, newb);
    657 			bp->b_xflags |= BX_ALTDATA;
    658 			if (flags & BA_CLRBUF)
    659 				vfs_bio_clrbuf(bp);
    660 			if (DOINGSOFTDEP(vp))
    661 				softdep_setup_allocext(ip, lbn, newb, 0,
    662 				    nsize, 0, bp);
    663 		}
    664 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    665 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    666 		*bpp = bp;
    667 		return (0);
    668 	}
    669 #endif
    670 	/*
    671 	 * If the next write will extend the file into a new block,
    672 	 * and the file is currently composed of a fragment
    673 	 * this fragment has to be extended to be a full block.
    674 	 */
    675 
    676 	lastlbn = lblkno(fs, ip->i_size);
    677 	if (lastlbn < NDADDR && lastlbn < lbn) {
    678 		nb = lastlbn;
    679 		osize = blksize(fs, ip, nb);
    680 		if (osize < fs->fs_bsize && osize > 0) {
    681 			error = ffs_realloccg(ip, nb,
    682 				    ffs_blkpref_ufs2(ip, lastlbn, nb,
    683 					&ip->i_ffs2_db[0]),
    684 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    685 			if (error)
    686 				return (error);
    687 			if (DOINGSOFTDEP(vp))
    688 				softdep_setup_allocdirect(ip, nb, newb,
    689 				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
    690 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    691 			ip->i_size = lblktosize(fs, nb + 1);
    692 			ip->i_ffs2_size = ip->i_size;
    693 			uvm_vnp_setsize(vp, ip->i_size);
    694 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    695 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    696 			if (bpp) {
    697 				if (flags & B_SYNC)
    698 					bwrite(*bpp);
    699 				else
    700 					bawrite(*bpp);
    701 			}
    702 		}
    703 	}
    704 
    705 	/*
    706 	 * The first NDADDR blocks are direct blocks
    707 	 */
    708 
    709 	if (lbn < NDADDR) {
    710 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    711 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    712 
    713 			/*
    714 			 * The block is an already-allocated direct block
    715 			 * and the file already extends past this block,
    716 			 * thus this must be a whole block.
    717 			 * Just read the block (if requested).
    718 			 */
    719 
    720 			if (bpp != NULL) {
    721 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    722 					      bpp);
    723 				if (error) {
    724 					brelse(*bpp);
    725 					return (error);
    726 				}
    727 			}
    728 			return (0);
    729 		}
    730 		if (nb != 0) {
    731 
    732 			/*
    733 			 * Consider need to reallocate a fragment.
    734 			 */
    735 
    736 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    737 			nsize = fragroundup(fs, size);
    738 			if (nsize <= osize) {
    739 
    740 				/*
    741 				 * The existing block is already
    742 				 * at least as big as we want.
    743 				 * Just read the block (if requested).
    744 				 */
    745 
    746 				if (bpp != NULL) {
    747 					error = bread(vp, lbn, osize, NOCRED,
    748 						      bpp);
    749 					if (error) {
    750 						brelse(*bpp);
    751 						return (error);
    752 					}
    753 				}
    754 				return 0;
    755 			} else {
    756 
    757 				/*
    758 				 * The existing block is smaller than we want,
    759 				 * grow it.
    760 				 */
    761 
    762 				error = ffs_realloccg(ip, lbn,
    763 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    764 					&ip->i_ffs2_db[0]), osize, nsize, cred,
    765 					bpp, &newb);
    766 				if (error)
    767 					return (error);
    768 				if (DOINGSOFTDEP(vp))
    769 					softdep_setup_allocdirect(ip, lbn,
    770 					    newb, nb, nsize, osize,
    771 					    bpp ? *bpp : NULL);
    772 			}
    773 		} else {
    774 
    775 			/*
    776 			 * the block was not previously allocated,
    777 			 * allocate a new block or fragment.
    778 			 */
    779 
    780 			if (ip->i_size < lblktosize(fs, lbn + 1))
    781 				nsize = fragroundup(fs, size);
    782 			else
    783 				nsize = fs->fs_bsize;
    784 			error = ffs_alloc(ip, lbn,
    785 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    786 				&ip->i_ffs2_db[0]), nsize, cred, &newb);
    787 			if (error)
    788 				return (error);
    789 			if (bpp != NULL) {
    790 				bp = getblk(vp, lbn, nsize, 0, 0);
    791 				bp->b_blkno = fsbtodb(fs, newb);
    792 				if (flags & B_CLRBUF)
    793 					clrbuf(bp);
    794 				*bpp = bp;
    795 			}
    796 			if (DOINGSOFTDEP(vp)) {
    797 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    798 				    nsize, 0, bpp ? *bpp : NULL);
    799 			}
    800 		}
    801 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    802 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    803 		return (0);
    804 	}
    805 
    806 	/*
    807 	 * Determine the number of levels of indirection.
    808 	 */
    809 
    810 	pref = 0;
    811 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    812 		return (error);
    813 
    814 	/*
    815 	 * Fetch the first indirect block allocating if necessary.
    816 	 */
    817 
    818 	--num;
    819 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    820 	allocib = NULL;
    821 	allocblk = allociblk;
    822 	if (nb == 0) {
    823 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    824 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    825 		    &newb);
    826 		if (error)
    827 			goto fail;
    828 		nb = newb;
    829 		*allocblk++ = nb;
    830 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
    831 		bp->b_blkno = fsbtodb(fs, nb);
    832 		clrbuf(bp);
    833 		if (DOINGSOFTDEP(vp)) {
    834 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    835 			    newb, 0, fs->fs_bsize, 0, bp);
    836 			bdwrite(bp);
    837 		} else {
    838 
    839 			/*
    840 			 * Write synchronously so that indirect blocks
    841 			 * never point at garbage.
    842 			 */
    843 
    844 			if ((error = bwrite(bp)) != 0)
    845 				goto fail;
    846 		}
    847 		unwindidx = 0;
    848 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    849 		*allocib = ufs_rw64(nb, needswap);
    850 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    851 	}
    852 
    853 	/*
    854 	 * Fetch through the indirect blocks, allocating as necessary.
    855 	 */
    856 
    857 	for (i = 1;;) {
    858 		error = bread(vp,
    859 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
    860 		if (error) {
    861 			brelse(bp);
    862 			goto fail;
    863 		}
    864 		bap = (int64_t *)bp->b_data;
    865 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    866 		if (i == num)
    867 			break;
    868 		i++;
    869 		if (nb != 0) {
    870 			brelse(bp);
    871 			continue;
    872 		}
    873 		if (pref == 0)
    874 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    875 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    876 		    &newb);
    877 		if (error) {
    878 			brelse(bp);
    879 			goto fail;
    880 		}
    881 		nb = newb;
    882 		*allocblk++ = nb;
    883 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
    884 		nbp->b_blkno = fsbtodb(fs, nb);
    885 		clrbuf(nbp);
    886 		if (DOINGSOFTDEP(vp)) {
    887 			softdep_setup_allocindir_meta(nbp, ip, bp,
    888 			    indirs[i - 1].in_off, nb);
    889 			bdwrite(nbp);
    890 		} else {
    891 
    892 			/*
    893 			 * Write synchronously so that indirect blocks
    894 			 * never point at garbage.
    895 			 */
    896 
    897 			if ((error = bwrite(nbp)) != 0) {
    898 				brelse(bp);
    899 				goto fail;
    900 			}
    901 		}
    902 		if (unwindidx < 0)
    903 			unwindidx = i - 1;
    904 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    905 
    906 		/*
    907 		 * If required, write synchronously, otherwise use
    908 		 * delayed write.
    909 		 */
    910 
    911 		if (flags & B_SYNC) {
    912 			bwrite(bp);
    913 		} else {
    914 			bdwrite(bp);
    915 		}
    916 	}
    917 
    918 	if (flags & B_METAONLY) {
    919 		KASSERT(bpp != NULL);
    920 		*bpp = bp;
    921 		return (0);
    922 	}
    923 
    924 	/*
    925 	 * Get the data block, allocating if necessary.
    926 	 */
    927 
    928 	if (nb == 0) {
    929 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
    930 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    931 		    &newb);
    932 		if (error) {
    933 			brelse(bp);
    934 			goto fail;
    935 		}
    936 		nb = newb;
    937 		*allocblk++ = nb;
    938 		if (bpp != NULL) {
    939 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    940 			nbp->b_blkno = fsbtodb(fs, nb);
    941 			if (flags & B_CLRBUF)
    942 				clrbuf(nbp);
    943 			*bpp = nbp;
    944 		}
    945 		if (DOINGSOFTDEP(vp))
    946 			softdep_setup_allocindir_page(ip, lbn, bp,
    947 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    948 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    949 		if (allocib == NULL && unwindidx < 0) {
    950 			unwindidx = i - 1;
    951 		}
    952 
    953 		/*
    954 		 * If required, write synchronously, otherwise use
    955 		 * delayed write.
    956 		 */
    957 
    958 		if (flags & B_SYNC) {
    959 			bwrite(bp);
    960 		} else {
    961 			bdwrite(bp);
    962 		}
    963 		return (0);
    964 	}
    965 	brelse(bp);
    966 	if (bpp != NULL) {
    967 		if (flags & B_CLRBUF) {
    968 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
    969 			if (error) {
    970 				brelse(nbp);
    971 				goto fail;
    972 			}
    973 		} else {
    974 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    975 			nbp->b_blkno = fsbtodb(fs, nb);
    976 			clrbuf(nbp);
    977 		}
    978 		*bpp = nbp;
    979 	}
    980 	return (0);
    981 
    982 fail:
    983 	/*
    984 	 * If we have failed part way through block allocation, we
    985 	 * have to deallocate any indirect blocks that we have allocated.
    986 	 */
    987 
    988 	if (unwindidx >= 0) {
    989 
    990 		/*
    991 		 * First write out any buffers we've created to resolve their
    992 		 * softdeps.  This must be done in reverse order of creation
    993 		 * so that we resolve the dependencies in one pass.
    994 		 * Write the cylinder group buffers for these buffers too.
    995 		 */
    996 
    997 		for (i = num; i >= unwindidx; i--) {
    998 			if (i == 0) {
    999 				break;
   1000 			}
   1001 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
   1002 			    0);
   1003 			if (bp->b_flags & B_DELWRI) {
   1004 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
   1005 				    dbtofsb(fs, bp->b_blkno))));
   1006 				bwrite(bp);
   1007 				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
   1008 				    0, 0);
   1009 				if (bp->b_flags & B_DELWRI) {
   1010 					bwrite(bp);
   1011 				} else {
   1012 					bp->b_flags |= B_INVAL;
   1013 					brelse(bp);
   1014 				}
   1015 			} else {
   1016 				bp->b_flags |= B_INVAL;
   1017 				brelse(bp);
   1018 			}
   1019 		}
   1020 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
   1021 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1022 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1023 		}
   1024 
   1025 		/*
   1026 		 * Now that any dependencies that we created have been
   1027 		 * resolved, we can undo the partial allocation.
   1028 		 */
   1029 
   1030 		if (unwindidx == 0) {
   1031 			*allocib = 0;
   1032 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1033 			if (DOINGSOFTDEP(vp))
   1034 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1035 		} else {
   1036 			int r;
   1037 
   1038 			r = bread(vp, indirs[unwindidx].in_lbn,
   1039 			    (int)fs->fs_bsize, NOCRED, &bp);
   1040 			if (r) {
   1041 				panic("Could not unwind indirect block, error %d", r);
   1042 				brelse(bp);
   1043 			} else {
   1044 				bap = (int64_t *)bp->b_data;
   1045 				bap[indirs[unwindidx].in_off] = 0;
   1046 				bwrite(bp);
   1047 			}
   1048 		}
   1049 		for (i = unwindidx + 1; i <= num; i++) {
   1050 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
   1051 			    0);
   1052 			bp->b_flags |= B_INVAL;
   1053 			brelse(bp);
   1054 		}
   1055 	}
   1056 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1057 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1058 		deallocated += fs->fs_bsize;
   1059 	}
   1060 	if (deallocated) {
   1061 #ifdef QUOTA
   1062 		/*
   1063 		 * Restore user's disk quota because allocation failed.
   1064 		 */
   1065 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1066 #endif
   1067 		ip->i_ffs2_blocks -= btodb(deallocated);
   1068 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1069 	}
   1070 	return (error);
   1071 }
   1072