ffs_balloc.c revision 1.54.4.1
      1 /*	$NetBSD: ffs_balloc.c,v 1.54.4.1 2013/01/23 00:06:32 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.54.4.1 2013/01/23 00:06:32 yamt Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
      77  * by allocating the physical blocks on a device, given
     78  * the inode and the logical block number in a file.
     79  */
     80 
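        /*
         * ffs_balloc() itself only dispatches on the superblock magic
         * number to the UFS1 or UFS2 implementation below and, when a
         * buffer is returned, runs the copy-on-write hook (fscow_run)
         * on it, releasing the buffer if that hook fails.
         */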
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
     97 
     98 static int
     99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    100     int flags, struct buf **bpp)
    101 {
    102 	daddr_t lbn, lastlbn;
    103 	struct buf *bp, *nbp;
    104 	struct inode *ip = VTOI(vp);
    105 	struct fs *fs = ip->i_fs;
    106 	struct ufsmount *ump = ip->i_ump;
    107 	struct indir indirs[UFS_NIADDR + 2];
    108 	daddr_t newb, pref, nb;
    109 	int32_t *bap;	/* XXX ondisk32 */
    110 	int deallocated, osize, nsize, num, i, error;
    111 	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
    112 	int32_t *allocib;
    113 	int unwindidx = -1;
    114 #ifdef FFS_EI
    115 	const int needswap = UFS_FSNEEDSWAP(fs);
    116 #endif
    117 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    118 
    119 	lbn = lblkno(fs, off);
    120 	size = blkoff(fs, off) + size;
    121 	if (size > fs->fs_bsize)
    122 		panic("ffs_balloc: blk too big");
    123 	if (bpp != NULL) {
    124 		*bpp = NULL;
    125 	}
    126 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    127 
    128 	if (lbn < 0)
    129 		return (EFBIG);
    130 
    131 	/*
    132 	 * If the next write will extend the file into a new block,
     133 	 * and the file currently ends in a fragment,
     134 	 * that fragment has to be extended to a full block.
    135 	 */
    136 
    137 	lastlbn = lblkno(fs, ip->i_size);
    138 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
    139 		nb = lastlbn;
    140 		osize = blksize(fs, ip, nb);
    141 		if (osize < fs->fs_bsize && osize > 0) {
    142 			mutex_enter(&ump->um_lock);
    143 			error = ffs_realloccg(ip, nb,
    144 				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
    145 					&ip->i_ffs1_db[0]),
    146 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    147 			if (error)
    148 				return (error);
    149 			ip->i_size = lblktosize(fs, nb + 1);
    150 			ip->i_ffs1_size = ip->i_size;
    151 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    152 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    153 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    154 			if (bpp && *bpp) {
    155 				if (flags & B_SYNC)
    156 					bwrite(*bpp);
    157 				else
    158 					bawrite(*bpp);
    159 			}
    160 		}
    161 	}
    162 
    163 	/*
    164 	 * The first UFS_NDADDR blocks are direct blocks
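        	 *
        	 * Three cases are handled: the block already exists and the
        	 * file extends past it (just read it if a buffer was
        	 * requested); the block exists but is a fragment that may
        	 * have to be grown; or no block exists yet and a fragment
        	 * or a full block must be allocated.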
    165 	 */
    166 
    167 	if (lbn < UFS_NDADDR) {
    168 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    169 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    170 
    171 			/*
    172 			 * The block is an already-allocated direct block
    173 			 * and the file already extends past this block,
    174 			 * thus this must be a whole block.
    175 			 * Just read the block (if requested).
    176 			 */
    177 
    178 			if (bpp != NULL) {
    179 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    180 					      B_MODIFY, bpp);
    181 				if (error) {
    182 					return (error);
    183 				}
    184 			}
    185 			return (0);
    186 		}
    187 		if (nb != 0) {
    188 
    189 			/*
     190 			 * Consider the need to reallocate a fragment.
    191 			 */
    192 
    193 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    194 			nsize = fragroundup(fs, size);
    195 			if (nsize <= osize) {
    196 
    197 				/*
    198 				 * The existing block is already
    199 				 * at least as big as we want.
    200 				 * Just read the block (if requested).
    201 				 */
    202 
    203 				if (bpp != NULL) {
    204 					error = bread(vp, lbn, osize, NOCRED,
    205 						      B_MODIFY, bpp);
    206 					if (error) {
    207 						return (error);
    208 					}
    209 				}
    210 				return 0;
    211 			} else {
    212 
    213 				/*
     214 				 * The existing block is smaller than we want;
    215 				 * grow it.
    216 				 */
    217 				mutex_enter(&ump->um_lock);
    218 				error = ffs_realloccg(ip, lbn,
    219 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    220 					&ip->i_ffs1_db[0]),
    221 				    osize, nsize, cred, bpp, &newb);
    222 				if (error)
    223 					return (error);
    224 			}
    225 		} else {
    226 
    227 			/*
     228 			 * The block was not previously allocated;
    229 			 * allocate a new block or fragment.
    230 			 */
    231 
    232 			if (ip->i_size < lblktosize(fs, lbn + 1))
    233 				nsize = fragroundup(fs, size);
    234 			else
    235 				nsize = fs->fs_bsize;
    236 			mutex_enter(&ump->um_lock);
    237 			error = ffs_alloc(ip, lbn,
    238 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    239 				&ip->i_ffs1_db[0]),
    240 			    nsize, flags, cred, &newb);
    241 			if (error)
    242 				return (error);
    243 			if (bpp != NULL) {
    244 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    245 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    246 				if (error)
    247 					return error;
    248 			}
    249 		}
    250 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    251 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    252 		return (0);
    253 	}
    254 
    255 	/*
    256 	 * Determine the number of levels of indirection.
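        	 *
        	 * ufs_getlbns() fills indirs[] with the path to lbn:
        	 * indirs[0].in_off indexes the inode's indirect pointer
        	 * array, the following entries name the indirect blocks
        	 * themselves, and (after the --num below) indirs[num].in_off
        	 * is the slot of the data block's address in the last one.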
    257 	 */
    258 
    259 	pref = 0;
    260 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    261 		return (error);
    262 
    263 	/*
     264 	 * Fetch the first indirect block, allocating if necessary.
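        	 *
        	 * A newly allocated top-level indirect block is written
        	 * synchronously before it is hooked into the inode, and is
        	 * remembered in allociblk[] and via allocib/unwindidx so the
        	 * fail: path below can undo the allocation.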
    265 	 */
    266 
    267 	--num;
    268 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
    269 	allocib = NULL;
    270 	allocblk = allociblk;
    271 	if (nb == 0) {
    272 		mutex_enter(&ump->um_lock);
    273 		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
    274 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    275 		    flags | B_METAONLY, cred, &newb);
    276 		if (error)
    277 			goto fail;
    278 		nb = newb;
    279 		*allocblk++ = nb;
    280 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    281 		    fs->fs_bsize, true, &bp);
    282 		if (error)
    283 			goto fail;
    284 		/*
    285 		 * Write synchronously so that indirect blocks
    286 		 * never point at garbage.
    287 		 */
    288 		if ((error = bwrite(bp)) != 0)
    289 			goto fail;
    290 		unwindidx = 0;
    291 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    292 		*allocib = ufs_rw32(nb, needswap);
    293 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    294 	}
    295 
    296 	/*
    297 	 * Fetch through the indirect blocks, allocating as necessary.
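        	 *
        	 * Walk down the chain: read the indirect block at each level
        	 * and look up the entry for the next one.  A missing indirect
        	 * block is allocated, written synchronously so it never points
        	 * at garbage, and then entered in its parent, which is written
        	 * synchronously or via delayed write depending on B_SYNC.  The
        	 * loop exits with bp holding the last indirect block and nb
        	 * the address it contains for lbn (0 if not yet allocated).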
    298 	 */
    299 
    300 	for (i = 1;;) {
    301 		error = bread(vp,
    302 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    303 		if (error) {
    304 			goto fail;
    305 		}
    306 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    307 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    308 		if (i == num)
    309 			break;
    310 		i++;
    311 		if (nb != 0) {
    312 			brelse(bp, 0);
    313 			continue;
    314 		}
    315 		if (fscow_run(bp, true) != 0) {
    316 			brelse(bp, 0);
    317 			goto fail;
    318 		}
    319 		mutex_enter(&ump->um_lock);
    320 		/* Try to keep snapshot indirect blocks contiguous. */
    321 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
    322 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
    323 			    flags | B_METAONLY, &bap[0]);
    324 		if (pref == 0)
    325 			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
    326 			    NULL);
    327 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    328 		    flags | B_METAONLY, cred, &newb);
    329 		if (error) {
    330 			brelse(bp, 0);
    331 			goto fail;
    332 		}
    333 		nb = newb;
    334 		*allocblk++ = nb;
    335 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    336 		    fs->fs_bsize, true, &nbp);
    337 		if (error) {
    338 			brelse(bp, 0);
    339 			goto fail;
    340 		}
    341 		/*
    342 		 * Write synchronously so that indirect blocks
    343 		 * never point at garbage.
    344 		 */
    345 		if ((error = bwrite(nbp)) != 0) {
    346 			brelse(bp, 0);
    347 			goto fail;
    348 		}
    349 		if (unwindidx < 0)
    350 			unwindidx = i - 1;
    351 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    352 
    353 		/*
     354 		 * If required, write synchronously; otherwise use a
     355 		 * delayed write.
    356 		 */
    357 
    358 		if (flags & B_SYNC) {
    359 			bwrite(bp);
    360 		} else {
    361 			bdwrite(bp);
    362 		}
    363 	}
    364 
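        	/*
        	 * For B_METAONLY the caller wants the last indirect block
        	 * itself rather than the data block, so hand back bp as is.
        	 */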
    365 	if (flags & B_METAONLY) {
    366 		KASSERT(bpp != NULL);
    367 		*bpp = bp;
    368 		return (0);
    369 	}
    370 
    371 	/*
    372 	 * Get the data block, allocating if necessary.
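        	 *
        	 * If no data block is allocated yet, pick an allocation
        	 * preference based on the other addresses in this indirect
        	 * block (ffs_blkpref with &bap[0]), allocate one, enter it in
        	 * the indirect block and write that block out (sync or delayed
        	 * per B_SYNC).  Otherwise release the indirect block and read
        	 * or get the data block for the caller.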
    373 	 */
    374 
    375 	if (nb == 0) {
    376 		if (fscow_run(bp, true) != 0) {
    377 			brelse(bp, 0);
    378 			goto fail;
    379 		}
    380 		mutex_enter(&ump->um_lock);
    381 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
    382 		    &bap[0]);
    383 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    384 		    &newb);
    385 		if (error) {
    386 			brelse(bp, 0);
    387 			goto fail;
    388 		}
    389 		nb = newb;
    390 		*allocblk++ = nb;
    391 		if (bpp != NULL) {
    392 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    393 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    394 			if (error) {
    395 				brelse(bp, 0);
    396 				goto fail;
    397 			}
    398 		}
    399 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    400 		if (allocib == NULL && unwindidx < 0) {
    401 			unwindidx = i - 1;
    402 		}
    403 
    404 		/*
     405 		 * If required, write synchronously; otherwise use a
     406 		 * delayed write.
    407 		 */
    408 
    409 		if (flags & B_SYNC) {
    410 			bwrite(bp);
    411 		} else {
    412 			bdwrite(bp);
    413 		}
    414 		return (0);
    415 	}
    416 	brelse(bp, 0);
    417 	if (bpp != NULL) {
    418 		if (flags & B_CLRBUF) {
    419 			error = bread(vp, lbn, (int)fs->fs_bsize,
    420 			    NOCRED, B_MODIFY, &nbp);
    421 			if (error) {
    422 				goto fail;
    423 			}
    424 		} else {
    425 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    426 			    fs->fs_bsize, true, &nbp);
    427 			if (error)
    428 				goto fail;
    429 		}
    430 		*bpp = nbp;
    431 	}
    432 	return (0);
    433 
    434 fail:
    435 	/*
    436 	 * If we have failed part way through block allocation, we
    437 	 * have to deallocate any indirect blocks that we have allocated.
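        	 *
        	 * allociblk[] records each block obtained from ffs_alloc()
        	 * above, so each can be returned to the free list below;
        	 * unwindidx identifies the level at which the new chain was
        	 * first linked in, so that link can be cleared.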
    438 	 */
    439 
    440 	if (unwindidx >= 0) {
    441 
    442 		/*
    443 		 * First write out any buffers we've created to resolve their
    444 		 * softdeps.  This must be done in reverse order of creation
    445 		 * so that we resolve the dependencies in one pass.
    446 		 * Write the cylinder group buffers for these buffers too.
    447 		 */
    448 
    449 		for (i = num; i >= unwindidx; i--) {
    450 			if (i == 0) {
    451 				break;
    452 			}
    453 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    454 			    fs->fs_bsize, false, &bp) != 0)
    455 				continue;
    456 			if (bp->b_oflags & BO_DELWRI) {
    457 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    458 				    dbtofsb(fs, bp->b_blkno))));
    459 				bwrite(bp);
    460 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    461 				    fs->fs_cgsize, false, &bp) != 0)
    462 					continue;
    463 				if (bp->b_oflags & BO_DELWRI) {
    464 					bwrite(bp);
    465 				} else {
    466 					brelse(bp, BC_INVAL);
    467 				}
    468 			} else {
    469 				brelse(bp, BC_INVAL);
    470 			}
    471 		}
    472 
    473 		/*
    474 		 * Undo the partial allocation.
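        	 *
        	 * Either clear the inode's indirect pointer (unwindidx == 0) or
        	 * clear the entry that references the new chain in the indirect
        	 * block at unwindidx and write that block back out.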
    475 		 */
    476 		if (unwindidx == 0) {
    477 			*allocib = 0;
    478 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    479 		} else {
    480 			int r;
    481 
    482 			r = bread(vp, indirs[unwindidx].in_lbn,
    483 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
    484 			if (r) {
    485 				panic("Could not unwind indirect block, error %d", r);
    486 			} else {
    487 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    488 				bap[indirs[unwindidx].in_off] = 0;
    489 				bwrite(bp);
    490 			}
    491 		}
    492 		for (i = unwindidx + 1; i <= num; i++) {
    493 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    494 			    fs->fs_bsize, false, &bp) == 0)
    495 				brelse(bp, BC_INVAL);
    496 		}
    497 	}
    498 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    499 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    500 		deallocated += fs->fs_bsize;
    501 	}
    502 	if (deallocated) {
    503 #if defined(QUOTA) || defined(QUOTA2)
    504 		/*
    505 		 * Restore user's disk quota because allocation failed.
    506 		 */
    507 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    508 #endif
    509 		ip->i_ffs1_blocks -= btodb(deallocated);
    510 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    511 	}
    512 	return (error);
    513 }
    514 
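        /*
         * UFS2 version of the above.  The logic mirrors ffs_balloc_ufs1;
         * only the on-disk block pointers differ: 64-bit i_ffs2_db[] and
         * i_ffs2_ib[] in the inode, int64_t entries in indirect blocks,
         * and ufs_rw64() for byte swapping.
         */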
    515 static int
    516 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    517     int flags, struct buf **bpp)
    518 {
    519 	daddr_t lbn, lastlbn;
    520 	struct buf *bp, *nbp;
    521 	struct inode *ip = VTOI(vp);
    522 	struct fs *fs = ip->i_fs;
    523 	struct ufsmount *ump = ip->i_ump;
    524 	struct indir indirs[UFS_NIADDR + 2];
    525 	daddr_t newb, pref, nb;
    526 	int64_t *bap;
    527 	int deallocated, osize, nsize, num, i, error;
    528 	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
    529 	int64_t *allocib;
    530 	int unwindidx = -1;
    531 #ifdef FFS_EI
    532 	const int needswap = UFS_FSNEEDSWAP(fs);
    533 #endif
    534 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    535 
    536 	lbn = lblkno(fs, off);
    537 	size = blkoff(fs, off) + size;
    538 	if (size > fs->fs_bsize)
    539 		panic("ffs_balloc: blk too big");
    540 	if (bpp != NULL) {
    541 		*bpp = NULL;
    542 	}
    543 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    544 
    545 	if (lbn < 0)
    546 		return (EFBIG);
    547 
    548 #ifdef notyet
    549 	/*
    550 	 * Check for allocating external data.
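        	 *
        	 * This whole section is compiled out: it handles allocation in
        	 * the UFS2 external (extended attribute) data area via
        	 * dp->di_extb[] / di_extsize, but the dinode pointer dp and the
        	 * IO_EXT/BA_*/BX_ALTDATA flags it uses are not set up here,
        	 * hence "notyet".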
    551 	 */
    552 	if (flags & IO_EXT) {
    553 		if (lbn >= UFS_NXADDR)
    554 			return (EFBIG);
    555 		/*
    556 		 * If the next write will extend the data into a new block,
     557 		 * and the data currently ends in a fragment,
     558 		 * that fragment has to be extended to a full block.
    559 		 */
    560 		lastlbn = lblkno(fs, dp->di_extsize);
    561 		if (lastlbn < lbn) {
    562 			nb = lastlbn;
    563 			osize = sblksize(fs, dp->di_extsize, nb);
    564 			if (osize < fs->fs_bsize && osize > 0) {
    565 				mutex_enter(&ump->um_lock);
    566 				error = ffs_realloccg(ip, -1 - nb,
    567 				    dp->di_extb[nb],
    568 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    569 					flags, &dp->di_extb[0]),
    570 				    osize,
    571 				    (int)fs->fs_bsize, cred, &bp);
    572 				if (error)
    573 					return (error);
    574 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    575 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    576 				bp->b_xflags |= BX_ALTDATA;
    577 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    578 				if (flags & IO_SYNC)
    579 					bwrite(bp);
    580 				else
    581 					bawrite(bp);
    582 			}
    583 		}
    584 		/*
    585 		 * All blocks are direct blocks
    586 		 */
    587 		if (flags & BA_METAONLY)
    588 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    589 		nb = dp->di_extb[lbn];
    590 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    591 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    592 			    NOCRED, 0, &bp);
    593 			if (error) {
    594 				return (error);
    595 			}
    596 			mutex_enter(&bp->b_interlock);
    597 			bp->b_blkno = fsbtodb(fs, nb);
    598 			bp->b_xflags |= BX_ALTDATA;
    599 			mutex_exit(&bp->b_interlock);
    600 			*bpp = bp;
    601 			return (0);
    602 		}
    603 		if (nb != 0) {
    604 			/*
     605 			 * Consider the need to reallocate a fragment.
    606 			 */
    607 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    608 			nsize = fragroundup(fs, size);
    609 			if (nsize <= osize) {
    610 				error = bread(vp, -1 - lbn, osize,
    611 				    NOCRED, 0, &bp);
    612 				if (error) {
    613 					return (error);
    614 				}
    615 				mutex_enter(&bp->b_interlock);
    616 				bp->b_blkno = fsbtodb(fs, nb);
    617 				bp->b_xflags |= BX_ALTDATA;
    618 				mutex_exit(&bp->b_interlock);
    619 			} else {
    620 				mutex_enter(&ump->um_lock);
    621 				error = ffs_realloccg(ip, -1 - lbn,
    622 				    dp->di_extb[lbn],
    623 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    624 				        &dp->di_extb[0]),
    625 				    osize, nsize, cred, &bp);
    626 				if (error)
    627 					return (error);
    628 				bp->b_xflags |= BX_ALTDATA;
    629 			}
    630 		} else {
    631 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    632 				nsize = fragroundup(fs, size);
    633 			else
    634 				nsize = fs->fs_bsize;
    635 			mutex_enter(&ump->um_lock);
    636 			error = ffs_alloc(ip, lbn,
    637 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    638 			       &dp->di_extb[0]),
    639 			   nsize, flags, cred, &newb);
    640 			if (error)
    641 				return (error);
    642 			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
    643 			    nsize, (flags & BA_CLRBUF) != 0, &bp);
    644 			if (error)
    645 				return error;
    646 			bp->b_xflags |= BX_ALTDATA;
    647 		}
    648 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    649 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    650 		*bpp = bp;
    651 		return (0);
    652 	}
    653 #endif
    654 	/*
    655 	 * If the next write will extend the file into a new block,
     656 	 * and the file currently ends in a fragment,
     657 	 * that fragment has to be extended to a full block.
    658 	 */
    659 
    660 	lastlbn = lblkno(fs, ip->i_size);
    661 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
    662 		nb = lastlbn;
    663 		osize = blksize(fs, ip, nb);
    664 		if (osize < fs->fs_bsize && osize > 0) {
    665 			mutex_enter(&ump->um_lock);
    666 			error = ffs_realloccg(ip, nb,
    667 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    668 					&ip->i_ffs2_db[0]),
    669 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    670 			if (error)
    671 				return (error);
    672 			ip->i_size = lblktosize(fs, nb + 1);
    673 			ip->i_ffs2_size = ip->i_size;
    674 			uvm_vnp_setsize(vp, ip->i_size);
    675 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    676 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    677 			if (bpp) {
    678 				if (flags & B_SYNC)
    679 					bwrite(*bpp);
    680 				else
    681 					bawrite(*bpp);
    682 			}
    683 		}
    684 	}
    685 
    686 	/*
    687 	 * The first UFS_NDADDR blocks are direct blocks
    688 	 */
    689 
    690 	if (lbn < UFS_NDADDR) {
    691 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    692 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    693 
    694 			/*
    695 			 * The block is an already-allocated direct block
    696 			 * and the file already extends past this block,
    697 			 * thus this must be a whole block.
    698 			 * Just read the block (if requested).
    699 			 */
    700 
    701 			if (bpp != NULL) {
    702 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    703 					      B_MODIFY, bpp);
    704 				if (error) {
    705 					return (error);
    706 				}
    707 			}
    708 			return (0);
    709 		}
    710 		if (nb != 0) {
    711 
    712 			/*
     713 			 * Consider the need to reallocate a fragment.
    714 			 */
    715 
    716 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    717 			nsize = fragroundup(fs, size);
    718 			if (nsize <= osize) {
    719 
    720 				/*
    721 				 * The existing block is already
    722 				 * at least as big as we want.
    723 				 * Just read the block (if requested).
    724 				 */
    725 
    726 				if (bpp != NULL) {
    727 					error = bread(vp, lbn, osize, NOCRED,
    728 						      B_MODIFY, bpp);
    729 					if (error) {
    730 						return (error);
    731 					}
    732 				}
    733 				return 0;
    734 			} else {
    735 
    736 				/*
     737 				 * The existing block is smaller than we want;
    738 				 * grow it.
    739 				 */
    740 				mutex_enter(&ump->um_lock);
    741 				error = ffs_realloccg(ip, lbn,
    742 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    743 					&ip->i_ffs2_db[0]),
    744 				    osize, nsize, cred, bpp, &newb);
    745 				if (error)
    746 					return (error);
    747 			}
    748 		} else {
    749 
    750 			/*
     751 			 * The block was not previously allocated;
    752 			 * allocate a new block or fragment.
    753 			 */
    754 
    755 			if (ip->i_size < lblktosize(fs, lbn + 1))
    756 				nsize = fragroundup(fs, size);
    757 			else
    758 				nsize = fs->fs_bsize;
    759 			mutex_enter(&ump->um_lock);
    760 			error = ffs_alloc(ip, lbn,
    761 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    762 				&ip->i_ffs2_db[0]),
    763 			    nsize, flags, cred, &newb);
    764 			if (error)
    765 				return (error);
    766 			if (bpp != NULL) {
    767 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    768 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    769 				if (error)
    770 					return error;
    771 			}
    772 		}
    773 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    774 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    775 		return (0);
    776 	}
    777 
    778 	/*
    779 	 * Determine the number of levels of indirection.
    780 	 */
    781 
    782 	pref = 0;
    783 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    784 		return (error);
    785 
    786 	/*
     787 	 * Fetch the first indirect block, allocating if necessary.
    788 	 */
    789 
    790 	--num;
    791 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    792 	allocib = NULL;
    793 	allocblk = allociblk;
    794 	if (nb == 0) {
    795 		mutex_enter(&ump->um_lock);
    796 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    797 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    798 		    flags | B_METAONLY, cred, &newb);
    799 		if (error)
    800 			goto fail;
    801 		nb = newb;
    802 		*allocblk++ = nb;
    803 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    804 		    fs->fs_bsize, true, &bp);
    805 		if (error)
    806 			goto fail;
    807 		/*
    808 		 * Write synchronously so that indirect blocks
    809 		 * never point at garbage.
    810 		 */
    811 		if ((error = bwrite(bp)) != 0)
    812 			goto fail;
    813 		unwindidx = 0;
    814 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    815 		*allocib = ufs_rw64(nb, needswap);
    816 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    817 	}
    818 
    819 	/*
    820 	 * Fetch through the indirect blocks, allocating as necessary.
    821 	 */
    822 
    823 	for (i = 1;;) {
    824 		error = bread(vp,
    825 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    826 		if (error) {
    827 			goto fail;
    828 		}
    829 		bap = (int64_t *)bp->b_data;
    830 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    831 		if (i == num)
    832 			break;
    833 		i++;
    834 		if (nb != 0) {
    835 			brelse(bp, 0);
    836 			continue;
    837 		}
    838 		if (fscow_run(bp, true) != 0) {
    839 			brelse(bp, 0);
    840 			goto fail;
    841 		}
    842 		mutex_enter(&ump->um_lock);
    843 		/* Try to keep snapshot indirect blocks contiguous. */
    844 		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
    845 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
    846 			    flags | B_METAONLY, &bap[0]);
    847 		if (pref == 0)
    848 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    849 			    NULL);
    850 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    851 		    flags | B_METAONLY, cred, &newb);
    852 		if (error) {
    853 			brelse(bp, 0);
    854 			goto fail;
    855 		}
    856 		nb = newb;
    857 		*allocblk++ = nb;
    858 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    859 		    fs->fs_bsize, true, &nbp);
    860 		if (error) {
    861 			brelse(bp, 0);
    862 			goto fail;
    863 		}
    864 		/*
    865 		 * Write synchronously so that indirect blocks
    866 		 * never point at garbage.
    867 		 */
    868 		if ((error = bwrite(nbp)) != 0) {
    869 			brelse(bp, 0);
    870 			goto fail;
    871 		}
    872 		if (unwindidx < 0)
    873 			unwindidx = i - 1;
    874 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    875 
    876 		/*
     877 		 * If required, write synchronously; otherwise use a
     878 		 * delayed write.
    879 		 */
    880 
    881 		if (flags & B_SYNC) {
    882 			bwrite(bp);
    883 		} else {
    884 			bdwrite(bp);
    885 		}
    886 	}
    887 
    888 	if (flags & B_METAONLY) {
    889 		KASSERT(bpp != NULL);
    890 		*bpp = bp;
    891 		return (0);
    892 	}
    893 
    894 	/*
    895 	 * Get the data block, allocating if necessary.
    896 	 */
    897 
    898 	if (nb == 0) {
    899 		if (fscow_run(bp, true) != 0) {
    900 			brelse(bp, 0);
    901 			goto fail;
    902 		}
    903 		mutex_enter(&ump->um_lock);
    904 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
    905 		    &bap[0]);
    906 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    907 		    &newb);
    908 		if (error) {
    909 			brelse(bp, 0);
    910 			goto fail;
    911 		}
    912 		nb = newb;
    913 		*allocblk++ = nb;
    914 		if (bpp != NULL) {
    915 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    916 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    917 			if (error) {
    918 				brelse(bp, 0);
    919 				goto fail;
    920 			}
    921 		}
    922 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
    923 		if (allocib == NULL && unwindidx < 0) {
    924 			unwindidx = i - 1;
    925 		}
    926 
    927 		/*
     928 		 * If required, write synchronously; otherwise use a
     929 		 * delayed write.
    930 		 */
    931 
    932 		if (flags & B_SYNC) {
    933 			bwrite(bp);
    934 		} else {
    935 			bdwrite(bp);
    936 		}
    937 		return (0);
    938 	}
    939 	brelse(bp, 0);
    940 	if (bpp != NULL) {
    941 		if (flags & B_CLRBUF) {
    942 			error = bread(vp, lbn, (int)fs->fs_bsize,
    943 			    NOCRED, B_MODIFY, &nbp);
    944 			if (error) {
    945 				goto fail;
    946 			}
    947 		} else {
    948 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    949 			    fs->fs_bsize, true, &nbp);
    950 			if (error)
    951 				goto fail;
    952 		}
    953 		*bpp = nbp;
    954 	}
    955 	return (0);
    956 
    957 fail:
    958 	/*
    959 	 * If we have failed part way through block allocation, we
    960 	 * have to deallocate any indirect blocks that we have allocated.
    961 	 */
    962 
    963 	if (unwindidx >= 0) {
    964 
    965 		/*
    966 		 * First write out any buffers we've created to resolve their
    967 		 * softdeps.  This must be done in reverse order of creation
    968 		 * so that we resolve the dependencies in one pass.
    969 		 * Write the cylinder group buffers for these buffers too.
    970 		 */
    971 
    972 		for (i = num; i >= unwindidx; i--) {
    973 			if (i == 0) {
    974 				break;
    975 			}
    976 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    977 			    fs->fs_bsize, false, &bp) != 0)
    978 				continue;
    979 			if (bp->b_oflags & BO_DELWRI) {
    980 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    981 				    dbtofsb(fs, bp->b_blkno))));
    982 				bwrite(bp);
    983 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    984 				    fs->fs_cgsize, false, &bp) != 0)
    985 					continue;
    986 				if (bp->b_oflags & BO_DELWRI) {
    987 					bwrite(bp);
    988 				} else {
    989 					brelse(bp, BC_INVAL);
    990 				}
    991 			} else {
    992 				brelse(bp, BC_INVAL);
    993 			}
    994 		}
    995 
    996 		/*
    997 		 * Now that any dependencies that we created have been
    998 		 * resolved, we can undo the partial allocation.
    999 		 */
   1000 
   1001 		if (unwindidx == 0) {
   1002 			*allocib = 0;
   1003 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1004 		} else {
   1005 			int r;
   1006 
   1007 			r = bread(vp, indirs[unwindidx].in_lbn,
   1008 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
   1009 			if (r) {
   1010 				panic("Could not unwind indirect block, error %d", r);
   1011 			} else {
   1012 				bap = (int64_t *)bp->b_data;
   1013 				bap[indirs[unwindidx].in_off] = 0;
   1014 				bwrite(bp);
   1015 			}
   1016 		}
   1017 		for (i = unwindidx + 1; i <= num; i++) {
   1018 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1019 			    fs->fs_bsize, false, &bp) == 0)
   1020 				brelse(bp, BC_INVAL);
   1021 		}
   1022 	}
   1023 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1024 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1025 		deallocated += fs->fs_bsize;
   1026 	}
   1027 	if (deallocated) {
   1028 #if defined(QUOTA) || defined(QUOTA2)
   1029 		/*
   1030 		 * Restore user's disk quota because allocation failed.
   1031 		 */
   1032 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1033 #endif
   1034 		ip->i_ffs2_blocks -= btodb(deallocated);
   1035 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1036 	}
   1037 
   1038 	return (error);
   1039 }
   1040