      1 /*	$NetBSD: ffs_balloc.c,v 1.48.12.1 2008/06/23 04:32:05 wrstuden Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.12.1 2008/06/23 04:32:05 wrstuden Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
     80 
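         /*
          * ffs_balloc() is the entry point: it dispatches on the superblock
          * magic number to the UFS1 or UFS2 worker below and, when a buffer
          * is handed back through bpp, runs the file system copy-on-write
          * hook (fscow_run) on it before returning.  The flags are the B_*
          * allocation flags used throughout this file: B_CLRBUF asks for a
          * buffer with valid (read or zeroed) contents, B_SYNC forces
          * synchronous metadata writes, and B_METAONLY returns the indirect
          * block containing the entry for the requested block instead of
          * the data block itself.
          */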
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
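
         /*
          * Illustrative sketch only (not part of the original source): a
          * caller that needs the backing block for a file offset does
          * something along these lines, where off, len, data and cred are
          * placeholders for the caller's own state:
          *
          *	struct buf *bp;
          *	int error;
          *
          *	error = ffs_balloc(vp, off, len, cred, B_CLRBUF, &bp);
          *	if (error == 0) {
          *		memcpy((char *)bp->b_data + blkoff(fs, off), data, len);
          *		bdwrite(bp);
          *	}
          *
          * Passing a NULL bpp allocates the backing store without returning
          * a buffer for it.
          */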
     97 
     98 static int
     99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    100     int flags, struct buf **bpp)
    101 {
    102 	daddr_t lbn, lastlbn;
    103 	struct buf *bp, *nbp;
    104 	struct inode *ip = VTOI(vp);
    105 	struct fs *fs = ip->i_fs;
    106 	struct ufsmount *ump = ip->i_ump;
    107 	struct indir indirs[NIADDR + 2];
    108 	daddr_t newb, pref, nb;
    109 	int32_t *bap;	/* XXX ondisk32 */
    110 	int deallocated, osize, nsize, num, i, error;
    111 	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
    112 	int32_t *allocib;
    113 	int unwindidx = -1;
    114 #ifdef FFS_EI
    115 	const int needswap = UFS_FSNEEDSWAP(fs);
    116 #endif
    117 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    118 
    119 	lbn = lblkno(fs, off);
    120 	size = blkoff(fs, off) + size;
    121 	if (size > fs->fs_bsize)
    122 		panic("ffs_balloc: blk too big");
    123 	if (bpp != NULL) {
    124 		*bpp = NULL;
    125 	}
    126 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    127 
    128 	if (lbn < 0)
    129 		return (EFBIG);
    130 
    131 	/*
    132 	 * If the next write will extend the file into a new block,
     133 	 * and the file is currently composed of a fragment,
    134 	 * this fragment has to be extended to be a full block.
    135 	 */
    136 
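         	/*
         	 * (FFS only ever stores a fragment as the very last block of a
         	 * file, so before the file can grow past it the fragment is
         	 * promoted to a full block with ffs_realloccg() and the new,
         	 * possibly relocated, block address is written back into the
         	 * direct block array.)
         	 */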
    137 	lastlbn = lblkno(fs, ip->i_size);
    138 	if (lastlbn < NDADDR && lastlbn < lbn) {
    139 		nb = lastlbn;
    140 		osize = blksize(fs, ip, nb);
    141 		if (osize < fs->fs_bsize && osize > 0) {
    142 			mutex_enter(&ump->um_lock);
    143 			error = ffs_realloccg(ip, nb,
    144 				    ffs_blkpref_ufs1(ip, lastlbn, nb,
    145 					&ip->i_ffs1_db[0]),
    146 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    147 			if (error)
    148 				return (error);
    149 			if (DOINGSOFTDEP(vp))
    150 				softdep_setup_allocdirect(ip, nb, newb,
    151 				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
    152 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    153 			ip->i_size = lblktosize(fs, nb + 1);
    154 			ip->i_ffs1_size = ip->i_size;
    155 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    156 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    157 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    158 			if (bpp && *bpp) {
    159 				if (flags & B_SYNC)
    160 					bwrite(*bpp);
    161 				else
    162 					bawrite(*bpp);
    163 			}
    164 		}
    165 	}
    166 
    167 	/*
    168 	 * The first NDADDR blocks are direct blocks
    169 	 */
    170 
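         	/*
         	 * Their addresses live directly in the inode (i_ffs1_db[]), so
         	 * the three cases below never touch an indirect block: reuse a
         	 * block that already fully covers the request, grow an existing
         	 * fragment, or allocate a brand new block or fragment.
         	 */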
    171 	if (lbn < NDADDR) {
    172 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    173 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    174 
    175 			/*
    176 			 * The block is an already-allocated direct block
    177 			 * and the file already extends past this block,
    178 			 * thus this must be a whole block.
    179 			 * Just read the block (if requested).
    180 			 */
    181 
    182 			if (bpp != NULL) {
    183 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    184 					      B_MODIFY, bpp);
    185 				if (error) {
    186 					brelse(*bpp, 0);
    187 					return (error);
    188 				}
    189 			}
    190 			return (0);
    191 		}
    192 		if (nb != 0) {
    193 
    194 			/*
    195 			 * Consider need to reallocate a fragment.
    196 			 */
    197 
    198 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    199 			nsize = fragroundup(fs, size);
    200 			if (nsize <= osize) {
    201 
    202 				/*
    203 				 * The existing block is already
    204 				 * at least as big as we want.
    205 				 * Just read the block (if requested).
    206 				 */
    207 
    208 				if (bpp != NULL) {
    209 					error = bread(vp, lbn, osize, NOCRED,
    210 						      B_MODIFY, bpp);
    211 					if (error) {
    212 						brelse(*bpp, 0);
    213 						return (error);
    214 					}
    215 				}
    216 				return 0;
    217 			} else {
    218 
    219 				/*
    220 				 * The existing block is smaller than we want,
    221 				 * grow it.
    222 				 */
    223 				mutex_enter(&ump->um_lock);
    224 				error = ffs_realloccg(ip, lbn,
    225 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    226 					&ip->i_ffs1_db[0]), osize, nsize, cred,
    227 					bpp, &newb);
    228 				if (error)
    229 					return (error);
    230 				if (DOINGSOFTDEP(vp))
    231 					softdep_setup_allocdirect(ip, lbn,
    232 					    newb, nb, nsize, osize,
    233 					    bpp ? *bpp : NULL);
    234 			}
    235 		} else {
    236 
    237 			/*
     238 			 * The block was not previously allocated;
     239 			 * allocate a new block or fragment.
    240 			 */
    241 
    242 			if (ip->i_size < lblktosize(fs, lbn + 1))
    243 				nsize = fragroundup(fs, size);
    244 			else
    245 				nsize = fs->fs_bsize;
    246 			mutex_enter(&ump->um_lock);
    247 			error = ffs_alloc(ip, lbn,
    248 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
    249 				&ip->i_ffs1_db[0]),
    250 				nsize, cred, &newb);
    251 			if (error)
    252 				return (error);
    253 			if (bpp != NULL) {
    254 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    255 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    256 				if (error)
    257 					return error;
    258 			}
    259 			if (DOINGSOFTDEP(vp)) {
    260 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    261 				    nsize, 0, bpp ? *bpp : NULL);
    262 			}
    263 		}
    264 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    265 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    266 		return (0);
    267 	}
    268 
    269 	/*
    270 	 * Determine the number of levels of indirection.
    271 	 */
    272 
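         	/*
         	 * ufs_getlbns() computes the chain of indirect blocks leading
         	 * to lbn: indirs[0].in_off selects one of the NIADDR indirect
         	 * pointers in the inode, and each later entry carries the
         	 * logical block number of an indirect block plus the offset of
         	 * the next pointer inside it.  After the --num below, num is
         	 * the number of indirect levels on that path.
         	 */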
    273 	pref = 0;
    274 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    275 		return (error);
    276 
    277 	/*
     278 	 * Fetch the first indirect block, allocating if necessary.
    279 	 */
    280 
    281 	--num;
    282 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
    283 	allocib = NULL;
    284 	allocblk = allociblk;
    285 	if (nb == 0) {
    286 		mutex_enter(&ump->um_lock);
    287 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    288 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    289 		    &newb);
    290 		if (error)
    291 			goto fail;
    292 		nb = newb;
    293 		*allocblk++ = nb;
    294 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    295 		    fs->fs_bsize, true, &bp);
    296 		if (error)
    297 			goto fail;
    298 		if (DOINGSOFTDEP(vp)) {
    299 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    300 			    newb, 0, fs->fs_bsize, 0, bp);
    301 			bdwrite(bp);
    302 		} else {
    303 
    304 			/*
    305 			 * Write synchronously so that indirect blocks
    306 			 * never point at garbage.
    307 			 */
    308 
    309 			if ((error = bwrite(bp)) != 0)
    310 				goto fail;
    311 		}
    312 		unwindidx = 0;
    313 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    314 		*allocib = ufs_rw32(nb, needswap);
    315 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    316 	}
    317 
    318 	/*
    319 	 * Fetch through the indirect blocks, allocating as necessary.
    320 	 */
    321 
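         	/*
         	 * Each pass reads the indirect block at level i and looks up
         	 * the pointer to the next level.  A missing lower-level block
         	 * is allocated and written out (synchronously, unless soft
         	 * dependencies track the ordering) before its address is stored
         	 * in the parent, so an indirect block on disk never refers to
         	 * an uninitialized block.  The loop exits with bp holding the
         	 * last indirect block and nb the entry for the data block.
         	 */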
    322 	for (i = 1;;) {
    323 		error = bread(vp,
    324 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    325 		if (error) {
    326 			brelse(bp, 0);
    327 			goto fail;
    328 		}
    329 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    330 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    331 		if (i == num)
    332 			break;
    333 		i++;
    334 		if (nb != 0) {
    335 			brelse(bp, 0);
    336 			continue;
    337 		}
    338 		if (fscow_run(bp, true) != 0) {
    339 			brelse(bp, 0);
    340 			goto fail;
    341 		}
    342 		mutex_enter(&ump->um_lock);
    343 		if (pref == 0)
    344 			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
    345 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    346 		    &newb);
    347 		if (error) {
    348 			brelse(bp, 0);
    349 			goto fail;
    350 		}
    351 		nb = newb;
    352 		*allocblk++ = nb;
    353 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    354 		    fs->fs_bsize, true, &nbp);
    355 		if (error) {
    356 			brelse(bp, 0);
    357 			goto fail;
    358 		}
    359 		if (DOINGSOFTDEP(vp)) {
    360 			softdep_setup_allocindir_meta(nbp, ip, bp,
    361 			    indirs[i - 1].in_off, nb);
    362 			bdwrite(nbp);
    363 		} else {
    364 
    365 			/*
    366 			 * Write synchronously so that indirect blocks
    367 			 * never point at garbage.
    368 			 */
    369 
    370 			if ((error = bwrite(nbp)) != 0) {
    371 				brelse(bp, 0);
    372 				goto fail;
    373 			}
    374 		}
    375 		if (unwindidx < 0)
    376 			unwindidx = i - 1;
    377 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    378 
    379 		/*
    380 		 * If required, write synchronously, otherwise use
    381 		 * delayed write.
    382 		 */
    383 
    384 		if (flags & B_SYNC) {
    385 			bwrite(bp);
    386 		} else {
    387 			bdwrite(bp);
    388 		}
    389 	}
    390 
    391 	if (flags & B_METAONLY) {
    392 		KASSERT(bpp != NULL);
    393 		*bpp = bp;
    394 		return (0);
    395 	}
    396 
    397 	/*
    398 	 * Get the data block, allocating if necessary.
    399 	 */
    400 
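         	/*
         	 * At this point bp is the last indirect block and nb is its
         	 * entry for lbn.  If the entry is empty, allocate a data block
         	 * near the preference returned by ffs_blkpref_ufs1() and record
         	 * it in the indirect block; otherwise just hand back a buffer
         	 * for the existing block (read in when B_CLRBUF is set).
         	 */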
    401 	if (nb == 0) {
    402 		if (fscow_run(bp, true) != 0) {
    403 			brelse(bp, 0);
    404 			goto fail;
    405 		}
    406 		mutex_enter(&ump->um_lock);
    407 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
    408 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    409 		    &newb);
    410 		if (error) {
    411 			brelse(bp, 0);
    412 			goto fail;
    413 		}
    414 		nb = newb;
    415 		*allocblk++ = nb;
    416 		if (bpp != NULL) {
    417 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    418 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    419 			if (error) {
    420 				brelse(bp, 0);
    421 				goto fail;
    422 			}
    423 		}
    424 		if (DOINGSOFTDEP(vp))
    425 			softdep_setup_allocindir_page(ip, lbn, bp,
    426 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    427 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    428 		if (allocib == NULL && unwindidx < 0) {
    429 			unwindidx = i - 1;
    430 		}
    431 
    432 		/*
    433 		 * If required, write synchronously, otherwise use
    434 		 * delayed write.
    435 		 */
    436 
    437 		if (flags & B_SYNC) {
    438 			bwrite(bp);
    439 		} else {
    440 			bdwrite(bp);
    441 		}
    442 		return (0);
    443 	}
    444 	brelse(bp, 0);
    445 	if (bpp != NULL) {
    446 		if (flags & B_CLRBUF) {
    447 			error = bread(vp, lbn, (int)fs->fs_bsize,
    448 			    NOCRED, B_MODIFY, &nbp);
    449 			if (error) {
    450 				brelse(nbp, 0);
    451 				goto fail;
    452 			}
    453 		} else {
    454 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    455 			    fs->fs_bsize, true, &nbp);
    456 			if (error)
    457 				goto fail;
    458 		}
    459 		*bpp = nbp;
    460 	}
    461 	return (0);
    462 
    463 fail:
    464 	/*
    465 	 * If we have failed part way through block allocation, we
    466 	 * have to deallocate any indirect blocks that we have allocated.
    467 	 */
    468 
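         	/*
         	 * Two pieces of bookkeeping drive the unwind: unwindidx (plus
         	 * allocib when the inode's own indirect pointer was created)
         	 * records the highest-level pointer set up by this call, so it
         	 * can be cleared and the on-disk tree stops referencing the
         	 * partial chain; allociblk[] lists every block allocated here,
         	 * so each one can be handed back with ffs_blkfree() below.
         	 */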
    469 	if (unwindidx >= 0) {
    470 
    471 		/*
    472 		 * First write out any buffers we've created to resolve their
    473 		 * softdeps.  This must be done in reverse order of creation
    474 		 * so that we resolve the dependencies in one pass.
    475 		 * Write the cylinder group buffers for these buffers too.
    476 		 */
    477 
    478 		for (i = num; i >= unwindidx; i--) {
    479 			if (i == 0) {
    480 				break;
    481 			}
    482 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    483 			    fs->fs_bsize, false, &bp) != 0)
    484 				continue;
    485 			if (bp->b_oflags & BO_DELWRI) {
    486 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    487 				    dbtofsb(fs, bp->b_blkno))));
    488 				bwrite(bp);
    489 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    490 				    fs->fs_cgsize, false, &bp) != 0)
    491 					continue;
    492 				if (bp->b_oflags & BO_DELWRI) {
    493 					bwrite(bp);
    494 				} else {
    495 					brelse(bp, BC_INVAL);
    496 				}
    497 			} else {
    498 				brelse(bp, BC_INVAL);
    499 			}
    500 		}
    501 
    502 		/* Now flush all dependencies to disk. */
    503 #ifdef notyet
    504 		/* XXX pages locked */
    505 		(void)softdep_sync_metadata(vp);
    506 #endif
    507 
    508 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
    509 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    510 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    511 		}
    512 
    513 		/*
    514 		 * Now that any dependencies that we created have been
    515 		 * resolved, we can undo the partial allocation.
    516 		 */
    517 
    518 		if (unwindidx == 0) {
    519 			*allocib = 0;
    520 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    521 			if (DOINGSOFTDEP(vp))
    522 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    523 		} else {
    524 			int r;
    525 
    526 			r = bread(vp, indirs[unwindidx].in_lbn,
    527 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
    528 			if (r) {
    529 				panic("Could not unwind indirect block, error %d", r);
    530 				brelse(bp, 0);
    531 			} else {
    532 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    533 				bap[indirs[unwindidx].in_off] = 0;
    534 				bwrite(bp);
    535 			}
    536 		}
    537 		for (i = unwindidx + 1; i <= num; i++) {
    538 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    539 			    fs->fs_bsize, false, &bp) == 0)
    540 				brelse(bp, BC_INVAL);
    541 		}
    542 	}
    543 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    544 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    545 		deallocated += fs->fs_bsize;
    546 	}
    547 	if (deallocated) {
    548 #ifdef QUOTA
    549 		/*
    550 		 * Restore user's disk quota because allocation failed.
    551 		 */
    552 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    553 #endif
    554 		ip->i_ffs1_blocks -= btodb(deallocated);
    555 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    556 	}
    557 	/*
    558 	 * Flush all dependencies again so that the soft updates code
    559 	 * doesn't find any untracked changes.
    560 	 */
    561 #ifdef notyet
    562 	/* XXX pages locked */
    563 	(void)softdep_sync_metadata(vp);
    564 #endif
    565 	return (error);
    566 }
    567 
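         /*
          * ffs_balloc_ufs2() is the UFS2 twin of ffs_balloc_ufs1() above.
          * The algorithm is the same; the difference is that block addresses
          * are 64-bit on disk (i_ffs2_db[]/i_ffs2_ib[], int64_t indirect
          * entries, ufs_rw64() for byte swapping) instead of the 32-bit
          * UFS1 forms.
          */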
    568 static int
    569 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    570     int flags, struct buf **bpp)
    571 {
    572 	daddr_t lbn, lastlbn;
    573 	struct buf *bp, *nbp;
    574 	struct inode *ip = VTOI(vp);
    575 	struct fs *fs = ip->i_fs;
    576 	struct ufsmount *ump = ip->i_ump;
    577 	struct indir indirs[NIADDR + 2];
    578 	daddr_t newb, pref, nb;
    579 	int64_t *bap;
    580 	int deallocated, osize, nsize, num, i, error;
    581 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
    582 	int64_t *allocib;
    583 	int unwindidx = -1;
    584 #ifdef FFS_EI
    585 	const int needswap = UFS_FSNEEDSWAP(fs);
    586 #endif
    587 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    588 
    589 	lbn = lblkno(fs, off);
    590 	size = blkoff(fs, off) + size;
    591 	if (size > fs->fs_bsize)
    592 		panic("ffs_balloc: blk too big");
    593 	if (bpp != NULL) {
    594 		*bpp = NULL;
    595 	}
    596 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    597 
    598 	if (lbn < 0)
    599 		return (EFBIG);
    600 
    601 #ifdef notyet
    602 	/*
    603 	 * Check for allocating external data.
    604 	 */
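         	/*
         	 * This disabled block covers UFS2 extended attribute
         	 * ("external") data, which is stored in the di_extb[] area of
         	 * the dinode and addressed with negative logical block numbers
         	 * (-1 - lbn).  It still uses names from the FreeBSD version of
         	 * this code (dp, the BA_* flags) that are not defined here,
         	 * hence #ifdef notyet.
         	 */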
    605 	if (flags & IO_EXT) {
    606 		if (lbn >= NXADDR)
    607 			return (EFBIG);
    608 		/*
    609 		 * If the next write will extend the data into a new block,
     610 	 * and the data is currently composed of a fragment,
    611 		 * this fragment has to be extended to be a full block.
    612 		 */
    613 		lastlbn = lblkno(fs, dp->di_extsize);
    614 		if (lastlbn < lbn) {
    615 			nb = lastlbn;
    616 			osize = sblksize(fs, dp->di_extsize, nb);
    617 			if (osize < fs->fs_bsize && osize > 0) {
    618 				mutex_enter(&ump->um_lock);
    619 				error = ffs_realloccg(ip, -1 - nb,
    620 				    dp->di_extb[nb],
    621 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    622 				    &dp->di_extb[0]), osize,
    623 				    (int)fs->fs_bsize, cred, &bp);
    624 				if (error)
    625 					return (error);
    626 				if (DOINGSOFTDEP(vp))
    627 					softdep_setup_allocext(ip, nb,
    628 					    dbtofsb(fs, bp->b_blkno),
    629 					    dp->di_extb[nb],
    630 					    fs->fs_bsize, osize, bp);
    631 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    632 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    633 				bp->b_xflags |= BX_ALTDATA;
    634 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    635 				if (flags & IO_SYNC)
    636 					bwrite(bp);
    637 				else
    638 					bawrite(bp);
    639 			}
    640 		}
    641 		/*
    642 		 * All blocks are direct blocks
    643 		 */
    644 		if (flags & BA_METAONLY)
    645 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    646 		nb = dp->di_extb[lbn];
    647 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    648 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    649 			    NOCRED, 0, &bp);
    650 			if (error) {
    651 				brelse(bp, 0);
    652 				return (error);
    653 			}
    654 			mutex_enter(&bp->b_interlock);
    655 			bp->b_blkno = fsbtodb(fs, nb);
    656 			bp->b_xflags |= BX_ALTDATA;
    657 			mutex_exit(&bp->b_interlock);
    658 			*bpp = bp;
    659 			return (0);
    660 		}
    661 		if (nb != 0) {
    662 			/*
    663 			 * Consider need to reallocate a fragment.
    664 			 */
    665 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    666 			nsize = fragroundup(fs, size);
    667 			if (nsize <= osize) {
    668 				error = bread(vp, -1 - lbn, osize,
    669 				    NOCRED, 0, &bp);
    670 				if (error) {
    671 					brelse(bp, 0);
    672 					return (error);
    673 				}
    674 				mutex_enter(&bp->b_interlock);
    675 				bp->b_blkno = fsbtodb(fs, nb);
    676 				bp->b_xflags |= BX_ALTDATA;
    677 				mutex_exit(&bp->b_interlock);
    678 			} else {
    679 				mutex_enter(&ump->um_lock);
    680 				error = ffs_realloccg(ip, -1 - lbn,
    681 				    dp->di_extb[lbn],
    682 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    683 				    &dp->di_extb[0]), osize, nsize, cred, &bp);
    684 				if (error)
    685 					return (error);
    686 				bp->b_xflags |= BX_ALTDATA;
    687 				if (DOINGSOFTDEP(vp))
    688 					softdep_setup_allocext(ip, lbn,
    689 					    dbtofsb(fs, bp->b_blkno), nb,
    690 					    nsize, osize, bp);
    691 			}
    692 		} else {
    693 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    694 				nsize = fragroundup(fs, size);
    695 			else
    696 				nsize = fs->fs_bsize;
    697 			mutex_enter(&ump->um_lock);
    698 			error = ffs_alloc(ip, lbn,
    699 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
    700 			   nsize, cred, &newb);
    701 			if (error)
    702 				return (error);
    703 			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
    704 			    nsize, (flags & BA_CLRBUF) != 0, &bp);
    705 			if (error)
    706 				return error;
    707 			bp->b_xflags |= BX_ALTDATA;
    708 			if (DOINGSOFTDEP(vp))
    709 				softdep_setup_allocext(ip, lbn, newb, 0,
    710 				    nsize, 0, bp);
    711 		}
    712 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    713 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    714 		*bpp = bp;
    715 		return (0);
    716 	}
    717 #endif
    718 	/*
    719 	 * If the next write will extend the file into a new block,
     720 	 * and the file is currently composed of a fragment,
    721 	 * this fragment has to be extended to be a full block.
    722 	 */
    723 
    724 	lastlbn = lblkno(fs, ip->i_size);
    725 	if (lastlbn < NDADDR && lastlbn < lbn) {
    726 		nb = lastlbn;
    727 		osize = blksize(fs, ip, nb);
    728 		if (osize < fs->fs_bsize && osize > 0) {
    729 			mutex_enter(&ump->um_lock);
    730 			error = ffs_realloccg(ip, nb,
    731 				    ffs_blkpref_ufs2(ip, lastlbn, nb,
    732 					&ip->i_ffs2_db[0]),
    733 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    734 			if (error)
    735 				return (error);
    736 			if (DOINGSOFTDEP(vp))
    737 				softdep_setup_allocdirect(ip, nb, newb,
    738 				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
    739 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    740 			ip->i_size = lblktosize(fs, nb + 1);
    741 			ip->i_ffs2_size = ip->i_size;
    742 			uvm_vnp_setsize(vp, ip->i_size);
    743 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    744 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    745 			if (bpp) {
    746 				if (flags & B_SYNC)
    747 					bwrite(*bpp);
    748 				else
    749 					bawrite(*bpp);
    750 			}
    751 		}
    752 	}
    753 
    754 	/*
    755 	 * The first NDADDR blocks are direct blocks
    756 	 */
    757 
    758 	if (lbn < NDADDR) {
    759 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    760 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    761 
    762 			/*
    763 			 * The block is an already-allocated direct block
    764 			 * and the file already extends past this block,
    765 			 * thus this must be a whole block.
    766 			 * Just read the block (if requested).
    767 			 */
    768 
    769 			if (bpp != NULL) {
    770 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    771 					      B_MODIFY, bpp);
    772 				if (error) {
    773 					brelse(*bpp, 0);
    774 					return (error);
    775 				}
    776 			}
    777 			return (0);
    778 		}
    779 		if (nb != 0) {
    780 
    781 			/*
    782 			 * Consider need to reallocate a fragment.
    783 			 */
    784 
    785 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    786 			nsize = fragroundup(fs, size);
    787 			if (nsize <= osize) {
    788 
    789 				/*
    790 				 * The existing block is already
    791 				 * at least as big as we want.
    792 				 * Just read the block (if requested).
    793 				 */
    794 
    795 				if (bpp != NULL) {
    796 					error = bread(vp, lbn, osize, NOCRED,
    797 						      B_MODIFY, bpp);
    798 					if (error) {
    799 						brelse(*bpp, 0);
    800 						return (error);
    801 					}
    802 				}
    803 				return 0;
    804 			} else {
    805 
    806 				/*
    807 				 * The existing block is smaller than we want,
    808 				 * grow it.
    809 				 */
    810 				mutex_enter(&ump->um_lock);
    811 				error = ffs_realloccg(ip, lbn,
    812 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    813 					&ip->i_ffs2_db[0]), osize, nsize, cred,
    814 					bpp, &newb);
    815 				if (error)
    816 					return (error);
    817 				if (DOINGSOFTDEP(vp))
    818 					softdep_setup_allocdirect(ip, lbn,
    819 					    newb, nb, nsize, osize,
    820 					    bpp ? *bpp : NULL);
    821 			}
    822 		} else {
    823 
    824 			/*
     825 			 * The block was not previously allocated;
     826 			 * allocate a new block or fragment.
    827 			 */
    828 
    829 			if (ip->i_size < lblktosize(fs, lbn + 1))
    830 				nsize = fragroundup(fs, size);
    831 			else
    832 				nsize = fs->fs_bsize;
    833 			mutex_enter(&ump->um_lock);
    834 			error = ffs_alloc(ip, lbn,
    835 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
    836 				&ip->i_ffs2_db[0]), nsize, cred, &newb);
    837 			if (error)
    838 				return (error);
    839 			if (bpp != NULL) {
    840 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    841 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    842 				if (error)
    843 					return error;
    844 			}
    845 			if (DOINGSOFTDEP(vp)) {
    846 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    847 				    nsize, 0, bpp ? *bpp : NULL);
    848 			}
    849 		}
    850 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    851 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    852 		return (0);
    853 	}
    854 
    855 	/*
    856 	 * Determine the number of levels of indirection.
    857 	 */
    858 
    859 	pref = 0;
    860 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    861 		return (error);
    862 
    863 	/*
     864 	 * Fetch the first indirect block, allocating if necessary.
    865 	 */
    866 
    867 	--num;
    868 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    869 	allocib = NULL;
    870 	allocblk = allociblk;
    871 	if (nb == 0) {
    872 		mutex_enter(&ump->um_lock);
    873 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    874 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    875 		    &newb);
    876 		if (error)
    877 			goto fail;
    878 		nb = newb;
    879 		*allocblk++ = nb;
    880 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    881 		    fs->fs_bsize, true, &bp);
    882 		if (error)
    883 			goto fail;
    884 		if (DOINGSOFTDEP(vp)) {
    885 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    886 			    newb, 0, fs->fs_bsize, 0, bp);
    887 			bdwrite(bp);
    888 		} else {
    889 
    890 			/*
    891 			 * Write synchronously so that indirect blocks
    892 			 * never point at garbage.
    893 			 */
    894 
    895 			if ((error = bwrite(bp)) != 0)
    896 				goto fail;
    897 		}
    898 		unwindidx = 0;
    899 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    900 		*allocib = ufs_rw64(nb, needswap);
    901 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    902 	}
    903 
    904 	/*
    905 	 * Fetch through the indirect blocks, allocating as necessary.
    906 	 */
    907 
    908 	for (i = 1;;) {
    909 		error = bread(vp,
    910 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    911 		if (error) {
    912 			brelse(bp, 0);
    913 			goto fail;
    914 		}
    915 		bap = (int64_t *)bp->b_data;
    916 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    917 		if (i == num)
    918 			break;
    919 		i++;
    920 		if (nb != 0) {
    921 			brelse(bp, 0);
    922 			continue;
    923 		}
    924 		if (fscow_run(bp, true) != 0) {
    925 			brelse(bp, 0);
    926 			goto fail;
    927 		}
    928 		mutex_enter(&ump->um_lock);
    929 		if (pref == 0)
    930 			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
    931 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    932 		    &newb);
    933 		if (error) {
    934 			brelse(bp, 0);
    935 			goto fail;
    936 		}
    937 		nb = newb;
    938 		*allocblk++ = nb;
    939 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    940 		    fs->fs_bsize, true, &nbp);
    941 		if (error) {
    942 			brelse(bp, 0);
    943 			goto fail;
    944 		}
    945 		if (DOINGSOFTDEP(vp)) {
    946 			softdep_setup_allocindir_meta(nbp, ip, bp,
    947 			    indirs[i - 1].in_off, nb);
    948 			bdwrite(nbp);
    949 		} else {
    950 
    951 			/*
    952 			 * Write synchronously so that indirect blocks
    953 			 * never point at garbage.
    954 			 */
    955 
    956 			if ((error = bwrite(nbp)) != 0) {
    957 				brelse(bp, 0);
    958 				goto fail;
    959 			}
    960 		}
    961 		if (unwindidx < 0)
    962 			unwindidx = i - 1;
    963 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    964 
    965 		/*
    966 		 * If required, write synchronously, otherwise use
    967 		 * delayed write.
    968 		 */
    969 
    970 		if (flags & B_SYNC) {
    971 			bwrite(bp);
    972 		} else {
    973 			bdwrite(bp);
    974 		}
    975 	}
    976 
    977 	if (flags & B_METAONLY) {
    978 		KASSERT(bpp != NULL);
    979 		*bpp = bp;
    980 		return (0);
    981 	}
    982 
    983 	/*
    984 	 * Get the data block, allocating if necessary.
    985 	 */
    986 
    987 	if (nb == 0) {
    988 		if (fscow_run(bp, true) != 0) {
    989 			brelse(bp, 0);
    990 			goto fail;
    991 		}
    992 		mutex_enter(&ump->um_lock);
    993 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
    994 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    995 		    &newb);
    996 		if (error) {
    997 			brelse(bp, 0);
    998 			goto fail;
    999 		}
   1000 		nb = newb;
   1001 		*allocblk++ = nb;
   1002 		if (bpp != NULL) {
   1003 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1004 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
   1005 			if (error) {
   1006 				brelse(bp, 0);
   1007 				goto fail;
   1008 			}
   1009 		}
   1010 		if (DOINGSOFTDEP(vp))
   1011 			softdep_setup_allocindir_page(ip, lbn, bp,
   1012 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
   1013 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
   1014 		if (allocib == NULL && unwindidx < 0) {
   1015 			unwindidx = i - 1;
   1016 		}
   1017 
   1018 		/*
   1019 		 * If required, write synchronously, otherwise use
   1020 		 * delayed write.
   1021 		 */
   1022 
   1023 		if (flags & B_SYNC) {
   1024 			bwrite(bp);
   1025 		} else {
   1026 			bdwrite(bp);
   1027 		}
   1028 		return (0);
   1029 	}
   1030 	brelse(bp, 0);
   1031 	if (bpp != NULL) {
   1032 		if (flags & B_CLRBUF) {
   1033 			error = bread(vp, lbn, (int)fs->fs_bsize,
   1034 			    NOCRED, B_MODIFY, &nbp);
   1035 			if (error) {
   1036 				brelse(nbp, 0);
   1037 				goto fail;
   1038 			}
   1039 		} else {
   1040 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1041 			    fs->fs_bsize, true, &nbp);
   1042 			if (error)
   1043 				goto fail;
   1044 		}
   1045 		*bpp = nbp;
   1046 	}
   1047 	return (0);
   1048 
   1049 fail:
   1050 	/*
   1051 	 * If we have failed part way through block allocation, we
   1052 	 * have to deallocate any indirect blocks that we have allocated.
   1053 	 */
   1054 
   1055 	if (unwindidx >= 0) {
   1056 
   1057 		/*
   1058 		 * First write out any buffers we've created to resolve their
   1059 		 * softdeps.  This must be done in reverse order of creation
   1060 		 * so that we resolve the dependencies in one pass.
   1061 		 * Write the cylinder group buffers for these buffers too.
   1062 		 */
   1063 
   1064 		for (i = num; i >= unwindidx; i--) {
   1065 			if (i == 0) {
   1066 				break;
   1067 			}
   1068 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1069 			    fs->fs_bsize, false, &bp) != 0)
   1070 				continue;
   1071 			if (bp->b_oflags & BO_DELWRI) {
   1072 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
   1073 				    dbtofsb(fs, bp->b_blkno))));
   1074 				bwrite(bp);
   1075 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
   1076 				    fs->fs_cgsize, false, &bp) != 0)
   1077 					continue;
   1078 				if (bp->b_oflags & BO_DELWRI) {
   1079 					bwrite(bp);
   1080 				} else {
   1081 					brelse(bp, BC_INVAL);
   1082 				}
   1083 			} else {
   1084 				brelse(bp, BC_INVAL);
   1085 			}
   1086 		}
   1087 
   1088 		/* Now flush the dependencies to disk. */
   1089 #ifdef notyet
   1090 		/* XXX pages locked */
   1091 		(void)softdep_sync_metadata(vp);
   1092 #endif
   1093 
   1094 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
   1095 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1096 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1097 		}
   1098 
   1099 		/*
   1100 		 * Now that any dependencies that we created have been
   1101 		 * resolved, we can undo the partial allocation.
   1102 		 */
   1103 
   1104 		if (unwindidx == 0) {
   1105 			*allocib = 0;
   1106 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1107 			if (DOINGSOFTDEP(vp))
   1108 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1109 		} else {
   1110 			int r;
   1111 
   1112 			r = bread(vp, indirs[unwindidx].in_lbn,
   1113 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
   1114 			if (r) {
   1115 				panic("Could not unwind indirect block, error %d", r);
   1116 				brelse(bp, 0);
   1117 			} else {
   1118 				bap = (int64_t *)bp->b_data;
   1119 				bap[indirs[unwindidx].in_off] = 0;
   1120 				bwrite(bp);
   1121 			}
   1122 		}
   1123 		for (i = unwindidx + 1; i <= num; i++) {
   1124 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1125 			    fs->fs_bsize, false, &bp) == 0)
   1126 				brelse(bp, BC_INVAL);
   1127 		}
   1128 	}
   1129 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1130 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1131 		deallocated += fs->fs_bsize;
   1132 	}
   1133 	if (deallocated) {
   1134 #ifdef QUOTA
   1135 		/*
   1136 		 * Restore user's disk quota because allocation failed.
   1137 		 */
   1138 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1139 #endif
   1140 		ip->i_ffs2_blocks -= btodb(deallocated);
   1141 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1142 	}
   1143 
   1144 	/*
   1145 	 * Flush all dependencies again so that the soft updates code
   1146 	 * doesn't find any untracked changes.
   1147 	 */
   1148 #ifdef notyet
   1149 	/* XXX pages locked */
   1150 	(void)softdep_sync_metadata(vp);
   1151 #endif
   1152 	return (error);
   1153 }
   1154