      1 /*	$NetBSD: ffs_balloc.c,v 1.50.4.1 2008/10/19 22:18:10 haad Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 Networks Associates Technology, Inc.
      5  * All rights reserved.
      6  *
      7  * This software was developed for the FreeBSD Project by Marshall
      8  * Kirk McKusick and Network Associates Laboratories, the Security
      9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     11  * research program
     12  *
     13  * Copyright (c) 1982, 1986, 1989, 1993
     14  *	The Regents of the University of California.  All rights reserved.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  * 3. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  *
     40  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.50.4.1 2008/10/19 22:18:10 haad Exp $");
     45 
     46 #if defined(_KERNEL_OPT)
     47 #include "opt_quota.h"
     48 #endif
     49 
     50 #include <sys/param.h>
     51 #include <sys/systm.h>
     52 #include <sys/buf.h>
     53 #include <sys/file.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/kauth.h>
     57 #include <sys/fstrans.h>
     58 
     59 #include <ufs/ufs/quota.h>
     60 #include <ufs/ufs/ufsmount.h>
     61 #include <ufs/ufs/inode.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_bswap.h>
     64 
     65 #include <ufs/ffs/fs.h>
     66 #include <ufs/ffs/ffs_extern.h>
     67 
     68 #include <uvm/uvm.h>
     69 
     70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
     71     struct buf **);
     72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
     73     struct buf **);
     74 
     75 /*
     76  * Balloc defines the structure of file system storage
     77  * by allocating the physical blocks on a device given
     78  * the inode and the logical block number in a file.
     79  */
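         /*
          * A rough sketch of the expected usage (hypothetical caller,
          * shown only for illustration): when a write extends a file,
          * the caller asks for the buffer covering the byte range being
          * written, roughly
          *
          *	error = ffs_balloc(vp, uio->uio_offset, xfersize,
          *	    cred, flags, &bp);
          *
          * where "flags" combines the B_ allocation flags handled below:
          * B_CLRBUF (the caller will not overwrite the whole block, so
          * newly allocated blocks are cleared and existing blocks are
          * read in), B_SYNC (write updated metadata synchronously) and
          * B_METAONLY (return the indirect block that maps the logical
          * block instead of the data block itself).
          */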
     80 
     81 int
     82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
     83     struct buf **bpp)
     84 {
     85 	int error;
     86 
     87 	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
     88 		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
     89 	else
     90 		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
     91 
     92 	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
     93 		brelse(*bpp, 0);
     94 
     95 	return error;
     96 }
     97 
     98 static int
     99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    100     int flags, struct buf **bpp)
    101 {
    102 	daddr_t lbn, lastlbn;
    103 	struct buf *bp, *nbp;
    104 	struct inode *ip = VTOI(vp);
    105 	struct fs *fs = ip->i_fs;
    106 	struct ufsmount *ump = ip->i_ump;
    107 	struct indir indirs[NIADDR + 2];
    108 	daddr_t newb, pref, nb;
    109 	int32_t *bap;	/* XXX ondisk32 */
    110 	int deallocated, osize, nsize, num, i, error;
    111 	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
    112 	int32_t *allocib;
    113 	int unwindidx = -1;
    114 #ifdef FFS_EI
    115 	const int needswap = UFS_FSNEEDSWAP(fs);
    116 #endif
    117 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    118 
    119 	lbn = lblkno(fs, off);
    120 	size = blkoff(fs, off) + size;
    121 	if (size > fs->fs_bsize)
    122 		panic("ffs_balloc: blk too big");
    123 	if (bpp != NULL) {
    124 		*bpp = NULL;
    125 	}
     126 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size, 0);
    127 
    128 	if (lbn < 0)
    129 		return (EFBIG);
    130 
    131 	/*
    132 	 * If the next write will extend the file into a new block,
     133 	 * and the file currently ends in a fragment,
    134 	 * this fragment has to be extended to be a full block.
    135 	 */
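         	/*
         	 * For example (illustrative numbers only): on an 8K/1K
         	 * file system a 5000 byte file ends in a 5-fragment
         	 * (5120 byte) block; if the next write starts in a later
         	 * block, ffs_realloccg() below first grows those 5120
         	 * bytes to a full 8192 byte block, preserving the rule
         	 * that only the last block of a file may be a fragment.
         	 */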
    136 
    137 	lastlbn = lblkno(fs, ip->i_size);
    138 	if (lastlbn < NDADDR && lastlbn < lbn) {
    139 		nb = lastlbn;
    140 		osize = blksize(fs, ip, nb);
    141 		if (osize < fs->fs_bsize && osize > 0) {
    142 			mutex_enter(&ump->um_lock);
    143 			error = ffs_realloccg(ip, nb,
    144 				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
    145 					&ip->i_ffs1_db[0]),
    146 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    147 			if (error)
    148 				return (error);
    149 			if (DOINGSOFTDEP(vp))
    150 				softdep_setup_allocdirect(ip, nb, newb,
    151 				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
    152 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    153 			ip->i_size = lblktosize(fs, nb + 1);
    154 			ip->i_ffs1_size = ip->i_size;
    155 			uvm_vnp_setsize(vp, ip->i_ffs1_size);
    156 			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
    157 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    158 			if (bpp && *bpp) {
    159 				if (flags & B_SYNC)
    160 					bwrite(*bpp);
    161 				else
    162 					bawrite(*bpp);
    163 			}
    164 		}
    165 	}
    166 
    167 	/*
    168 	 * The first NDADDR blocks are direct blocks
    169 	 */
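         	/*
         	 * (NDADDR is normally 12, so logical blocks 0..11 are
         	 * mapped straight from the inode; anything larger goes
         	 * through the indirect-block code further down.)
         	 */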
    170 
    171 	if (lbn < NDADDR) {
    172 		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
    173 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    174 
    175 			/*
    176 			 * The block is an already-allocated direct block
    177 			 * and the file already extends past this block,
    178 			 * thus this must be a whole block.
    179 			 * Just read the block (if requested).
    180 			 */
    181 
    182 			if (bpp != NULL) {
    183 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    184 					      B_MODIFY, bpp);
    185 				if (error) {
    186 					brelse(*bpp, 0);
    187 					return (error);
    188 				}
    189 			}
    190 			return (0);
    191 		}
    192 		if (nb != 0) {
    193 
    194 			/*
     195 	 * Consider the need to reallocate a fragment.
    196 			 */
    197 
    198 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    199 			nsize = fragroundup(fs, size);
    200 			if (nsize <= osize) {
    201 
    202 				/*
    203 				 * The existing block is already
    204 				 * at least as big as we want.
    205 				 * Just read the block (if requested).
    206 				 */
    207 
    208 				if (bpp != NULL) {
    209 					error = bread(vp, lbn, osize, NOCRED,
    210 						      B_MODIFY, bpp);
    211 					if (error) {
    212 						brelse(*bpp, 0);
    213 						return (error);
    214 					}
    215 				}
    216 				return 0;
    217 			} else {
    218 
    219 				/*
    220 				 * The existing block is smaller than we want,
    221 				 * grow it.
    222 				 */
    223 				mutex_enter(&ump->um_lock);
    224 				error = ffs_realloccg(ip, lbn,
    225 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    226 					&ip->i_ffs1_db[0]),
    227 				    osize, nsize, cred, bpp, &newb);
    228 				if (error)
    229 					return (error);
    230 				if (DOINGSOFTDEP(vp))
    231 					softdep_setup_allocdirect(ip, lbn,
    232 					    newb, nb, nsize, osize,
    233 					    bpp ? *bpp : NULL);
    234 			}
    235 		} else {
    236 
    237 			/*
     238 	 * The block was not previously allocated;
    239 			 * allocate a new block or fragment.
    240 			 */
    241 
    242 			if (ip->i_size < lblktosize(fs, lbn + 1))
    243 				nsize = fragroundup(fs, size);
    244 			else
    245 				nsize = fs->fs_bsize;
    246 			mutex_enter(&ump->um_lock);
    247 			error = ffs_alloc(ip, lbn,
    248 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
    249 				&ip->i_ffs1_db[0]),
    250 			    nsize, flags, cred, &newb);
    251 			if (error)
    252 				return (error);
    253 			if (bpp != NULL) {
    254 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    255 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    256 				if (error)
    257 					return error;
    258 			}
    259 			if (DOINGSOFTDEP(vp)) {
    260 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    261 				    nsize, 0, bpp ? *bpp : NULL);
    262 			}
    263 		}
    264 		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
    265 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    266 		return (0);
    267 	}
    268 
    269 	/*
    270 	 * Determine the number of levels of indirection.
    271 	 */
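         	/*
         	 * ufs_getlbns() fills indirs[] with the chain of indirect
         	 * blocks that map this logical block.  As a rough
         	 * illustration for UFS1 with 8K blocks: an indirect block
         	 * holds 8192 / 4 = 2048 32-bit pointers, so the single
         	 * indirect block covers logical blocks 12..2059, the
         	 * double indirect the next 2048 * 2048 blocks, and so on.
         	 */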
    272 
    273 	pref = 0;
    274 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    275 		return (error);
    276 
    277 	/*
     278 	 * Fetch the first indirect block, allocating if necessary.
    279 	 */
    280 
    281 	--num;
    282 	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
    283 	allocib = NULL;
    284 	allocblk = allociblk;
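         	/*
         	 * From here on every newly allocated block is also recorded
         	 * in allociblk[] (via *allocblk++) so that the "fail:" path
         	 * can free it again if a later allocation fails.
         	 */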
    285 	if (nb == 0) {
    286 		mutex_enter(&ump->um_lock);
    287 		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
    288 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    289 		    flags | B_METAONLY, cred, &newb);
    290 		if (error)
    291 			goto fail;
    292 		nb = newb;
    293 		*allocblk++ = nb;
    294 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    295 		    fs->fs_bsize, true, &bp);
    296 		if (error)
    297 			goto fail;
    298 		if (DOINGSOFTDEP(vp)) {
    299 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    300 			    newb, 0, fs->fs_bsize, 0, bp);
    301 			bdwrite(bp);
    302 		} else {
    303 
    304 			/*
    305 			 * Write synchronously so that indirect blocks
    306 			 * never point at garbage.
    307 			 */
    308 
    309 			if ((error = bwrite(bp)) != 0)
    310 				goto fail;
    311 		}
    312 		unwindidx = 0;
    313 		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
    314 		*allocib = ufs_rw32(nb, needswap);
    315 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    316 	}
    317 
    318 	/*
    319 	 * Fetch through the indirect blocks, allocating as necessary.
    320 	 */
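         	/*
         	 * Informally: bp is the indirect block at the current level
         	 * and nb the entry in it that maps the next level (or, at
         	 * the last level, the data block itself).  A missing
         	 * intermediate indirect block is allocated and either
         	 * written synchronously or handed to the soft-dependency
         	 * code before its address is stored in the parent block.
         	 */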
    321 
    322 	for (i = 1;;) {
    323 		error = bread(vp,
    324 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    325 		if (error) {
    326 			brelse(bp, 0);
    327 			goto fail;
    328 		}
    329 		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
    330 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
    331 		if (i == num)
    332 			break;
    333 		i++;
    334 		if (nb != 0) {
    335 			brelse(bp, 0);
    336 			continue;
    337 		}
    338 		if (fscow_run(bp, true) != 0) {
    339 			brelse(bp, 0);
    340 			goto fail;
    341 		}
    342 		mutex_enter(&ump->um_lock);
    343 		if (pref == 0)
    344 			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
    345 			    NULL);
    346 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    347 		    flags | B_METAONLY, cred, &newb);
    348 		if (error) {
    349 			brelse(bp, 0);
    350 			goto fail;
    351 		}
    352 		nb = newb;
    353 		*allocblk++ = nb;
    354 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    355 		    fs->fs_bsize, true, &nbp);
    356 		if (error) {
    357 			brelse(bp, 0);
    358 			goto fail;
    359 		}
    360 		if (DOINGSOFTDEP(vp)) {
    361 			softdep_setup_allocindir_meta(nbp, ip, bp,
    362 			    indirs[i - 1].in_off, nb);
    363 			bdwrite(nbp);
    364 		} else {
    365 
    366 			/*
    367 			 * Write synchronously so that indirect blocks
    368 			 * never point at garbage.
    369 			 */
    370 
    371 			if ((error = bwrite(nbp)) != 0) {
    372 				brelse(bp, 0);
    373 				goto fail;
    374 			}
    375 		}
    376 		if (unwindidx < 0)
    377 			unwindidx = i - 1;
    378 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    379 
    380 		/*
    381 		 * If required, write synchronously, otherwise use
    382 		 * delayed write.
    383 		 */
    384 
    385 		if (flags & B_SYNC) {
    386 			bwrite(bp);
    387 		} else {
    388 			bdwrite(bp);
    389 		}
    390 	}
    391 
    392 	if (flags & B_METAONLY) {
    393 		KASSERT(bpp != NULL);
    394 		*bpp = bp;
    395 		return (0);
    396 	}
    397 
    398 	/*
    399 	 * Get the data block, allocating if necessary.
    400 	 */
    401 
    402 	if (nb == 0) {
    403 		if (fscow_run(bp, true) != 0) {
    404 			brelse(bp, 0);
    405 			goto fail;
    406 		}
    407 		mutex_enter(&ump->um_lock);
    408 		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
    409 		    &bap[0]);
    410 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
    411 		    &newb);
    412 		if (error) {
    413 			brelse(bp, 0);
    414 			goto fail;
    415 		}
    416 		nb = newb;
    417 		*allocblk++ = nb;
    418 		if (bpp != NULL) {
    419 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    420 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
    421 			if (error) {
    422 				brelse(bp, 0);
    423 				goto fail;
    424 			}
    425 		}
    426 		if (DOINGSOFTDEP(vp))
    427 			softdep_setup_allocindir_page(ip, lbn, bp,
    428 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    429 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
    430 		if (allocib == NULL && unwindidx < 0) {
    431 			unwindidx = i - 1;
    432 		}
    433 
    434 		/*
    435 		 * If required, write synchronously, otherwise use
    436 		 * delayed write.
    437 		 */
    438 
    439 		if (flags & B_SYNC) {
    440 			bwrite(bp);
    441 		} else {
    442 			bdwrite(bp);
    443 		}
    444 		return (0);
    445 	}
    446 	brelse(bp, 0);
    447 	if (bpp != NULL) {
    448 		if (flags & B_CLRBUF) {
    449 			error = bread(vp, lbn, (int)fs->fs_bsize,
    450 			    NOCRED, B_MODIFY, &nbp);
    451 			if (error) {
    452 				brelse(nbp, 0);
    453 				goto fail;
    454 			}
    455 		} else {
    456 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
    457 			    fs->fs_bsize, true, &nbp);
    458 			if (error)
    459 				goto fail;
    460 		}
    461 		*bpp = nbp;
    462 	}
    463 	return (0);
    464 
    465 fail:
    466 	/*
    467 	 * If we have failed part way through block allocation, we
    468 	 * have to deallocate any indirect blocks that we have allocated.
    469 	 */
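         	/*
         	 * unwindidx records the outermost level at which this call
         	 * installed a new block pointer: 0 means the pointer in the
         	 * inode itself, larger values index indirs[].  That pointer
         	 * is cleared below and the blocks recorded in allociblk[]
         	 * are freed.
         	 */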
    470 
    471 	if (unwindidx >= 0) {
    472 
    473 		/*
    474 		 * First write out any buffers we've created to resolve their
    475 		 * softdeps.  This must be done in reverse order of creation
    476 		 * so that we resolve the dependencies in one pass.
    477 		 * Write the cylinder group buffers for these buffers too.
    478 		 */
    479 
    480 		for (i = num; i >= unwindidx; i--) {
    481 			if (i == 0) {
    482 				break;
    483 			}
    484 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    485 			    fs->fs_bsize, false, &bp) != 0)
    486 				continue;
    487 			if (bp->b_oflags & BO_DELWRI) {
    488 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
    489 				    dbtofsb(fs, bp->b_blkno))));
    490 				bwrite(bp);
    491 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
    492 				    fs->fs_cgsize, false, &bp) != 0)
    493 					continue;
    494 				if (bp->b_oflags & BO_DELWRI) {
    495 					bwrite(bp);
    496 				} else {
    497 					brelse(bp, BC_INVAL);
    498 				}
    499 			} else {
    500 				brelse(bp, BC_INVAL);
    501 			}
    502 		}
    503 
    504 		/* Now flush all dependencies to disk. */
    505 #ifdef notyet
    506 		/* XXX pages locked */
    507 		(void)softdep_sync_metadata(vp);
    508 #endif
    509 
    510 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
    511 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    512 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    513 		}
    514 
    515 		/*
    516 		 * Now that any dependencies that we created have been
    517 		 * resolved, we can undo the partial allocation.
    518 		 */
    519 
    520 		if (unwindidx == 0) {
    521 			*allocib = 0;
    522 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    523 			if (DOINGSOFTDEP(vp))
    524 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    525 		} else {
    526 			int r;
    527 
    528 			r = bread(vp, indirs[unwindidx].in_lbn,
    529 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
    530 			if (r) {
    531 				panic("Could not unwind indirect block, error %d", r);
    532 				brelse(bp, 0);
    533 			} else {
    534 				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
    535 				bap[indirs[unwindidx].in_off] = 0;
    536 				bwrite(bp);
    537 			}
    538 		}
    539 		for (i = unwindidx + 1; i <= num; i++) {
    540 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
    541 			    fs->fs_bsize, false, &bp) == 0)
    542 				brelse(bp, BC_INVAL);
    543 		}
    544 	}
    545 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    546 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
    547 		deallocated += fs->fs_bsize;
    548 	}
    549 	if (deallocated) {
    550 #ifdef QUOTA
    551 		/*
    552 		 * Restore user's disk quota because allocation failed.
    553 		 */
    554 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
    555 #endif
    556 		ip->i_ffs1_blocks -= btodb(deallocated);
    557 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    558 	}
    559 	/*
    560 	 * Flush all dependencies again so that the soft updates code
    561 	 * doesn't find any untracked changes.
    562 	 */
    563 #ifdef notyet
    564 	/* XXX pages locked */
    565 	(void)softdep_sync_metadata(vp);
    566 #endif
    567 	return (error);
    568 }
    569 
    570 static int
    571 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    572     int flags, struct buf **bpp)
    573 {
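         	/*
         	 * UFS2 counterpart of ffs_balloc_ufs1() above: the structure
         	 * is the same, but block pointers are 64-bit and the
         	 * i_ffs2_* inode fields are used.
         	 */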
    574 	daddr_t lbn, lastlbn;
    575 	struct buf *bp, *nbp;
    576 	struct inode *ip = VTOI(vp);
    577 	struct fs *fs = ip->i_fs;
    578 	struct ufsmount *ump = ip->i_ump;
    579 	struct indir indirs[NIADDR + 2];
    580 	daddr_t newb, pref, nb;
    581 	int64_t *bap;
    582 	int deallocated, osize, nsize, num, i, error;
    583 	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
    584 	int64_t *allocib;
    585 	int unwindidx = -1;
    586 #ifdef FFS_EI
    587 	const int needswap = UFS_FSNEEDSWAP(fs);
    588 #endif
    589 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    590 
    591 	lbn = lblkno(fs, off);
    592 	size = blkoff(fs, off) + size;
    593 	if (size > fs->fs_bsize)
    594 		panic("ffs_balloc: blk too big");
    595 	if (bpp != NULL) {
    596 		*bpp = NULL;
    597 	}
     598 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size, 0);
    599 
    600 	if (lbn < 0)
    601 		return (EFBIG);
    602 
    603 #ifdef notyet
    604 	/*
    605 	 * Check for allocating external data.
    606 	 */
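         	/*
         	 * Note: this "notyet" section sketches allocation for UFS2
         	 * extended attribute (external) data.  It appears to follow
         	 * FreeBSD's version and still refers to names (dp, di_extb,
         	 * and the BA_ and IO_ flags) that are not set up in this
         	 * function, so it cannot simply be enabled as-is.
         	 */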
    607 	if (flags & IO_EXT) {
    608 		if (lbn >= NXADDR)
    609 			return (EFBIG);
    610 		/*
    611 		 * If the next write will extend the data into a new block,
     612 		 * and the data currently ends in a fragment,
    613 		 * this fragment has to be extended to be a full block.
    614 		 */
    615 		lastlbn = lblkno(fs, dp->di_extsize);
    616 		if (lastlbn < lbn) {
    617 			nb = lastlbn;
    618 			osize = sblksize(fs, dp->di_extsize, nb);
    619 			if (osize < fs->fs_bsize && osize > 0) {
    620 				mutex_enter(&ump->um_lock);
    621 				error = ffs_realloccg(ip, -1 - nb,
    622 				    dp->di_extb[nb],
    623 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
    624 					flags, &dp->di_extb[0]),
    625 				    osize,
    626 				    (int)fs->fs_bsize, cred, &bp);
    627 				if (error)
    628 					return (error);
    629 				if (DOINGSOFTDEP(vp))
    630 					softdep_setup_allocext(ip, nb,
    631 					    dbtofsb(fs, bp->b_blkno),
    632 					    dp->di_extb[nb],
    633 					    fs->fs_bsize, osize, bp);
    634 				dp->di_extsize = smalllblktosize(fs, nb + 1);
    635 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
    636 				bp->b_xflags |= BX_ALTDATA;
    637 				ip->i_flag |= IN_CHANGE | IN_UPDATE;
    638 				if (flags & IO_SYNC)
    639 					bwrite(bp);
    640 				else
    641 					bawrite(bp);
    642 			}
    643 		}
    644 		/*
    645 		 * All blocks are direct blocks
    646 		 */
    647 		if (flags & BA_METAONLY)
    648 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
    649 		nb = dp->di_extb[lbn];
    650 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
    651 			error = bread(vp, -1 - lbn, fs->fs_bsize,
    652 			    NOCRED, 0, &bp);
    653 			if (error) {
    654 				brelse(bp, 0);
    655 				return (error);
    656 			}
    657 			mutex_enter(&bp->b_interlock);
    658 			bp->b_blkno = fsbtodb(fs, nb);
    659 			bp->b_xflags |= BX_ALTDATA;
    660 			mutex_exit(&bp->b_interlock);
    661 			*bpp = bp;
    662 			return (0);
    663 		}
    664 		if (nb != 0) {
    665 			/*
     666 			 * Consider the need to reallocate a fragment.
    667 			 */
    668 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
    669 			nsize = fragroundup(fs, size);
    670 			if (nsize <= osize) {
    671 				error = bread(vp, -1 - lbn, osize,
    672 				    NOCRED, 0, &bp);
    673 				if (error) {
    674 					brelse(bp, 0);
    675 					return (error);
    676 				}
    677 				mutex_enter(&bp->b_interlock);
    678 				bp->b_blkno = fsbtodb(fs, nb);
    679 				bp->b_xflags |= BX_ALTDATA;
    680 				mutex_exit(&bp->b_interlock);
    681 			} else {
    682 				mutex_enter(&ump->um_lock);
    683 				error = ffs_realloccg(ip, -1 - lbn,
    684 				    dp->di_extb[lbn],
    685 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    686 				        &dp->di_extb[0]),
    687 				    osize, nsize, cred, &bp);
    688 				if (error)
    689 					return (error);
    690 				bp->b_xflags |= BX_ALTDATA;
    691 				if (DOINGSOFTDEP(vp))
    692 					softdep_setup_allocext(ip, lbn,
    693 					    dbtofsb(fs, bp->b_blkno), nb,
    694 					    nsize, osize, bp);
    695 			}
    696 		} else {
    697 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
    698 				nsize = fragroundup(fs, size);
    699 			else
    700 				nsize = fs->fs_bsize;
    701 			mutex_enter(&ump->um_lock);
    702 			error = ffs_alloc(ip, lbn,
    703 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    704 			       &dp->di_extb[0]),
    705 			   nsize, flags, cred, &newb);
    706 			if (error)
    707 				return (error);
    708 			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
    709 			    nsize, (flags & BA_CLRBUF) != 0, &bp);
    710 			if (error)
    711 				return error;
    712 			bp->b_xflags |= BX_ALTDATA;
    713 			if (DOINGSOFTDEP(vp))
    714 				softdep_setup_allocext(ip, lbn, newb, 0,
    715 				    nsize, 0, bp);
    716 		}
    717 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
    718 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    719 		*bpp = bp;
    720 		return (0);
    721 	}
    722 #endif
    723 	/*
    724 	 * If the next write will extend the file into a new block,
     725 	 * and the file currently ends in a fragment,
    726 	 * this fragment has to be extended to be a full block.
    727 	 */
    728 
    729 	lastlbn = lblkno(fs, ip->i_size);
    730 	if (lastlbn < NDADDR && lastlbn < lbn) {
    731 		nb = lastlbn;
    732 		osize = blksize(fs, ip, nb);
    733 		if (osize < fs->fs_bsize && osize > 0) {
    734 			mutex_enter(&ump->um_lock);
    735 			error = ffs_realloccg(ip, nb,
    736 				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
    737 					&ip->i_ffs2_db[0]),
    738 				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
    739 			if (error)
    740 				return (error);
    741 			if (DOINGSOFTDEP(vp))
    742 				softdep_setup_allocdirect(ip, nb, newb,
    743 				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
    744 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
    745 			ip->i_size = lblktosize(fs, nb + 1);
    746 			ip->i_ffs2_size = ip->i_size;
    747 			uvm_vnp_setsize(vp, ip->i_size);
    748 			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
    749 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
    750 			if (bpp) {
    751 				if (flags & B_SYNC)
    752 					bwrite(*bpp);
    753 				else
    754 					bawrite(*bpp);
    755 			}
    756 		}
    757 	}
    758 
    759 	/*
    760 	 * The first NDADDR blocks are direct blocks
    761 	 */
    762 
    763 	if (lbn < NDADDR) {
    764 		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
    765 		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
    766 
    767 			/*
    768 			 * The block is an already-allocated direct block
    769 			 * and the file already extends past this block,
    770 			 * thus this must be a whole block.
    771 			 * Just read the block (if requested).
    772 			 */
    773 
    774 			if (bpp != NULL) {
    775 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    776 					      B_MODIFY, bpp);
    777 				if (error) {
    778 					brelse(*bpp, 0);
    779 					return (error);
    780 				}
    781 			}
    782 			return (0);
    783 		}
    784 		if (nb != 0) {
    785 
    786 			/*
     787 	 * Consider the need to reallocate a fragment.
    788 			 */
    789 
    790 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
    791 			nsize = fragroundup(fs, size);
    792 			if (nsize <= osize) {
    793 
    794 				/*
    795 				 * The existing block is already
    796 				 * at least as big as we want.
    797 				 * Just read the block (if requested).
    798 				 */
    799 
    800 				if (bpp != NULL) {
    801 					error = bread(vp, lbn, osize, NOCRED,
    802 						      B_MODIFY, bpp);
    803 					if (error) {
    804 						brelse(*bpp, 0);
    805 						return (error);
    806 					}
    807 				}
    808 				return 0;
    809 			} else {
    810 
    811 				/*
    812 				 * The existing block is smaller than we want,
    813 				 * grow it.
    814 				 */
    815 				mutex_enter(&ump->um_lock);
    816 				error = ffs_realloccg(ip, lbn,
    817 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    818 					&ip->i_ffs2_db[0]),
    819 				    osize, nsize, cred, bpp, &newb);
    820 				if (error)
    821 					return (error);
    822 				if (DOINGSOFTDEP(vp))
    823 					softdep_setup_allocdirect(ip, lbn,
    824 					    newb, nb, nsize, osize,
    825 					    bpp ? *bpp : NULL);
    826 			}
    827 		} else {
    828 
    829 			/*
     830 	 * The block was not previously allocated;
    831 			 * allocate a new block or fragment.
    832 			 */
    833 
    834 			if (ip->i_size < lblktosize(fs, lbn + 1))
    835 				nsize = fragroundup(fs, size);
    836 			else
    837 				nsize = fs->fs_bsize;
    838 			mutex_enter(&ump->um_lock);
    839 			error = ffs_alloc(ip, lbn,
    840 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
    841 				&ip->i_ffs2_db[0]),
    842 			    nsize, flags, cred, &newb);
    843 			if (error)
    844 				return (error);
    845 			if (bpp != NULL) {
    846 				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
    847 				    nsize, (flags & B_CLRBUF) != 0, bpp);
    848 				if (error)
    849 					return error;
    850 			}
    851 			if (DOINGSOFTDEP(vp)) {
    852 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    853 				    nsize, 0, bpp ? *bpp : NULL);
    854 			}
    855 		}
    856 		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
    857 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    858 		return (0);
    859 	}
    860 
    861 	/*
    862 	 * Determine the number of levels of indirection.
    863 	 */
    864 
    865 	pref = 0;
    866 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    867 		return (error);
    868 
    869 	/*
     870 	 * Fetch the first indirect block, allocating if necessary.
    871 	 */
    872 
    873 	--num;
    874 	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
    875 	allocib = NULL;
    876 	allocblk = allociblk;
    877 	if (nb == 0) {
    878 		mutex_enter(&ump->um_lock);
    879 		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
    880 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    881 		    flags | B_METAONLY, cred, &newb);
    882 		if (error)
    883 			goto fail;
    884 		nb = newb;
    885 		*allocblk++ = nb;
    886 		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
    887 		    fs->fs_bsize, true, &bp);
    888 		if (error)
    889 			goto fail;
    890 		if (DOINGSOFTDEP(vp)) {
    891 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    892 			    newb, 0, fs->fs_bsize, 0, bp);
    893 			bdwrite(bp);
    894 		} else {
    895 
    896 			/*
    897 			 * Write synchronously so that indirect blocks
    898 			 * never point at garbage.
    899 			 */
    900 
    901 			if ((error = bwrite(bp)) != 0)
    902 				goto fail;
    903 		}
    904 		unwindidx = 0;
    905 		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
    906 		*allocib = ufs_rw64(nb, needswap);
    907 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    908 	}
    909 
    910 	/*
    911 	 * Fetch through the indirect blocks, allocating as necessary.
    912 	 */
    913 
    914 	for (i = 1;;) {
    915 		error = bread(vp,
    916 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
    917 		if (error) {
    918 			brelse(bp, 0);
    919 			goto fail;
    920 		}
    921 		bap = (int64_t *)bp->b_data;
    922 		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
    923 		if (i == num)
    924 			break;
    925 		i++;
    926 		if (nb != 0) {
    927 			brelse(bp, 0);
    928 			continue;
    929 		}
    930 		if (fscow_run(bp, true) != 0) {
    931 			brelse(bp, 0);
    932 			goto fail;
    933 		}
    934 		mutex_enter(&ump->um_lock);
    935 		if (pref == 0)
    936 			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
    937 			    NULL);
    938 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
    939 		    flags | B_METAONLY, cred, &newb);
    940 		if (error) {
    941 			brelse(bp, 0);
    942 			goto fail;
    943 		}
    944 		nb = newb;
    945 		*allocblk++ = nb;
    946 		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
    947 		    fs->fs_bsize, true, &nbp);
    948 		if (error) {
    949 			brelse(bp, 0);
    950 			goto fail;
    951 		}
    952 		if (DOINGSOFTDEP(vp)) {
    953 			softdep_setup_allocindir_meta(nbp, ip, bp,
    954 			    indirs[i - 1].in_off, nb);
    955 			bdwrite(nbp);
    956 		} else {
    957 
    958 			/*
    959 			 * Write synchronously so that indirect blocks
    960 			 * never point at garbage.
    961 			 */
    962 
    963 			if ((error = bwrite(nbp)) != 0) {
    964 				brelse(bp, 0);
    965 				goto fail;
    966 			}
    967 		}
    968 		if (unwindidx < 0)
    969 			unwindidx = i - 1;
    970 		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
    971 
    972 		/*
    973 		 * If required, write synchronously, otherwise use
    974 		 * delayed write.
    975 		 */
    976 
    977 		if (flags & B_SYNC) {
    978 			bwrite(bp);
    979 		} else {
    980 			bdwrite(bp);
    981 		}
    982 	}
    983 
    984 	if (flags & B_METAONLY) {
    985 		KASSERT(bpp != NULL);
    986 		*bpp = bp;
    987 		return (0);
    988 	}
    989 
    990 	/*
    991 	 * Get the data block, allocating if necessary.
    992 	 */
    993 
    994 	if (nb == 0) {
    995 		if (fscow_run(bp, true) != 0) {
    996 			brelse(bp, 0);
    997 			goto fail;
    998 		}
    999 		mutex_enter(&ump->um_lock);
   1000 		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
   1001 		    &bap[0]);
   1002 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
   1003 		    &newb);
   1004 		if (error) {
   1005 			brelse(bp, 0);
   1006 			goto fail;
   1007 		}
   1008 		nb = newb;
   1009 		*allocblk++ = nb;
   1010 		if (bpp != NULL) {
   1011 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1012 			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
   1013 			if (error) {
   1014 				brelse(bp, 0);
   1015 				goto fail;
   1016 			}
   1017 		}
   1018 		if (DOINGSOFTDEP(vp))
   1019 			softdep_setup_allocindir_page(ip, lbn, bp,
   1020 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
   1021 		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
   1022 		if (allocib == NULL && unwindidx < 0) {
   1023 			unwindidx = i - 1;
   1024 		}
   1025 
   1026 		/*
   1027 		 * If required, write synchronously, otherwise use
   1028 		 * delayed write.
   1029 		 */
   1030 
   1031 		if (flags & B_SYNC) {
   1032 			bwrite(bp);
   1033 		} else {
   1034 			bdwrite(bp);
   1035 		}
   1036 		return (0);
   1037 	}
   1038 	brelse(bp, 0);
   1039 	if (bpp != NULL) {
   1040 		if (flags & B_CLRBUF) {
   1041 			error = bread(vp, lbn, (int)fs->fs_bsize,
   1042 			    NOCRED, B_MODIFY, &nbp);
   1043 			if (error) {
   1044 				brelse(nbp, 0);
   1045 				goto fail;
   1046 			}
   1047 		} else {
   1048 			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
   1049 			    fs->fs_bsize, true, &nbp);
   1050 			if (error)
   1051 				goto fail;
   1052 		}
   1053 		*bpp = nbp;
   1054 	}
   1055 	return (0);
   1056 
   1057 fail:
   1058 	/*
   1059 	 * If we have failed part way through block allocation, we
   1060 	 * have to deallocate any indirect blocks that we have allocated.
   1061 	 */
   1062 
   1063 	if (unwindidx >= 0) {
   1064 
   1065 		/*
   1066 		 * First write out any buffers we've created to resolve their
   1067 		 * softdeps.  This must be done in reverse order of creation
   1068 		 * so that we resolve the dependencies in one pass.
   1069 		 * Write the cylinder group buffers for these buffers too.
   1070 		 */
   1071 
   1072 		for (i = num; i >= unwindidx; i--) {
   1073 			if (i == 0) {
   1074 				break;
   1075 			}
   1076 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1077 			    fs->fs_bsize, false, &bp) != 0)
   1078 				continue;
   1079 			if (bp->b_oflags & BO_DELWRI) {
   1080 				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
   1081 				    dbtofsb(fs, bp->b_blkno))));
   1082 				bwrite(bp);
   1083 				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
   1084 				    fs->fs_cgsize, false, &bp) != 0)
   1085 					continue;
   1086 				if (bp->b_oflags & BO_DELWRI) {
   1087 					bwrite(bp);
   1088 				} else {
   1089 					brelse(bp, BC_INVAL);
   1090 				}
   1091 			} else {
   1092 				brelse(bp, BC_INVAL);
   1093 			}
   1094 		}
   1095 
   1096 		/* Now flush the dependencies to disk. */
   1097 #ifdef notyet
   1098 		/* XXX pages locked */
   1099 		(void)softdep_sync_metadata(vp);
   1100 #endif
   1101 
   1102 		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
   1103 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1104 			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1105 		}
   1106 
   1107 		/*
   1108 		 * Now that any dependencies that we created have been
   1109 		 * resolved, we can undo the partial allocation.
   1110 		 */
   1111 
   1112 		if (unwindidx == 0) {
   1113 			*allocib = 0;
   1114 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1115 			if (DOINGSOFTDEP(vp))
   1116 				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
   1117 		} else {
   1118 			int r;
   1119 
   1120 			r = bread(vp, indirs[unwindidx].in_lbn,
   1121 			    (int)fs->fs_bsize, NOCRED, 0, &bp);
   1122 			if (r) {
   1123 				panic("Could not unwind indirect block, error %d", r);
   1124 				brelse(bp, 0);
   1125 			} else {
   1126 				bap = (int64_t *)bp->b_data;
   1127 				bap[indirs[unwindidx].in_off] = 0;
   1128 				bwrite(bp);
   1129 			}
   1130 		}
   1131 		for (i = unwindidx + 1; i <= num; i++) {
   1132 			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
   1133 			    fs->fs_bsize, false, &bp) == 0)
   1134 				brelse(bp, BC_INVAL);
   1135 		}
   1136 	}
   1137 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
   1138 		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
   1139 		deallocated += fs->fs_bsize;
   1140 	}
   1141 	if (deallocated) {
   1142 #ifdef QUOTA
   1143 		/*
   1144 		 * Restore user's disk quota because allocation failed.
   1145 		 */
   1146 		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
   1147 #endif
   1148 		ip->i_ffs2_blocks -= btodb(deallocated);
   1149 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
   1150 	}
   1151 
   1152 	/*
   1153 	 * Flush all dependencies again so that the soft updates code
   1154 	 * doesn't find any untracked changes.
   1155 	 */
   1156 #ifdef notyet
   1157 	/* XXX pages locked */
   1158 	(void)softdep_sync_metadata(vp);
   1159 #endif
   1160 	return (error);
   1161 }
   1162