Home | History | Annotate | Line # | Download | only in ffs
ffs_balloc.c revision 1.23.2.5
      1 /*	$NetBSD: ffs_balloc.c,v 1.23.2.5 2001/10/08 20:11:51 nathanw Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1982, 1986, 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
     36  */
     37 
     38 #if defined(_KERNEL_OPT)
     39 #include "opt_quota.h"
     40 #endif
     41 
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/buf.h>
     45 #include <sys/lwp.h>
     46 #include <sys/proc.h>
     47 #include <sys/file.h>
     48 #include <sys/mount.h>
     49 #include <sys/vnode.h>
     50 #include <sys/mount.h>
     51 
     52 #include <ufs/ufs/quota.h>
     53 #include <ufs/ufs/ufsmount.h>
     54 #include <ufs/ufs/inode.h>
     55 #include <ufs/ufs/ufs_extern.h>
     56 #include <ufs/ufs/ufs_bswap.h>
     57 
     58 #include <ufs/ffs/fs.h>
     59 #include <ufs/ffs/ffs_extern.h>
     60 
     61 #include <uvm/uvm.h>
     62 
     63 /*
     64  * Balloc defines the structure of file system storage
     65  * by allocating the physical blocks on a device given
     66  * the inode and the logical block number in a file.
         *
         * The argument is a struct vop_balloc_args (its fields are listed
         * in the declaration below).  The request covers a_size bytes
         * starting at a_startoffset and must end within the file system
         * block that contains a_startoffset; otherwise the function panics.
         *
         * If a_bpp is non-NULL, *a_bpp is first set to NULL and, on success,
         * receives a buffer for the block (stored here directly, or
         * presumably by ffs_realloccg(), which is handed a_bpp).
         *
         * a_flags is tested for:
         *   B_SYNC   - modified buffers are written with bwrite() instead of
         *              bawrite()/bdwrite().
         *   B_CLRBUF - a newly allocated data buffer is cleared; on the
         *              indirect path an existing data block is read with
         *              bread().
         *
         * Returns 0 on success, EFBIG if the logical block number is
         * negative, or the error reported by ufs_getlbns(), ffs_alloc(),
         * ffs_realloccg(), bread() or bwrite().  Failures on the indirect
         * path go through the "fail" label, which frees every block
         * allocated by this call and clears the pointer that referenced
         * the first of them.
     67  */
     68 int
     69 ffs_balloc(v)
     70 	void *v;
     71 {
     72 	struct vop_balloc_args /* {
     73 		struct vnode *a_vp;
     74 		off_t a_startoffset;
     75 		int a_size;
     76 		struct ucred *a_cred;
     77 		int a_flags;
     78 		struct buf **a_bpp;
     79 	} */ *ap = v;
     80 	ufs_daddr_t lbn;
     81 	int size;
     82 	struct ucred *cred;
     83 	int flags;
     84 	ufs_daddr_t nb;
     85 	struct buf *bp, *nbp;
     86 	struct vnode *vp = ap->a_vp;
     87 	struct inode *ip = VTOI(vp);
     88 	struct fs *fs = ip->i_fs;
     89 	struct indir indirs[NIADDR + 2];
     90 	ufs_daddr_t newb, *bap, pref;
     91 	int deallocated, osize, nsize, num, i, error;
     92 	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
     93 	int unwindidx = -1;
     94 	struct buf **bpp = ap->a_bpp;
     95 	off_t off;
     96 #ifdef FFS_EI
     97 	const int needswap = UFS_FSNEEDSWAP(fs);
     98 #endif
     99 	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
    100 
        	/*
        	 * lbn is the logical block containing a_startoffset; size is the
        	 * number of bytes from the start of that block to the end of
        	 * the request.
        	 */
    101 	lbn = lblkno(fs, ap->a_startoffset);
    102 	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
    103 	if (size > fs->fs_bsize)
    104 		panic("ffs_balloc: blk too big");
    105 	if (bpp != NULL) {
    106 		*bpp = NULL;
    107 	}
    108 	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
    109 
        	/* Same condition as the panic() check above. */
    110 	KASSERT(size <= fs->fs_bsize);
    111 	if (lbn < 0)
    112 		return (EFBIG);
    113 	cred = ap->a_cred;
    114 	flags = ap->a_flags;
    115 
    116 	/*
    117 	 * If the next write will extend the file into a new block,
    118 	 * and the file is currently composed of a fragment
    119 	 * this fragment has to be extended to be a full block.
    120 	 */
    121 
        	/* nb: logical block that contains the current end of file. */
    122 	nb = lblkno(fs, ip->i_ffs_size);
    123 	if (nb < NDADDR && nb < lbn) {
    124 		osize = blksize(fs, ip, nb);
    125 		if (osize < fs->fs_bsize && osize > 0) {
    126 			error = ffs_realloccg(ip, nb,
    127 				ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
    128 				osize, (int)fs->fs_bsize, cred, bpp, &newb);
    129 			if (error)
    130 				return (error);
    131 			if (DOINGSOFTDEP(vp))
    132 				softdep_setup_allocdirect(ip, nb, newb,
    133 				    ufs_rw32(ip->i_ffs_db[nb], needswap),
    134 				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
        			/* The file now ends at the boundary after block nb. */
    135 			ip->i_ffs_size = lblktosize(fs, nb + 1);
    136 			uvm_vnp_setsize(vp, ip->i_ffs_size);
    137 			ip->i_ffs_db[nb] = ufs_rw32(newb, needswap);
    138 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
        			/*
        			 * Write the enlarged block out: synchronously for
        			 * B_SYNC, asynchronously otherwise.
        			 * NOTE(review): *bpp is not reset here.  Later
        			 * success paths store a new buffer in it, but some
        			 * error returns below (e.g. a failed ffs_alloc())
        			 * leave it untouched -- confirm callers ignore
        			 * *bpp on error.
        			 */
    139 			if (bpp) {
    140 				if (flags & B_SYNC)
    141 					bwrite(*bpp);
    142 				else
    143 					bawrite(*bpp);
    144 			}
    145 		}
    146 	}
    147 
    148 	/*
    149 	 * The first NDADDR blocks are direct blocks
    150 	 */
    151 
    152 	if (lbn < NDADDR) {
        		/* nb: disk address currently recorded for lbn, 0 if none. */
    153 		nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
    154 		if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
    155 
    156 			/*
    157 			 * The block is an already-allocated direct block
    158 			 * and the file already extends past this block,
    159 			 * thus this must be a whole block.
    160 			 * Just read the block (if requested).
    161 			 */
    162 
    163 			if (bpp != NULL) {
    164 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
    165 					      bpp);
    166 				if (error) {
    167 					brelse(*bpp);
    168 					return (error);
    169 				}
    170 			}
    171 			return (0);
    172 		}
    173 		if (nb != 0) {
    174 
    175 			/*
    176 			 * Consider need to reallocate a fragment.
    177 			 */
    178 
        			/*
        			 * osize: fragment-rounded size of the file's tail;
        			 * nsize: fragment-rounded size this request needs.
        			 */
    179 			osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
    180 			nsize = fragroundup(fs, size);
    181 			if (nsize <= osize) {
    182 
    183 				/*
    184 				 * The existing block is already
    185 				 * at least as big as we want.
    186 				 * Just read the block (if requested).
    187 				 */
    188 
    189 				if (bpp != NULL) {
    190 					error = bread(vp, lbn, osize, NOCRED,
    191 						      bpp);
    192 					if (error) {
    193 						brelse(*bpp);
    194 						return (error);
    195 					}
    196 				}
    197 				return 0;
    198 			} else {
    199 
    200 				/*
    201 				 * The existing block is smaller than we want,
    202 				 * grow it.
    203 				 */
    204 
    205 				error = ffs_realloccg(ip, lbn,
    206 				    ffs_blkpref(ip, lbn, (int)lbn,
    207 					&ip->i_ffs_db[0]), osize, nsize, cred,
    208 					bpp, &newb);
    209 				if (error)
    210 					return (error);
    211 				if (DOINGSOFTDEP(vp))
    212 					softdep_setup_allocdirect(ip, lbn,
    213 					    newb, nb, nsize, osize,
    214 					    bpp ? *bpp : NULL);
    215 			}
    216 		} else {
    217 
    218 			/*
    219 			 * the block was not previously allocated,
    220 			 * allocate a new block or fragment.
    221 			 */
    222 
        			/*
        			 * Only a block at or beyond the current end of file
        			 * is sized by fragroundup(); otherwise a full
        			 * block is allocated.
        			 */
    223 			if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
    224 				nsize = fragroundup(fs, size);
    225 			else
    226 				nsize = fs->fs_bsize;
    227 			error = ffs_alloc(ip, lbn,
    228 			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
    229 				nsize, cred, &newb);
    230 			if (error)
    231 				return (error);
    232 			if (bpp != NULL) {
    233 				bp = getblk(vp, lbn, nsize, 0, 0);
    234 				bp->b_blkno = fsbtodb(fs, newb);
    235 				if (flags & B_CLRBUF)
    236 					clrbuf(bp);
    237 				*bpp = bp;
    238 			}
    239 			if (DOINGSOFTDEP(vp)) {
    240 				softdep_setup_allocdirect(ip, lbn, newb, 0,
    241 				    nsize, 0, bpp ? *bpp : NULL);
    242 			}
    243 		}
        		/* Both the grow and the fresh-allocation cases end up here. */
    244 		ip->i_ffs_db[lbn] = ufs_rw32(newb, needswap);
    245 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    246 		return (0);
    247 	}
    248 	/*
    249 	 * Determine the number of levels of indirection.
    250 	 */
        	/* pref == 0 doubles as "no preference chosen yet" (see the loop). */
    251 	pref = 0;
    252 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
    253 		return(error);
    254 
        	/*
        	 * NOTE(review): the message names ufs_bmaparray although the
        	 * call above is ufs_getlbns().
        	 */
    255 #ifdef DIAGNOSTIC
    256 	if (num < 1)
    257 		panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
    258 #endif
    259 	/*
    260 	 * Fetch the first indirect block allocating if necessary.
    261 	 */
        	/*
        	 * After the decrement, num is the indirs[] index at which the
        	 * loop below stops (it breaks when i == num).
        	 *
        	 * allocib:  inode slot filled in by this call (NULL if untouched).
        	 * allocblk: next free entry of allociblk[], which records every
        	 *           block allocated here so the fail path can free them.
        	 */
    262 	--num;
    263 	nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
    264 	allocib = NULL;
    265 	allocblk = allociblk;
    266 	if (nb == 0) {
    267 		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
    268 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    269 		    &newb);
    270 		if (error)
    271 			goto fail;
    272 		nb = newb;
    273 		*allocblk++ = nb;
    274 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
    275 		bp->b_blkno = fsbtodb(fs, nb);
    276 		clrbuf(bp);
    277 		if (DOINGSOFTDEP(vp)) {
    278 			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
    279 			    newb, 0, fs->fs_bsize, 0, bp);
    280 			bdwrite(bp);
    281 		} else {
    282 			/*
    283 			 * Write synchronously so that indirect blocks
    284 			 * never point at garbage.
    285 			 */
    286 			if ((error = bwrite(bp)) != 0)
    287 				goto fail;
    288 		}
        		/*
        		 * unwindidx == 0 tells the fail path to clear *allocib.
        		 * The inode pointer is only set once the new block has
        		 * been handed to bdwrite()/bwrite() above.
        		 */
    289 		unwindidx = 0;
    290 		allocib = &ip->i_ffs_ib[indirs[0].in_off];
    291 		*allocib = ufs_rw32(nb, needswap);
    292 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    293 	}
    294 	/*
    295 	 * Fetch through the indirect blocks, allocating as necessary.
    296 	 */
        	/* i is advanced inside the body, not in the for header. */
    297 	for (i = 1;;) {
    298 		error = bread(vp,
    299 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
    300 		if (error) {
    301 			brelse(bp);
    302 			goto fail;
    303 		}
    304 		bap = (ufs_daddr_t *)bp->b_data;
    305 		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
        		/*
        		 * Last level reached: leave the loop with bp still held
        		 * and bap/nb describing the data block's slot; the code
        		 * after the loop uses all three.
        		 */
    306 		if (i == num)
    307 			break;
    308 		i++;
        		/* Next level already exists: drop this buffer and descend. */
    309 		if (nb != 0) {
    310 			brelse(bp);
    311 			continue;
    312 		}
    313 		if (pref == 0)
    314 			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
    315 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    316 		    &newb);
    317 		if (error) {
    318 			brelse(bp);
    319 			goto fail;
    320 		}
    321 		nb = newb;
    322 		*allocblk++ = nb;
    323 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
    324 		nbp->b_blkno = fsbtodb(fs, nb);
    325 		clrbuf(nbp);
    326 		if (DOINGSOFTDEP(vp)) {
    327 			softdep_setup_allocindir_meta(nbp, ip, bp,
    328 			    indirs[i - 1].in_off, nb);
    329 			bdwrite(nbp);
    330 		} else {
    331 			/*
    332 			 * Write synchronously so that indirect blocks
    333 			 * never point at garbage.
    334 			 */
    335 			if ((error = bwrite(nbp)) != 0) {
    336 				brelse(bp);
    337 				goto fail;
    338 			}
    339 		}
        		/*
        		 * Record only the first level at which a pointer is written
        		 * by this call; the fail path clears that one slot.
        		 */
    340 		if (unwindidx < 0)
    341 			unwindidx = i - 1;
        		/* Hook the new indirect block into its parent (bp). */
    342 		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
    343 		/*
    344 		 * If required, write synchronously, otherwise use
    345 		 * delayed write.
    346 		 */
    347 		if (flags & B_SYNC) {
    348 			bwrite(bp);
    349 		} else {
    350 			bdwrite(bp);
    351 		}
    352 	}
    353 	/*
    354 	 * Get the data block, allocating if necessary.
    355 	 */
    356 	if (nb == 0) {
    357 		pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
    358 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
    359 		    &newb);
    360 		if (error) {
    361 			brelse(bp);
    362 			goto fail;
    363 		}
    364 		nb = newb;
    365 		*allocblk++ = nb;
    366 		if (bpp != NULL) {
    367 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    368 			nbp->b_blkno = fsbtodb(fs, nb);
    369 			if (flags & B_CLRBUF)
    370 				clrbuf(nbp);
    371 			*bpp = nbp;
    372 		}
    373 		if (DOINGSOFTDEP(vp))
    374 			softdep_setup_allocindir_page(ip, lbn, bp,
    375 			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
    376 		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
        		/*
        		 * NOTE(review): unwindidx is only read in the fail path, and
        		 * this branch returns 0 right below, so this assignment
        		 * appears to have no effect.
        		 */
    377 		if (allocib == NULL && unwindidx < 0) {
    378 			unwindidx = i - 1;
    379 		}
    380 		/*
    381 		 * If required, write synchronously, otherwise use
    382 		 * delayed write.
    383 		 */
    384 		if (flags & B_SYNC) {
    385 			bwrite(bp);
    386 		} else {
    387 			bdwrite(bp);
    388 		}
    389 		return (0);
    390 	}
        	/*
        	 * The data block already exists; the indirect block is no longer
        	 * needed.  With B_CLRBUF the existing contents are read in;
        	 * otherwise the buffer is obtained without a read, pointed at
        	 * the block and cleared.
        	 * NOTE(review): clrbuf() on an already-allocated block in the
        	 * non-B_CLRBUF case is surprising -- confirm it is intended.
        	 */
    391 	brelse(bp);
    392 	if (bpp != NULL) {
    393 		if (flags & B_CLRBUF) {
    394 			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
    395 			if (error) {
    396 				brelse(nbp);
    397 				goto fail;
    398 			}
    399 		} else {
    400 			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
    401 			nbp->b_blkno = fsbtodb(fs, nb);
    402 			clrbuf(nbp);
    403 		}
    404 		*bpp = nbp;
    405 	}
    406 	return (0);
    407 
    408 fail:
    409 
    410 	/*
    411 	 * Restore the UVM state to what the rest of the FFS code is
    412 	 * expecting.  Unbusy any pages that we allocated and left busy up in
    413 	 * ufs_balloc_range().  the following VOP_FSYNC() will try to busy
    414 	 * those pages again, which would deadlock if they are still busy
    415 	 * from before.  After this we're back to a state where we can undo
    416 	 * any partial allocation.
    417 	 */
    418 
        	/*
        	 * Walk one file system block's worth of pages starting at
        	 * a_startoffset, stopping at the first offset with no page.
        	 */
    419 	simple_lock(&vp->v_uobj.vmobjlock);
    420 	for (off = ap->a_startoffset; off < ap->a_startoffset + fs->fs_bsize;
    421 	     off += PAGE_SIZE) {
    422 		struct vm_page *pg;
    423 
    424 		pg = uvm_pagelookup(&vp->v_uobj, off);
    425 		if (pg == NULL) {
    426 			break;
    427 		}
    428 		uvm_pageactivate(pg);
    429 		KASSERT((pg->flags & PG_FAKE) == 0);
    430 		pg->flags &= ~(PG_BUSY);
    431 		UVM_PAGE_OWN(pg, NULL);
    432 	}
    433 	simple_unlock(&vp->v_uobj.vmobjlock);
    434 
    435 	/*
    436 	 * If we have failed part way through block allocation, we
    437 	 * have to deallocate any indirect blocks that we have allocated.
    438 	 * We have to fsync the file before we start to get rid of all
    439 	 * of its dependencies so that we do not leave them dangling.
    440 	 * We have to sync it at the end so that the soft updates code
    441 	 * does not find any untracked changes. Although this is really
    442 	 * slow, running out of disk space is not expected to be a common
    443 	 * occurrence. The error return from fsync is ignored as we already
    444 	 * have an error to return to the user.
    445 	 */
    446 
    447 	(void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc->l_proc);
        	/* Free every block recorded in allociblk[] by this call. */
    448 	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
    449 		ffs_blkfree(ip, *blkp, fs->fs_bsize);
    450 		deallocated += fs->fs_bsize;
    451 	}
    452 	if (unwindidx >= 0) {
    453 		if (unwindidx == 0) {
        			/* The first new pointer lives in the inode itself. */
    454 			*allocib = 0;
    455 		} else {
    456 			int r;
    457 
        			/*
        			 * The first new pointer lives in an indirect block:
        			 * read it, zero the slot and write it back.
        			 * NOTE(review): the brelse() after panic() is
        			 * presumably never reached.
        			 */
    458 			r = bread(vp, indirs[unwindidx].in_lbn,
    459 			    (int)fs->fs_bsize, NOCRED, &bp);
    460 			if (r) {
    461 				panic("Could not unwind indirect block, error %d", r);
    462 				brelse(bp);
    463 			} else {
    464 				bap = (ufs_daddr_t *)bp->b_data;
    465 				bap[indirs[unwindidx].in_off] = 0;
    466 				if (flags & B_SYNC)
    467 					bwrite(bp);
    468 				else
    469 					bdwrite(bp);
    470 			}
    471 		}
        		/*
        		 * Mark the buffers of all deeper indirect levels B_INVAL
        		 * and release them.
        		 */
    472 		for (i = unwindidx + 1; i <= num; i++) {
    473 			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
    474 			    0);
    475 			bp->b_flags |= B_INVAL;
    476 			brelse(bp);
    477 		}
    478 	}
        	/* Give back the quota and block count charged for the freed blocks. */
    479 	if (deallocated) {
    480 #ifdef QUOTA
    481 		/*
    482 		 * Restore user's disk quota because allocation failed.
    483 		 */
    484 		(void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
    485 #endif
    486 		ip->i_ffs_blocks -= btodb(deallocated);
    487 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
    488 	}
    489 	(void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc->l_proc);
    490 	return (error);
    491 }
    492 
    493 
    494 int
    495 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
    496     struct ucred *cred)
    497 {
    498 	struct inode *ip = VTOI(vp);
    499 	struct fs *fs = ip->i_fs;
    500 	int error, delta, bshift, bsize;
    501 	UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
    502 
    503 	error = 0;
    504 	bshift = fs->fs_bshift;
    505 	bsize = 1 << bshift;
    506 
    507 	delta = off & (bsize - 1);
    508 	off -= delta;
    509 	len += delta;
    510 
    511 	while (len > 0) {
    512 		bsize = MIN(bsize, len);
    513 
    514 		error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
    515 		if (error) {
    516 			goto out;
    517 		}
    518 
    519 		/*
    520 		 * increase file size now, VOP_BALLOC() requires that
    521 		 * EOF be up-to-date before each call.
    522 		 */
    523 
    524 		if (ip->i_ffs_size < off + bsize) {
    525 			UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
    526 			    vp, ip->i_ffs_size, off + bsize, 0);
    527 			ip->i_ffs_size = off + bsize;
    528 		}
    529 
    530 		off += bsize;
    531 		len -= bsize;
    532 	}
    533 
    534 out:
    535 	return error;
    536 }
    537