/* lfs_syscalls.c, revision 1.2 */
      1 /*-
      2  * Copyright (c) 1991, 1993, 1994
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  *
     33  *	from: @(#)lfs_syscalls.c	8.5 (Berkeley) 4/20/94
     34  *	$Id: lfs_syscalls.c,v 1.2 1994/06/16 03:25:40 mycroft Exp $
     35  */
     36 
     37 #include <sys/param.h>
     38 #include <sys/proc.h>
     39 #include <sys/buf.h>
     40 #include <sys/mount.h>
     41 #include <sys/vnode.h>
     42 #include <sys/malloc.h>
     43 #include <sys/kernel.h>
     44 
     45 #include <ufs/ufs/quota.h>
     46 #include <ufs/ufs/inode.h>
     47 #include <ufs/ufs/ufsmount.h>
     48 #include <ufs/ufs/ufs_extern.h>
     49 
     50 #include <ufs/lfs/lfs.h>
     51 #include <ufs/lfs/lfs_extern.h>
     52 #define BUMP_FIP(SP) \
     53 	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
     54 
     55 #define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
     56 #define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
     57 
     58 /*
     59  * Before committing to add something to a segment summary, make sure there
     60  * is enough room.  S is the bytes added to the summary.
     61  */
     62 #define	CHECK_SEG(s)			\
     63 if (sp->sum_bytes_left < (s)) {		\
     64 	(void) lfs_writeseg(fs, sp);	\
     65 }
     66 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     67 
     68 /*
     69  * lfs_markv:
     70  *
     71  * This will mark inodes and blocks dirty, so they are written into the log.
     72  * It will block until all the blocks have been written.  The segment create
     73  * time passed in the block_info and inode_info structures is used to decide
     74  * if the data is valid for each block (in case some process dirtied a block
     75  * or inode that is being cleaned between the determination that a block is
     76  * live and the lfs_markv call).
     77  *
     78  *  0 on success
     79  * -1/errno is return on error.
     80  */
/* Argument structure for the lfs_markv system call (issued by the cleaner). */
struct lfs_markv_args {
	fsid_t *fsidp;		/* file system */
	BLOCK_INFO *blkiov;	/* block array */
	int blkcnt;		/* count of block array entries */
};
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct lfs_markv_args *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	fsid_t fsid;
	void *start;
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Only the superuser (i.e. the cleaner process) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);

	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = getvfs(&fsid)) == NULL)
		return (EINVAL);

	/*
	 * Copy the cleaner's BLOCK_INFO array in from user space.
	 * NOTE(review): blkcnt comes from user space unvalidated; a
	 * negative or very large count would make this allocation
	 * misbehave -- confirm the cleaner is the only trusted caller.
	 */
	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/*
	 * Take the segment lock and walk the block array.  Entries for the
	 * same inode are expected to be adjacent; a change in bi_inode
	 * closes out the previous file's FINFO and opens a new one.
	 */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				if (sp->fip->fi_nblocks == 0) {
					/* Empty FINFO: reclaim its summary space. */
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(daddr_t);
				} else {
					lfs_updatemeta(sp);
					BUMP_FIP(sp);
				}

				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* IFILE says the inode no longer exists; skip it. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				/* Skip the remaining entries for this file. */
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * Skip the block if its on-disk address changed since the
		 * cleaner decided it was live (someone rewrote it meanwhile).
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Indirect block (negative lbn): real cache buffer. */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		/* Repeat while lfs_gatherblock() asks for a retry. */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Finish up the final file, as the loop body does between files. */
	if (sp->vp) {
		if (sp->fip->fi_nblocks == 0) {
			DEC_FINFO(sp);
			sp->sum_bytes_left +=
			    sizeof(FINFO) - sizeof(daddr_t);
		} else
			lfs_updatemeta(sp);

		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);

/*
 * XXX
 * If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return (error);
}
    249 
    250 /*
    251  * lfs_bmapv:
    252  *
    253  * This will fill in the current disk address for arrays of blocks.
    254  *
    255  *  0 on success
    256  * -1/errno is return on error.
    257  */
/* Argument structure for the lfs_bmapv system call (issued by the cleaner). */
struct lfs_bmapv_args {
	fsid_t *fsidp;		/* file system */
	BLOCK_INFO *blkiov;	/* block array */
	int blkcnt;		/* count of block array entries */
};
    263 int
    264 lfs_bmapv(p, uap, retval)
    265 	struct proc *p;
    266 	struct lfs_bmapv_args *uap;
    267 	int *retval;
    268 {
    269 	BLOCK_INFO *blkp;
    270 	struct mount *mntp;
    271 	struct vnode *vp;
    272 	fsid_t fsid;
    273 	void *start;
    274 	daddr_t daddr;
    275 	int cnt, error, step;
    276 
    277 	if (error = suser(p->p_ucred, &p->p_acflag))
    278 		return (error);
    279 
    280 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    281 		return (error);
    282 	if ((mntp = getvfs(&fsid)) == NULL)
    283 		return (EINVAL);
    284 
    285 	cnt = uap->blkcnt;
    286 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    287 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
    288 		free(blkp, M_SEGMENT);
    289 		return (error);
    290 	}
    291 
    292 	for (step = cnt; step--; ++blkp) {
    293 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    294 			continue;
    295 		/* Could be a deadlock ? */
    296 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    297 			daddr = LFS_UNUSED_DADDR;
    298 		else {
    299 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    300 				daddr = LFS_UNUSED_DADDR;
    301 			vput(vp);
    302 		}
    303 		blkp->bi_daddr = daddr;
    304         }
    305 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
    306 	free(start, M_SEGMENT);
    307 	return (0);
    308 }
    309 
    310 /*
    311  * lfs_segclean:
    312  *
    313  * Mark the segment clean.
    314  *
    315  *  0 on success
    316  * -1/errno is return on error.
    317  */
/* Argument structure for the lfs_segclean system call (issued by the cleaner). */
struct lfs_segclean_args {
	fsid_t *fsidp;		/* file system */
	u_long segment;		/* segment number */
};
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct lfs_segclean_args *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	fsid_t fsid;
	int error;

	/* Only the superuser (i.e. the cleaner process) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);

	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = getvfs(&fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	/* The segment currently being written may never be marked clean. */
	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	/* Likewise for a segment still flagged active in its usage entry. */
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
	/*
	 * Credit the reclaimed space back to the file system: the segment
	 * itself to lfs_avail, plus the summary and inode blocks that were
	 * charged against lfs_bfree.  Then clear the dirty flag and write
	 * the usage entry back.
	 */
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	/* Update cleaner bookkeeping: one more clean, one fewer dirty. */
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	/* Wake anyone sleeping on free space becoming available. */
	wakeup(&fs->lfs_avail);
	return (0);
}
    367 
    368 /*
    369  * lfs_segwait:
    370  *
    371  * This will block until a segment in file system fsid is written.  A timeout
    372  * in milliseconds may be specified which will awake the cleaner automatically.
    373  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    374  *
    375  *  0 on success
    376  *  1 on timeout
    377  * -1/errno is return on error.
    378  */
/* Argument structure for the lfs_segwait system call (issued by the cleaner). */
struct lfs_segwait_args {
	fsid_t *fsidp;		/* file system */
	struct timeval *tv;	/* timeout */
};
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct lfs_segwait_args *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	fsid_t fsid;
	void *addr;
	u_long timeout;
	int error, s;

	/* Only the superuser (i.e. the cleaner process) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag)) {
		return (error);
}
#ifdef WHEN_QUADS_WORK
	/* Preferred form: an fsid of -1 means "any file system". */
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if (fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = getvfs(&fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
	/*
	 * Fallback: an unresolvable fsid selects the any-file-system
	 * wakeup channel instead of returning EINVAL.
	 */
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = getvfs(&fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

	/* Convert the optional user timeout into ticks for tsleep(). */
	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;	/* 0 means sleep forever */

	/*
	 * Sleep until a segment write wakes `addr', the timeout fires, or
	 * a signal arrives (PCATCH).
	 * NOTE(review): on timeout tsleep() returns EWOULDBLOCK, which is
	 * mapped to 0 here -- not the 1 documented in the header comment
	 * above.  Confirm which value the cleaner actually expects.
	 */
	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}
    434 
    435 /*
    436  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    437  * daddr from the ifile, so don't look it up again.  If the cleaner is
    438  * processing IINFO structures, it may have the ondisk inode already, so
    439  * don't go retrieving it again.
    440  */
    441 int
    442 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    443 	struct mount *mp;
    444 	ino_t ino;
    445 	daddr_t daddr;
    446 	struct vnode **vpp;
    447 	struct dinode *dinp;
    448 {
    449 	register struct inode *ip;
    450 	struct vnode *vp;
    451 	struct ufsmount *ump;
    452 	struct buf *bp;
    453 	dev_t dev;
    454 	int error;
    455 
    456 	ump = VFSTOUFS(mp);
    457 	dev = ump->um_dev;
    458 	/*
    459 	 * This is playing fast and loose.  Someone may have the inode
    460 	 * locked, in which case they are going to be distinctly unhappy
    461 	 * if we trash something.
    462 	 */
    463 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    464 		lfs_vref(*vpp);
    465 		if ((*vpp)->v_flag & VXLOCK)
    466 			printf ("Cleaned vnode VXLOCKED\n");
    467 		ip = VTOI(*vpp);
    468 		if (ip->i_flag & IN_LOCKED)
    469 			printf("cleaned vnode locked\n");
    470 		if (!(ip->i_flag & IN_MODIFIED)) {
    471 			++ump->um_lfs->lfs_uinodes;
    472 			ip->i_flag |= IN_MODIFIED;
    473 		}
    474 		ip->i_flag |= IN_MODIFIED;
    475 		return (0);
    476 	}
    477 
    478 	/* Allocate new vnode/inode. */
    479 	if (error = lfs_vcreate(mp, ino, &vp)) {
    480 		*vpp = NULL;
    481 		return (error);
    482 	}
    483 
    484 	/*
    485 	 * Put it onto its hash chain and lock it so that other requests for
    486 	 * this inode will block if they arrive while we are sleeping waiting
    487 	 * for old data structures to be purged or for the contents of the
    488 	 * disk portion of this inode to be read.
    489 	 */
    490 	ip = VTOI(vp);
    491 	ufs_ihashins(ip);
    492 
    493 	/*
    494 	 * XXX
    495 	 * This may not need to be here, logically it should go down with
    496 	 * the i_devvp initialization.
    497 	 * Ask Kirk.
    498 	 */
    499 	ip->i_lfs = ump->um_lfs;
    500 
    501 	/* Read in the disk contents for the inode, copy into the inode. */
    502 	if (dinp)
    503 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
    504 			return (error);
    505 	else {
    506 		if (error = bread(ump->um_devvp, daddr,
    507 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
    508 			/*
    509 			 * The inode does not contain anything useful, so it
    510 			 * would be misleading to leave it on its hash chain.
    511 			 * Iput() will return it to the free list.
    512 			 */
    513 			ufs_ihashrem(ip);
    514 
    515 			/* Unlock and discard unneeded inode. */
    516 			lfs_vunref(vp);
    517 			brelse(bp);
    518 			*vpp = NULL;
    519 			return (error);
    520 		}
    521 		ip->i_din =
    522 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    523 		brelse(bp);
    524 	}
    525 
    526 	/* Inode was just read from user space or disk, make sure it's locked */
    527 	ip->i_flag |= IN_LOCKED;
    528 
    529 	/*
    530 	 * Initialize the vnode from the inode, check for aliases.  In all
    531 	 * cases re-init ip, the underlying vnode/inode may have changed.
    532 	 */
    533 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
    534 		lfs_vunref(vp);
    535 		*vpp = NULL;
    536 		return (error);
    537 	}
    538 	/*
    539 	 * Finish inode initialization now that aliasing has been resolved.
    540 	 */
    541 	ip->i_devvp = ump->um_devvp;
    542 	ip->i_flag |= IN_MODIFIED;
    543 	++ump->um_lfs->lfs_uinodes;
    544 	VREF(ip->i_devvp);
    545 	*vpp = vp;
    546 	return (0);
    547 }
    548 struct buf *
    549 lfs_fakebuf(vp, lbn, size, uaddr)
    550 	struct vnode *vp;
    551 	int lbn;
    552 	size_t size;
    553 	caddr_t uaddr;
    554 {
    555 	struct buf *bp;
    556 
    557 	bp = lfs_newbuf(vp, lbn, 0);
    558 	bp->b_saveaddr = uaddr;
    559 	bp->b_bufsize = size;
    560 	bp->b_bcount = size;
    561 	bp->b_flags |= B_INVAL;
    562 	return (bp);
    563 }
    564