Home | History | Annotate | Line # | Download | only in lfs
lfs_syscalls.c revision 1.3
      1 /*	$NetBSD: lfs_syscalls.c,v 1.3 1994/06/29 06:47:02 cgd Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)lfs_syscalls.c	8.5 (Berkeley) 4/20/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/proc.h>
     40 #include <sys/buf.h>
     41 #include <sys/mount.h>
     42 #include <sys/vnode.h>
     43 #include <sys/malloc.h>
     44 #include <sys/kernel.h>
     45 
     46 #include <ufs/ufs/quota.h>
     47 #include <ufs/ufs/inode.h>
     48 #include <ufs/ufs/ufsmount.h>
     49 #include <ufs/ufs/ufs_extern.h>
     50 
     51 #include <ufs/lfs/lfs.h>
     52 #include <ufs/lfs/lfs_extern.h>
/*
 * Advance the segment's current-FINFO pointer (SP->fip) to the address just
 * past the last block entry recorded in the FINFO it currently points at,
 * i.e. &fi_blocks[fi_nblocks].  The whole expansion is parenthesized so it
 * behaves as a single expression wherever it is used.
 */
#define BUMP_FIP(SP) \
	((SP)->fip = (FINFO *)(&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/*
 * Increment / decrement the FINFO count (ss_nfinfo) stored in the segment
 * summary that SP->segsum points at.  Each yields the updated count.
 */
#define INC_FINFO(SP) (++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define DEC_FINFO(SP) (--((SEGSUM *)((SP)->segsum))->ss_nfinfo)
     58 
/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  `s' is the number of bytes about to be added to the
 * summary; if fewer than that remain, the current partial segment is
 * written out first.
 *
 * The macro relies on variables named `fs' and `sp' being in scope at the
 * point of use.  It is wrapped in do { } while (0) so that it acts as one
 * statement and is safe inside an unbraced if/else.
 */
#define	CHECK_SEG(s)					\
do {							\
	if (sp->sum_bytes_left < (s))			\
		(void) lfs_writeseg(fs, sp);		\
} while (0)
     67 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     68 
     69 /*
     70  * lfs_markv:
     71  *
     72  * This will mark inodes and blocks dirty, so they are written into the log.
     73  * It will block until all the blocks have been written.  The segment create
     74  * time passed in the block_info and inode_info structures is used to decide
     75  * if the data is valid for each block (in case some process dirtied a block
     76  * or inode that is being cleaned between the determination that a block is
     77  * live and the lfs_markv call).
     78  *
     79  *  0 on success
     80  * -1/errno is return on error.
     81  */
     82 struct lfs_markv_args {
     83 	fsid_t *fsidp;		/* file system */
     84 	BLOCK_INFO *blkiov;	/* block array */
     85 	int blkcnt;		/* count of block array entries */
     86 };
     87 int
     88 lfs_markv(p, uap, retval)
     89 	struct proc *p;
     90 	struct lfs_markv_args *uap;
     91 	int *retval;
     92 {
     93 	struct segment *sp;
     94 	BLOCK_INFO *blkp;
     95 	IFILE *ifp;
     96 	struct buf *bp, **bpp;
     97 	struct inode *ip;
     98 	struct lfs *fs;
     99 	struct mount *mntp;
    100 	struct vnode *vp;
    101 	fsid_t fsid;
    102 	void *start;
    103 	ino_t lastino;
    104 	daddr_t b_daddr, v_daddr;
    105 	u_long bsize;
    106 	int cnt, error;
    107 
    108 	if (error = suser(p->p_ucred, &p->p_acflag))
    109 		return (error);
    110 
    111 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    112 		return (error);
    113 	if ((mntp = getvfs(&fsid)) == NULL)
    114 		return (EINVAL);
    115 
    116 	cnt = uap->blkcnt;
    117 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    118 	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
    119 		goto err1;
    120 
    121 	/* Mark blocks/inodes dirty.  */
    122 	fs = VFSTOUFS(mntp)->um_lfs;
    123 	bsize = fs->lfs_bsize;
    124 	error = 0;
    125 
    126 	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
    127 	sp = fs->lfs_sp;
    128 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
    129 	    blkp = start; cnt--; ++blkp) {
    130 		/*
    131 		 * Get the IFILE entry (only once) and see if the file still
    132 		 * exists.
    133 		 */
    134 		if (lastino != blkp->bi_inode) {
    135 			if (lastino != LFS_UNUSED_INUM) {
    136 				/* Finish up last file */
    137 				if (sp->fip->fi_nblocks == 0) {
    138 					DEC_FINFO(sp);
    139 					sp->sum_bytes_left +=
    140 					    sizeof(FINFO) - sizeof(daddr_t);
    141 				} else {
    142 					lfs_updatemeta(sp);
    143 					BUMP_FIP(sp);
    144 				}
    145 
    146 				lfs_writeinode(fs, sp, ip);
    147 				lfs_vunref(vp);
    148 			}
    149 
    150 			/* Start a new file */
    151 			CHECK_SEG(sizeof(FINFO));
    152 			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
    153 			INC_FINFO(sp);
    154 			sp->start_lbp = &sp->fip->fi_blocks[0];
    155 			sp->vp = NULL;
    156 			sp->fip->fi_version = blkp->bi_version;
    157 			sp->fip->fi_nblocks = 0;
    158 			sp->fip->fi_ino = blkp->bi_inode;
    159 			lastino = blkp->bi_inode;
    160 			if (blkp->bi_inode == LFS_IFILE_INUM)
    161 				v_daddr = fs->lfs_idaddr;
    162 			else {
    163 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
    164 				v_daddr = ifp->if_daddr;
    165 				brelse(bp);
    166 			}
    167 			if (v_daddr == LFS_UNUSED_DADDR)
    168 				continue;
    169 
    170 			/* Get the vnode/inode. */
    171 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
    172 			    blkp->bi_lbn == LFS_UNUSED_LBN ?
    173 			    blkp->bi_bp : NULL)) {
    174 #ifdef DIAGNOSTIC
    175 				printf("lfs_markv: VFS_VGET failed (%d)\n",
    176 				    blkp->bi_inode);
    177 #endif
    178 				lastino = LFS_UNUSED_INUM;
    179 				v_daddr = LFS_UNUSED_DADDR;
    180 				continue;
    181 			}
    182 			sp->vp = vp;
    183 			ip = VTOI(vp);
    184 		} else if (v_daddr == LFS_UNUSED_DADDR)
    185 			continue;
    186 
    187 		/* If this BLOCK_INFO didn't contain a block, keep going. */
    188 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    189 			continue;
    190 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
    191 		    b_daddr != blkp->bi_daddr)
    192 			continue;
    193 		/*
    194 		 * If we got to here, then we are keeping the block.  If it
    195 		 * is an indirect block, we want to actually put it in the
    196 		 * buffer cache so that it can be updated in the finish_meta
    197 		 * section.  If it's not, we need to allocate a fake buffer
    198 		 * so that writeseg can perform the copyin and write the buffer.
    199 		 */
    200 		if (blkp->bi_lbn >= 0)	/* Data Block */
    201 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
    202 			    blkp->bi_bp);
    203 		else {
    204 			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
    205 			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
    206 			    (error = copyin(blkp->bi_bp, bp->b_data,
    207 			    bsize)))
    208 				goto err2;
    209 			if (error = VOP_BWRITE(bp))
    210 				goto err2;
    211 		}
    212 		while (lfs_gatherblock(sp, bp, NULL));
    213 	}
    214 	if (sp->vp) {
    215 		if (sp->fip->fi_nblocks == 0) {
    216 			DEC_FINFO(sp);
    217 			sp->sum_bytes_left +=
    218 			    sizeof(FINFO) - sizeof(daddr_t);
    219 		} else
    220 			lfs_updatemeta(sp);
    221 
    222 		lfs_writeinode(fs, sp, ip);
    223 		lfs_vunref(vp);
    224 	}
    225 	(void) lfs_writeseg(fs, sp);
    226 	lfs_segunlock(fs);
    227 	free(start, M_SEGMENT);
    228 	return (error);
    229 
    230 /*
    231  * XXX
    232  * If we come in to error 2, we might have indirect blocks that were
    233  * updated and now have bad block pointers.  I don't know what to do
    234  * about this.
    235  */
    236 
    237 err2:	lfs_vunref(vp);
    238 	/* Free up fakebuffers */
    239 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
    240 		if ((*bpp)->b_flags & B_CALL) {
    241 			brelvp(*bpp);
    242 			free(*bpp, M_SEGMENT);
    243 		} else
    244 			brelse(*bpp);
    245 	lfs_segunlock(fs);
    246 err1:
    247 	free(start, M_SEGMENT);
    248 	return (error);
    249 }
    250 
    251 /*
    252  * lfs_bmapv:
    253  *
    254  * This will fill in the current disk address for arrays of blocks.
    255  *
    256  *  0 on success
    257  * -1/errno is return on error.
    258  */
    259 struct lfs_bmapv_args {
    260 	fsid_t *fsidp;		/* file system */
    261 	BLOCK_INFO *blkiov;	/* block array */
    262 	int blkcnt;		/* count of block array entries */
    263 };
    264 int
    265 lfs_bmapv(p, uap, retval)
    266 	struct proc *p;
    267 	struct lfs_bmapv_args *uap;
    268 	int *retval;
    269 {
    270 	BLOCK_INFO *blkp;
    271 	struct mount *mntp;
    272 	struct vnode *vp;
    273 	fsid_t fsid;
    274 	void *start;
    275 	daddr_t daddr;
    276 	int cnt, error, step;
    277 
    278 	if (error = suser(p->p_ucred, &p->p_acflag))
    279 		return (error);
    280 
    281 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    282 		return (error);
    283 	if ((mntp = getvfs(&fsid)) == NULL)
    284 		return (EINVAL);
    285 
    286 	cnt = uap->blkcnt;
    287 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    288 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
    289 		free(blkp, M_SEGMENT);
    290 		return (error);
    291 	}
    292 
    293 	for (step = cnt; step--; ++blkp) {
    294 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    295 			continue;
    296 		/* Could be a deadlock ? */
    297 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    298 			daddr = LFS_UNUSED_DADDR;
    299 		else {
    300 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    301 				daddr = LFS_UNUSED_DADDR;
    302 			vput(vp);
    303 		}
    304 		blkp->bi_daddr = daddr;
    305         }
    306 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
    307 	free(start, M_SEGMENT);
    308 	return (0);
    309 }
    310 
    311 /*
    312  * lfs_segclean:
    313  *
    314  * Mark the segment clean.
    315  *
    316  *  0 on success
    317  * -1/errno is return on error.
    318  */
    319 struct lfs_segclean_args {
    320 	fsid_t *fsidp;		/* file system */
    321 	u_long segment;		/* segment number */
    322 };
    323 int
    324 lfs_segclean(p, uap, retval)
    325 	struct proc *p;
    326 	struct lfs_segclean_args *uap;
    327 	int *retval;
    328 {
    329 	CLEANERINFO *cip;
    330 	SEGUSE *sup;
    331 	struct buf *bp;
    332 	struct mount *mntp;
    333 	struct lfs *fs;
    334 	fsid_t fsid;
    335 	int error;
    336 
    337 	if (error = suser(p->p_ucred, &p->p_acflag))
    338 		return (error);
    339 
    340 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    341 		return (error);
    342 	if ((mntp = getvfs(&fsid)) == NULL)
    343 		return (EINVAL);
    344 
    345 	fs = VFSTOUFS(mntp)->um_lfs;
    346 
    347 	if (datosn(fs, fs->lfs_curseg) == uap->segment)
    348 		return (EBUSY);
    349 
    350 	LFS_SEGENTRY(sup, fs, uap->segment, bp);
    351 	if (sup->su_flags & SEGUSE_ACTIVE) {
    352 		brelse(bp);
    353 		return (EBUSY);
    354 	}
    355 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
    356 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
    357 	    sup->su_ninos * btodb(fs->lfs_bsize);
    358 	sup->su_flags &= ~SEGUSE_DIRTY;
    359 	(void) VOP_BWRITE(bp);
    360 
    361 	LFS_CLEANERINFO(cip, fs, bp);
    362 	++cip->clean;
    363 	--cip->dirty;
    364 	(void) VOP_BWRITE(bp);
    365 	wakeup(&fs->lfs_avail);
    366 	return (0);
    367 }
    368 
    369 /*
    370  * lfs_segwait:
    371  *
    372  * This will block until a segment in file system fsid is written.  A timeout
    373  * in milliseconds may be specified which will awake the cleaner automatically.
    374  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    375  *
    376  *  0 on success
    377  *  1 on timeout
    378  * -1/errno is return on error.
    379  */
    380 struct lfs_segwait_args {
    381 	fsid_t *fsidp;		/* file system */
    382 	struct timeval *tv;	/* timeout */
    383 };
    384 int
    385 lfs_segwait(p, uap, retval)
    386 	struct proc *p;
    387 	struct lfs_segwait_args *uap;
    388 	int *retval;
    389 {
    390 	extern int lfs_allclean_wakeup;
    391 	struct mount *mntp;
    392 	struct timeval atv;
    393 	fsid_t fsid;
    394 	void *addr;
    395 	u_long timeout;
    396 	int error, s;
    397 
    398 	if (error = suser(p->p_ucred, &p->p_acflag)) {
    399 		return (error);
    400 }
    401 #ifdef WHEN_QUADS_WORK
    402 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    403 		return (error);
    404 	if (fsid == (fsid_t)-1)
    405 		addr = &lfs_allclean_wakeup;
    406 	else {
    407 		if ((mntp = getvfs(&fsid)) == NULL)
    408 			return (EINVAL);
    409 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    410 	}
    411 #else
    412 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
    413 		return (error);
    414 	if ((mntp = getvfs(&fsid)) == NULL)
    415 		addr = &lfs_allclean_wakeup;
    416 	else
    417 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    418 #endif
    419 
    420 	if (uap->tv) {
    421 		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
    422 			return (error);
    423 		if (itimerfix(&atv))
    424 			return (EINVAL);
    425 		s = splclock();
    426 		timevaladd(&atv, (struct timeval *)&time);
    427 		timeout = hzto(&atv);
    428 		splx(s);
    429 	} else
    430 		timeout = 0;
    431 
    432 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
    433 	return (error == ERESTART ? EINTR : 0);
    434 }
    435 
    436 /*
    437  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    438  * daddr from the ifile, so don't look it up again.  If the cleaner is
    439  * processing IINFO structures, it may have the ondisk inode already, so
    440  * don't go retrieving it again.
    441  */
    442 int
    443 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    444 	struct mount *mp;
    445 	ino_t ino;
    446 	daddr_t daddr;
    447 	struct vnode **vpp;
    448 	struct dinode *dinp;
    449 {
    450 	register struct inode *ip;
    451 	struct vnode *vp;
    452 	struct ufsmount *ump;
    453 	struct buf *bp;
    454 	dev_t dev;
    455 	int error;
    456 
    457 	ump = VFSTOUFS(mp);
    458 	dev = ump->um_dev;
    459 	/*
    460 	 * This is playing fast and loose.  Someone may have the inode
    461 	 * locked, in which case they are going to be distinctly unhappy
    462 	 * if we trash something.
    463 	 */
    464 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    465 		lfs_vref(*vpp);
    466 		if ((*vpp)->v_flag & VXLOCK)
    467 			printf ("Cleaned vnode VXLOCKED\n");
    468 		ip = VTOI(*vpp);
    469 		if (ip->i_flag & IN_LOCKED)
    470 			printf("cleaned vnode locked\n");
    471 		if (!(ip->i_flag & IN_MODIFIED)) {
    472 			++ump->um_lfs->lfs_uinodes;
    473 			ip->i_flag |= IN_MODIFIED;
    474 		}
    475 		ip->i_flag |= IN_MODIFIED;
    476 		return (0);
    477 	}
    478 
    479 	/* Allocate new vnode/inode. */
    480 	if (error = lfs_vcreate(mp, ino, &vp)) {
    481 		*vpp = NULL;
    482 		return (error);
    483 	}
    484 
    485 	/*
    486 	 * Put it onto its hash chain and lock it so that other requests for
    487 	 * this inode will block if they arrive while we are sleeping waiting
    488 	 * for old data structures to be purged or for the contents of the
    489 	 * disk portion of this inode to be read.
    490 	 */
    491 	ip = VTOI(vp);
    492 	ufs_ihashins(ip);
    493 
    494 	/*
    495 	 * XXX
    496 	 * This may not need to be here, logically it should go down with
    497 	 * the i_devvp initialization.
    498 	 * Ask Kirk.
    499 	 */
    500 	ip->i_lfs = ump->um_lfs;
    501 
    502 	/* Read in the disk contents for the inode, copy into the inode. */
    503 	if (dinp)
    504 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
    505 			return (error);
    506 	else {
    507 		if (error = bread(ump->um_devvp, daddr,
    508 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
    509 			/*
    510 			 * The inode does not contain anything useful, so it
    511 			 * would be misleading to leave it on its hash chain.
    512 			 * Iput() will return it to the free list.
    513 			 */
    514 			ufs_ihashrem(ip);
    515 
    516 			/* Unlock and discard unneeded inode. */
    517 			lfs_vunref(vp);
    518 			brelse(bp);
    519 			*vpp = NULL;
    520 			return (error);
    521 		}
    522 		ip->i_din =
    523 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    524 		brelse(bp);
    525 	}
    526 
    527 	/* Inode was just read from user space or disk, make sure it's locked */
    528 	ip->i_flag |= IN_LOCKED;
    529 
    530 	/*
    531 	 * Initialize the vnode from the inode, check for aliases.  In all
    532 	 * cases re-init ip, the underlying vnode/inode may have changed.
    533 	 */
    534 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
    535 		lfs_vunref(vp);
    536 		*vpp = NULL;
    537 		return (error);
    538 	}
    539 	/*
    540 	 * Finish inode initialization now that aliasing has been resolved.
    541 	 */
    542 	ip->i_devvp = ump->um_devvp;
    543 	ip->i_flag |= IN_MODIFIED;
    544 	++ump->um_lfs->lfs_uinodes;
    545 	VREF(ip->i_devvp);
    546 	*vpp = vp;
    547 	return (0);
    548 }
    549 struct buf *
    550 lfs_fakebuf(vp, lbn, size, uaddr)
    551 	struct vnode *vp;
    552 	int lbn;
    553 	size_t size;
    554 	caddr_t uaddr;
    555 {
    556 	struct buf *bp;
    557 
    558 	bp = lfs_newbuf(vp, lbn, 0);
    559 	bp->b_saveaddr = uaddr;
    560 	bp->b_bufsize = size;
    561 	bp->b_bcount = size;
    562 	bp->b_flags |= B_INVAL;
    563 	return (bp);
    564 }
    565