Home | History | Annotate | Line # | Download | only in lfs
lfs_syscalls.c revision 1.8
      1 /*	$NetBSD: lfs_syscalls.c,v 1.8 1995/03/21 13:34:08 mycroft Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)lfs_syscalls.c	8.6 (Berkeley) 6/16/94
     36  */
     37 
     38 #include <sys/param.h>
     39 #include <sys/systm.h>
     40 #include <sys/proc.h>
     41 #include <sys/buf.h>
     42 #include <sys/mount.h>
     43 #include <sys/vnode.h>
     44 #include <sys/malloc.h>
     45 #include <sys/kernel.h>
     46 
     47 #include <sys/syscallargs.h>
     48 
     49 #include <ufs/ufs/quota.h>
     50 #include <ufs/ufs/inode.h>
     51 #include <ufs/ufs/ufsmount.h>
     52 #include <ufs/ufs/ufs_extern.h>
     53 
     54 #include <ufs/lfs/lfs.h>
     55 #include <ufs/lfs/lfs_extern.h>
/*
 * Advance SP->fip past the block list of the current FINFO, i.e. to the
 * address of fi_blocks[fi_nblocks], which is where the next FINFO starts.
 */
#define BUMP_FIP(SP) \
	((SP)->fip = (FINFO *)(&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks]))

/* Adjust the FINFO count kept in the segment summary that SP points at. */
#define INC_FINFO(SP) (++((SEGSUM *)((SP)->segsum))->ss_nfinfo)
#define DEC_FINFO(SP) (--((SEGSUM *)((SP)->segsum))->ss_nfinfo)

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.
 *
 * The expansion refers to the caller's local variables `sp' and `fs'.  It is
 * wrapped in do { } while (0) so that "CHECK_SEG(x);" is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define	CHECK_SEG(s)					\
	do {						\
		if (sp->sum_bytes_left < (s))		\
			(void) lfs_writeseg(fs, sp);	\
	} while (0)
/*
 * Forward declaration: lfs_fakebuf() is defined at the end of this file and
 * is called by lfs_markv() for data blocks.
 */
struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
     71 
     72 /*
     73  * lfs_markv:
     74  *
     75  * This will mark inodes and blocks dirty, so they are written into the log.
     76  * It will block until all the blocks have been written.  The segment create
     77  * time passed in the block_info and inode_info structures is used to decide
     78  * if the data is valid for each block (in case some process dirtied a block
     79  * or inode that is being cleaned between the determination that a block is
     80  * live and the lfs_markv call).
     81  *
     82  *  0 on success
     83  * -1/errno is return on error.
     84  */
     85 int
     86 lfs_markv(p, uap, retval)
     87 	struct proc *p;
     88 	struct lfs_markv_args /* {
     89 		syscallarg(fsid_t *) fsidp;
     90 		syscallarg(struct block_info *) blkiov;
     91 		syscallarg(int) blkcnt;
     92 	} */ *uap;
     93 	register_t *retval;
     94 {
     95 	struct segment *sp;
     96 	BLOCK_INFO *blkp;
     97 	IFILE *ifp;
     98 	struct buf *bp, **bpp;
     99 	struct inode *ip;
    100 	struct lfs *fs;
    101 	struct mount *mntp;
    102 	struct vnode *vp;
    103 	fsid_t fsid;
    104 	void *start;
    105 	ino_t lastino;
    106 	daddr_t b_daddr, v_daddr;
    107 	u_long bsize;
    108 	int cnt, error;
    109 
    110 	if (error = suser(p->p_ucred, &p->p_acflag))
    111 		return (error);
    112 
    113 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    114 		return (error);
    115 	if ((mntp = getvfs(&fsid)) == NULL)
    116 		return (EINVAL);
    117 
    118 	cnt = SCARG(uap, blkcnt);
    119 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    120 	if (error = copyin(SCARG(uap, blkiov), start, cnt * sizeof(BLOCK_INFO)))
    121 		goto err1;
    122 
    123 	/* Mark blocks/inodes dirty.  */
    124 	fs = VFSTOUFS(mntp)->um_lfs;
    125 	bsize = fs->lfs_bsize;
    126 	error = 0;
    127 
    128 	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
    129 	sp = fs->lfs_sp;
    130 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
    131 	    blkp = start; cnt--; ++blkp) {
    132 		/*
    133 		 * Get the IFILE entry (only once) and see if the file still
    134 		 * exists.
    135 		 */
    136 		if (lastino != blkp->bi_inode) {
    137 			if (lastino != LFS_UNUSED_INUM) {
    138 				/* Finish up last file */
    139 				if (sp->fip->fi_nblocks == 0) {
    140 					DEC_FINFO(sp);
    141 					sp->sum_bytes_left +=
    142 					    sizeof(FINFO) - sizeof(daddr_t);
    143 				} else {
    144 					lfs_updatemeta(sp);
    145 					BUMP_FIP(sp);
    146 				}
    147 
    148 				lfs_writeinode(fs, sp, ip);
    149 				lfs_vunref(vp);
    150 			}
    151 
    152 			/* Start a new file */
    153 			CHECK_SEG(sizeof(FINFO));
    154 			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
    155 			INC_FINFO(sp);
    156 			sp->start_lbp = &sp->fip->fi_blocks[0];
    157 			sp->vp = NULL;
    158 			sp->fip->fi_version = blkp->bi_version;
    159 			sp->fip->fi_nblocks = 0;
    160 			sp->fip->fi_ino = blkp->bi_inode;
    161 			lastino = blkp->bi_inode;
    162 			if (blkp->bi_inode == LFS_IFILE_INUM)
    163 				v_daddr = fs->lfs_idaddr;
    164 			else {
    165 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
    166 				v_daddr = ifp->if_daddr;
    167 				brelse(bp);
    168 			}
    169 			if (v_daddr == LFS_UNUSED_DADDR)
    170 				continue;
    171 
    172 			/* Get the vnode/inode. */
    173 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
    174 			    blkp->bi_lbn == LFS_UNUSED_LBN ?
    175 			    blkp->bi_bp : NULL)) {
    176 #ifdef DIAGNOSTIC
    177 				printf("lfs_markv: VFS_VGET failed (%d)\n",
    178 				    blkp->bi_inode);
    179 #endif
    180 				lastino = LFS_UNUSED_INUM;
    181 				v_daddr = LFS_UNUSED_DADDR;
    182 				continue;
    183 			}
    184 			sp->vp = vp;
    185 			ip = VTOI(vp);
    186 		} else if (v_daddr == LFS_UNUSED_DADDR)
    187 			continue;
    188 
    189 		/* If this BLOCK_INFO didn't contain a block, keep going. */
    190 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    191 			continue;
    192 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
    193 		    b_daddr != blkp->bi_daddr)
    194 			continue;
    195 		/*
    196 		 * If we got to here, then we are keeping the block.  If it
    197 		 * is an indirect block, we want to actually put it in the
    198 		 * buffer cache so that it can be updated in the finish_meta
    199 		 * section.  If it's not, we need to allocate a fake buffer
    200 		 * so that writeseg can perform the copyin and write the buffer.
    201 		 */
    202 		if (blkp->bi_lbn >= 0)	/* Data Block */
    203 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
    204 			    blkp->bi_bp);
    205 		else {
    206 			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
    207 			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
    208 			    (error = copyin(blkp->bi_bp, bp->b_data,
    209 			    bsize)))
    210 				goto err2;
    211 			if (error = VOP_BWRITE(bp))
    212 				goto err2;
    213 		}
    214 		while (lfs_gatherblock(sp, bp, NULL));
    215 	}
    216 	if (sp->vp) {
    217 		if (sp->fip->fi_nblocks == 0) {
    218 			DEC_FINFO(sp);
    219 			sp->sum_bytes_left +=
    220 			    sizeof(FINFO) - sizeof(daddr_t);
    221 		} else
    222 			lfs_updatemeta(sp);
    223 
    224 		lfs_writeinode(fs, sp, ip);
    225 		lfs_vunref(vp);
    226 	}
    227 	(void) lfs_writeseg(fs, sp);
    228 	lfs_segunlock(fs);
    229 	free(start, M_SEGMENT);
    230 	return (error);
    231 
    232 /*
    233  * XXX
    234  * If we come in to error 2, we might have indirect blocks that were
    235  * updated and now have bad block pointers.  I don't know what to do
    236  * about this.
    237  */
    238 
    239 err2:	lfs_vunref(vp);
    240 	/* Free up fakebuffers */
    241 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
    242 		if ((*bpp)->b_flags & B_CALL) {
    243 			brelvp(*bpp);
    244 			free(*bpp, M_SEGMENT);
    245 		} else
    246 			brelse(*bpp);
    247 	lfs_segunlock(fs);
    248 err1:
    249 	free(start, M_SEGMENT);
    250 	return (error);
    251 }
    252 
    253 /*
    254  * lfs_bmapv:
    255  *
    256  * This will fill in the current disk address for arrays of blocks.
    257  *
    258  *  0 on success
    259  * -1/errno is return on error.
    260  */
    261 int
    262 lfs_bmapv(p, uap, retval)
    263 	struct proc *p;
    264 	struct lfs_bmapv_args /* {
    265 		syscallarg(fsid_t *) fsidp;
    266 		syscallarg(struct block_info *) blkiov;
    267 		syscallarg(int) blkcnt;
    268 	} */ *uap;
    269 	register_t *retval;
    270 {
    271 	BLOCK_INFO *blkp;
    272 	struct mount *mntp;
    273 	struct vnode *vp;
    274 	fsid_t fsid;
    275 	void *start;
    276 	daddr_t daddr;
    277 	int cnt, error, step;
    278 
    279 	if (error = suser(p->p_ucred, &p->p_acflag))
    280 		return (error);
    281 
    282 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    283 		return (error);
    284 	if ((mntp = getvfs(&fsid)) == NULL)
    285 		return (EINVAL);
    286 
    287 	cnt = SCARG(uap, blkcnt);
    288 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
    289 	if (error = copyin(SCARG(uap, blkiov), blkp,
    290 	    cnt * sizeof(BLOCK_INFO))) {
    291 		free(blkp, M_SEGMENT);
    292 		return (error);
    293 	}
    294 
    295 	for (step = cnt; step--; ++blkp) {
    296 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
    297 			continue;
    298 		/* Could be a deadlock ? */
    299 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
    300 			daddr = LFS_UNUSED_DADDR;
    301 		else {
    302 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
    303 				daddr = LFS_UNUSED_DADDR;
    304 			vput(vp);
    305 		}
    306 		blkp->bi_daddr = daddr;
    307         }
    308 	copyout(start, SCARG(uap, blkiov), cnt * sizeof(BLOCK_INFO));
    309 	free(start, M_SEGMENT);
    310 	return (0);
    311 }
    312 
    313 /*
    314  * lfs_segclean:
    315  *
    316  * Mark the segment clean.
    317  *
    318  *  0 on success
    319  * -1/errno is return on error.
    320  */
    321 int
    322 lfs_segclean(p, uap, retval)
    323 	struct proc *p;
    324 	struct lfs_segclean_args /* {
    325 		syscallarg(fsid_t *) fsidp;
    326 		syscallarg(u_long) segment;
    327 	} */ *uap;
    328 	register_t *retval;
    329 {
    330 	CLEANERINFO *cip;
    331 	SEGUSE *sup;
    332 	struct buf *bp;
    333 	struct mount *mntp;
    334 	struct lfs *fs;
    335 	fsid_t fsid;
    336 	int error;
    337 
    338 	if (error = suser(p->p_ucred, &p->p_acflag))
    339 		return (error);
    340 
    341 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    342 		return (error);
    343 	if ((mntp = getvfs(&fsid)) == NULL)
    344 		return (EINVAL);
    345 
    346 	fs = VFSTOUFS(mntp)->um_lfs;
    347 
    348 	if (datosn(fs, fs->lfs_curseg) == SCARG(uap, segment))
    349 		return (EBUSY);
    350 
    351 	LFS_SEGENTRY(sup, fs, SCARG(uap, segment), bp);
    352 	if (sup->su_flags & SEGUSE_ACTIVE) {
    353 		brelse(bp);
    354 		return (EBUSY);
    355 	}
    356 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
    357 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
    358 	    sup->su_ninos * btodb(fs->lfs_bsize);
    359 	sup->su_flags &= ~SEGUSE_DIRTY;
    360 	(void) VOP_BWRITE(bp);
    361 
    362 	LFS_CLEANERINFO(cip, fs, bp);
    363 	++cip->clean;
    364 	--cip->dirty;
    365 	(void) VOP_BWRITE(bp);
    366 	wakeup(&fs->lfs_avail);
    367 	return (0);
    368 }
    369 
    370 /*
    371  * lfs_segwait:
    372  *
    373  * This will block until a segment in file system fsid is written.  A timeout
    374  * in milliseconds may be specified which will awake the cleaner automatically.
    375  * An fsid of -1 means any file system, and a timeout of 0 means forever.
    376  *
    377  *  0 on success
    378  *  1 on timeout
    379  * -1/errno is return on error.
    380  */
    381 int
    382 lfs_segwait(p, uap, retval)
    383 	struct proc *p;
    384 	struct lfs_segwait_args /* {
    385 		syscallarg(fsid_t *) fsidp;
    386 		syscallarg(struct timeval *) tv;
    387 	} */ *uap;
    388 	register_t *retval;
    389 {
    390 	extern int lfs_allclean_wakeup;
    391 	struct mount *mntp;
    392 	struct timeval atv;
    393 	fsid_t fsid;
    394 	void *addr;
    395 	u_long timeout;
    396 	int error, s;
    397 
    398 	if (error = suser(p->p_ucred, &p->p_acflag)) {
    399 		return (error);
    400 }
    401 #ifdef WHEN_QUADS_WORK
    402 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    403 		return (error);
    404 	if (fsid == (fsid_t)-1)
    405 		addr = &lfs_allclean_wakeup;
    406 	else {
    407 		if ((mntp = getvfs(&fsid)) == NULL)
    408 			return (EINVAL);
    409 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    410 	}
    411 #else
    412 	if (error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t)))
    413 		return (error);
    414 	if ((mntp = getvfs(&fsid)) == NULL)
    415 		addr = &lfs_allclean_wakeup;
    416 	else
    417 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
    418 #endif
    419 
    420 	if (SCARG(uap, tv)) {
    421 		if (error =
    422 		    copyin(SCARG(uap, tv), &atv, sizeof(struct timeval)))
    423 			return (error);
    424 		if (itimerfix(&atv))
    425 			return (EINVAL);
    426 		s = splclock();
    427 		timeradd(&atv, &time, &atv);
    428 		timeout = hzto(&atv);
    429 		splx(s);
    430 	} else
    431 		timeout = 0;
    432 
    433 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
    434 	return (error == ERESTART ? EINTR : 0);
    435 }
    436 
    437 /*
    438  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
    439  * daddr from the ifile, so don't look it up again.  If the cleaner is
    440  * processing IINFO structures, it may have the ondisk inode already, so
    441  * don't go retrieving it again.
    442  */
    443 int
    444 lfs_fastvget(mp, ino, daddr, vpp, dinp)
    445 	struct mount *mp;
    446 	ino_t ino;
    447 	daddr_t daddr;
    448 	struct vnode **vpp;
    449 	struct dinode *dinp;
    450 {
    451 	register struct inode *ip;
    452 	struct vnode *vp;
    453 	struct ufsmount *ump;
    454 	struct buf *bp;
    455 	dev_t dev;
    456 	int error;
    457 
    458 	ump = VFSTOUFS(mp);
    459 	dev = ump->um_dev;
    460 	/*
    461 	 * This is playing fast and loose.  Someone may have the inode
    462 	 * locked, in which case they are going to be distinctly unhappy
    463 	 * if we trash something.
    464 	 */
    465 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
    466 		lfs_vref(*vpp);
    467 		if ((*vpp)->v_flag & VXLOCK)
    468 			printf ("Cleaned vnode VXLOCKED\n");
    469 		ip = VTOI(*vpp);
    470 		if (ip->i_flag & IN_LOCKED)
    471 			printf("cleaned vnode locked\n");
    472 		if (!(ip->i_flag & IN_MODIFIED)) {
    473 			++ump->um_lfs->lfs_uinodes;
    474 			ip->i_flag |= IN_MODIFIED;
    475 		}
    476 		ip->i_flag |= IN_MODIFIED;
    477 		return (0);
    478 	}
    479 
    480 	/* Allocate new vnode/inode. */
    481 	if (error = lfs_vcreate(mp, ino, &vp)) {
    482 		*vpp = NULL;
    483 		return (error);
    484 	}
    485 
    486 	/*
    487 	 * Put it onto its hash chain and lock it so that other requests for
    488 	 * this inode will block if they arrive while we are sleeping waiting
    489 	 * for old data structures to be purged or for the contents of the
    490 	 * disk portion of this inode to be read.
    491 	 */
    492 	ip = VTOI(vp);
    493 	ufs_ihashins(ip);
    494 
    495 	/*
    496 	 * XXX
    497 	 * This may not need to be here, logically it should go down with
    498 	 * the i_devvp initialization.
    499 	 * Ask Kirk.
    500 	 */
    501 	ip->i_lfs = ump->um_lfs;
    502 
    503 	/* Read in the disk contents for the inode, copy into the inode. */
    504 	if (dinp)
    505 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
    506 			return (error);
    507 	else {
    508 		if (error = bread(ump->um_devvp, daddr,
    509 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
    510 			/*
    511 			 * The inode does not contain anything useful, so it
    512 			 * would be misleading to leave it on its hash chain.
    513 			 * Iput() will return it to the free list.
    514 			 */
    515 			ufs_ihashrem(ip);
    516 
    517 			/* Unlock and discard unneeded inode. */
    518 			lfs_vunref(vp);
    519 			brelse(bp);
    520 			*vpp = NULL;
    521 			return (error);
    522 		}
    523 		ip->i_din =
    524 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
    525 		brelse(bp);
    526 	}
    527 
    528 	/* Inode was just read from user space or disk, make sure it's locked */
    529 	ip->i_flag |= IN_LOCKED;
    530 
    531 	/*
    532 	 * Initialize the vnode from the inode, check for aliases.  In all
    533 	 * cases re-init ip, the underlying vnode/inode may have changed.
    534 	 */
    535 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
    536 		lfs_vunref(vp);
    537 		*vpp = NULL;
    538 		return (error);
    539 	}
    540 	/*
    541 	 * Finish inode initialization now that aliasing has been resolved.
    542 	 */
    543 	ip->i_devvp = ump->um_devvp;
    544 	ip->i_flag |= IN_MODIFIED;
    545 	++ump->um_lfs->lfs_uinodes;
    546 	VREF(ip->i_devvp);
    547 	*vpp = vp;
    548 	return (0);
    549 }
    550 struct buf *
    551 lfs_fakebuf(vp, lbn, size, uaddr)
    552 	struct vnode *vp;
    553 	int lbn;
    554 	size_t size;
    555 	caddr_t uaddr;
    556 {
    557 	struct buf *bp;
    558 
    559 	bp = lfs_newbuf(vp, lbn, 0);
    560 	bp->b_saveaddr = uaddr;
    561 	bp->b_bufsize = size;
    562 	bp->b_bcount = size;
    563 	bp->b_flags |= B_INVAL;
    564 	return (bp);
    565 }
    566