Home | History | Annotate | Line # | Download | only in ffs
ffs_vfsops.c revision 1.37
      1 /*	$NetBSD: ffs_vfsops.c,v 1.37 1998/06/09 07:46:33 scottr Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1989, 1991, 1993, 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  *
     35  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
     36  */
     37 
     38 #if defined(_KERNEL) && !defined(_LKM)
     39 #include "opt_quota.h"
     40 #endif
     41 
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/namei.h>
     45 #include <sys/proc.h>
     46 #include <sys/kernel.h>
     47 #include <sys/vnode.h>
     48 #include <sys/socket.h>
     49 #include <sys/mount.h>
     50 #include <sys/buf.h>
     51 #include <sys/device.h>
     52 #include <sys/mbuf.h>
     53 #include <sys/file.h>
     54 #include <sys/disklabel.h>
     55 #include <sys/ioctl.h>
     56 #include <sys/errno.h>
     57 #include <sys/malloc.h>
     58 #include <sys/lock.h>
     59 #include <vm/vm.h>
     60 #include <sys/sysctl.h>
     61 
     62 #include <miscfs/specfs/specdev.h>
     63 
     64 #include <ufs/ufs/quota.h>
     65 #include <ufs/ufs/ufsmount.h>
     66 #include <ufs/ufs/inode.h>
     67 #include <ufs/ufs/dir.h>
     68 #include <ufs/ufs/ufs_extern.h>
     69 #include <ufs/ufs/ufs_bswap.h>
     70 
     71 #include <ufs/ffs/fs.h>
     72 #include <ufs/ffs/ffs_extern.h>
     73 
     74 extern struct lock ufs_hashlock;
     75 
     76 int ffs_sbupdate __P((struct ufsmount *, int));
     77 
     78 extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
     79 extern struct vnodeopv_desc ffs_specop_opv_desc;
     80 #ifdef FIFO
     81 extern struct vnodeopv_desc ffs_fifoop_opv_desc;
     82 #endif
     83 
     84 struct vnodeopv_desc *ffs_vnodeopv_descs[] = {
     85 	&ffs_vnodeop_opv_desc,
     86 	&ffs_specop_opv_desc,
     87 #ifdef FIFO
     88 	&ffs_fifoop_opv_desc,
     89 #endif
     90 	NULL,
     91 };
     92 
     93 struct vfsops ffs_vfsops = {
     94 	MOUNT_FFS,
     95 	ffs_mount,
     96 	ufs_start,
     97 	ffs_unmount,
     98 	ufs_root,
     99 	ufs_quotactl,
    100 	ffs_statfs,
    101 	ffs_sync,
    102 	ffs_vget,
    103 	ffs_fhtovp,
    104 	ffs_vptofh,
    105 	ffs_init,
    106 	ffs_sysctl,
    107 	ffs_mountroot,
    108 	ffs_vnodeopv_descs,
    109 };
    110 
    111 /*
    112  * Called by main() when ffs is going to be mounted as root.
    113  */
    114 
    115 int
    116 ffs_mountroot()
    117 {
    118 	extern struct vnode *rootvp;
    119 	struct fs *fs;
    120 	struct mount *mp;
    121 	struct proc *p = curproc;	/* XXX */
    122 	struct ufsmount *ump;
    123 	int error;
    124 
    125 	if (root_device->dv_class != DV_DISK)
    126 		return (ENODEV);
    127 
    128 	/*
    129 	 * Get vnodes for rootdev.
    130 	 */
    131 	if (bdevvp(rootdev, &rootvp))
    132 		panic("ffs_mountroot: can't setup bdevvp's");
    133 
    134 	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp)))
    135 		return (error);
    136 	if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
    137 		mp->mnt_op->vfs_refcount--;
    138 		vfs_unbusy(mp);
    139 		free(mp, M_MOUNT);
    140 		return (error);
    141 	}
    142 	simple_lock(&mountlist_slock);
    143 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
    144 	simple_unlock(&mountlist_slock);
    145 	ump = VFSTOUFS(mp);
    146 	fs = ump->um_fs;
    147 	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
    148 	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
    149 	(void)ffs_statfs(mp, &mp->mnt_stat, p);
    150 	vfs_unbusy(mp);
    151 	inittodr(fs->fs_time);
    152 	return (0);
    153 }
    154 
    155 /*
    156  * VFS Operations.
    157  *
    158  * mount system call
    159  */
    160 int
    161 ffs_mount(mp, path, data, ndp, p)
    162 	register struct mount *mp;
    163 	const char *path;
    164 	void *data;
    165 	struct nameidata *ndp;
    166 	struct proc *p;
    167 {
    168 	struct vnode *devvp;
    169 	struct ufs_args args;
    170 	struct ufsmount *ump = NULL;
    171 	register struct fs *fs;
    172 	size_t size;
    173 	int error, flags;
    174 	mode_t accessmode;
    175 
    176 	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
    177 	if (error)
    178 		return (error);
    179 	/*
    180 	 * If updating, check whether changing from read-only to
    181 	 * read/write; if there is no device name, that's all we do.
    182 	 */
    183 	if (mp->mnt_flag & MNT_UPDATE) {
    184 		ump = VFSTOUFS(mp);
    185 		fs = ump->um_fs;
    186 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
    187 			flags = WRITECLOSE;
    188 			if (mp->mnt_flag & MNT_FORCE)
    189 				flags |= FORCECLOSE;
    190 			error = ffs_flushfiles(mp, flags, p);
    191 			if (error == 0 &&
    192 			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
    193 			    fs->fs_clean & FS_WASCLEAN) {
    194 				fs->fs_clean = FS_ISCLEAN;
    195 				(void) ffs_sbupdate(ump, MNT_WAIT);
    196 			}
    197 			if (error)
    198 				return (error);
    199 			fs->fs_ronly = 1;
    200 		}
    201 		if (mp->mnt_flag & MNT_RELOAD) {
    202 			error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
    203 			if (error)
    204 				return (error);
    205 		}
    206 		if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
    207 			/*
    208 			 * If upgrade to read-write by non-root, then verify
    209 			 * that user has necessary permissions on the device.
    210 			 */
    211 			if (p->p_ucred->cr_uid != 0) {
    212 				devvp = ump->um_devvp;
    213 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
    214 				error = VOP_ACCESS(devvp, VREAD | VWRITE,
    215 						   p->p_ucred, p);
    216 				VOP_UNLOCK(devvp, 0);
    217 				if (error)
    218 					return (error);
    219 			}
    220 			fs->fs_ronly = 0;
    221 			fs->fs_clean <<= 1;
    222 			fs->fs_fmod = 1;
    223 		}
    224 		if (args.fspec == 0) {
    225 			/*
    226 			 * Process export requests.
    227 			 */
    228 			return (vfs_export(mp, &ump->um_export, &args.export));
    229 		}
    230 	}
    231 	/*
    232 	 * Not an update, or updating the name: look up the name
    233 	 * and verify that it refers to a sensible block device.
    234 	 */
    235 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
    236 	if ((error = namei(ndp)) != 0)
    237 		return (error);
    238 	devvp = ndp->ni_vp;
    239 
    240 	if (devvp->v_type != VBLK) {
    241 		vrele(devvp);
    242 		return (ENOTBLK);
    243 	}
    244 	if (major(devvp->v_rdev) >= nblkdev) {
    245 		vrele(devvp);
    246 		return (ENXIO);
    247 	}
    248 	/*
    249 	 * If mount by non-root, then verify that user has necessary
    250 	 * permissions on the device.
    251 	 */
    252 	if (p->p_ucred->cr_uid != 0) {
    253 		accessmode = VREAD;
    254 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
    255 			accessmode |= VWRITE;
    256 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
    257 		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
    258 		VOP_UNLOCK(devvp, 0);
    259 		if (error) {
    260 			vrele(devvp);
    261 			return (error);
    262 		}
    263 	}
    264 	if ((mp->mnt_flag & MNT_UPDATE) == 0)
    265 		error = ffs_mountfs(devvp, mp, p);
    266 	else {
    267 		if (devvp != ump->um_devvp)
    268 			error = EINVAL;	/* needs translation */
    269 		else
    270 			vrele(devvp);
    271 	}
    272 	if (error) {
    273 		vrele(devvp);
    274 		return (error);
    275 	}
    276 	ump = VFSTOUFS(mp);
    277 	fs = ump->um_fs;
    278 	(void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size);
    279 	bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size);
    280 	bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
    281 	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
    282 	    &size);
    283 	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
    284 	if (fs->fs_fmod != 0) {	/* XXX */
    285 		fs->fs_fmod = 0;
    286 		if (fs->fs_clean & FS_WASCLEAN)
    287 			fs->fs_time = time.tv_sec;
    288 		else
    289 			printf("%s: file system not clean (fs_flags=%x); please fsck(8)\n",
    290 			    mp->mnt_stat.f_mntfromname, fs->fs_clean);
    291 		(void) ffs_cgupdate(ump, MNT_WAIT);
    292 	}
    293 	return (0);
    294 }
    295 
    296 /*
    297  * Reload all incore data for a filesystem (used after running fsck on
    298  * the root filesystem and finding things to fix). The filesystem must
    299  * be mounted read-only.
    300  *
    301  * Things to do to update the mount:
    302  *	1) invalidate all cached meta-data.
    303  *	2) re-read superblock from disk.
    304  *	3) re-read summary information from disk.
    305  *	4) invalidate all inactive vnodes.
    306  *	5) invalidate all cached file data.
    307  *	6) re-read inode data for all active vnodes.
    308  */
    309 int
    310 ffs_reload(mountp, cred, p)
    311 	register struct mount *mountp;
    312 	struct ucred *cred;
    313 	struct proc *p;
    314 {
    315 	register struct vnode *vp, *nvp, *devvp;
    316 	struct inode *ip;
    317 	struct buf *bp;
    318 	struct fs *fs, *newfs;
    319 	struct partinfo dpart;
    320 	int i, blks, size, error;
    321 	int32_t *lp;
    322 
    323 	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
    324 		return (EINVAL);
    325 	/*
    326 	 * Step 1: invalidate all cached meta-data.
    327 	 */
    328 	devvp = VFSTOUFS(mountp)->um_devvp;
    329 	if (vinvalbuf(devvp, 0, cred, p, 0, 0))
    330 		panic("ffs_reload: dirty1");
    331 	/*
    332 	 * Step 2: re-read superblock from disk.
    333 	 */
    334 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
    335 		size = DEV_BSIZE;
    336 	else
    337 		size = dpart.disklab->d_secsize;
    338 	error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, NOCRED, &bp);
    339 	if (error)
    340 		return (error);
    341 	fs = VFSTOUFS(mountp)->um_fs;
    342 	newfs = malloc(fs->fs_sbsize, M_UFSMNT, M_WAITOK);
    343 	bcopy(bp->b_data, newfs, fs->fs_sbsize);
    344 #ifdef FFS_EI
    345 	if (VFSTOUFS(mountp)->um_flags & UFS_NEEDSWAP)
    346 		ffs_sb_swap((struct fs*)bp->b_data, newfs, 0);
    347 #endif
    348 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
    349 	    newfs->fs_bsize < sizeof(struct fs)) {
    350 		brelse(bp);
    351 		free(newfs, M_UFSMNT);
    352 		return (EIO);		/* XXX needs translation */
    353 	}
    354 	/*
    355 	 * Copy pointer fields back into superblock before copying in	XXX
    356 	 * new superblock. These should really be in the ufsmount.	XXX
    357 	 * Note that important parameters (eg fs_ncg) are unchanged.
    358 	 */
    359 	bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
    360 	newfs->fs_maxcluster = fs->fs_maxcluster;
    361 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
    362 	if (fs->fs_sbsize < SBSIZE)
    363 		bp->b_flags |= B_INVAL;
    364 	brelse(bp);
    365 	free(newfs, M_UFSMNT);
    366 	mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
    367 	ffs_oldfscompat(fs);
    368 	/*
    369 	 * Step 3: re-read summary information from disk.
    370 	 */
    371 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
    372 	for (i = 0; i < blks; i += fs->fs_frag) {
    373 		size = fs->fs_bsize;
    374 		if (i + fs->fs_frag > blks)
    375 			size = (blks - i) * fs->fs_fsize;
    376 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
    377 			      NOCRED, &bp);
    378 		if (error)
    379 			return (error);
    380 #ifdef FFS_EI
    381 		if (UFS_MPNEEDSWAP(mountp))
    382 			ffs_csum_swap((struct csum*)bp->b_data,
    383 				(struct csum*)fs->fs_csp[fragstoblks(fs, i)], size);
    384 		else
    385 #endif
    386 			bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
    387 		brelse(bp);
    388 	}
    389 	/*
    390 	 * We no longer know anything about clusters per cylinder group.
    391 	 */
    392 	if (fs->fs_contigsumsize > 0) {
    393 		lp = fs->fs_maxcluster;
    394 		for (i = 0; i < fs->fs_ncg; i++)
    395 			*lp++ = fs->fs_contigsumsize;
    396 	}
    397 
    398 loop:
    399 	simple_lock(&mntvnode_slock);
    400 	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
    401 		if (vp->v_mount != mountp) {
    402 			simple_unlock(&mntvnode_slock);
    403 			goto loop;
    404 		}
    405 		nvp = vp->v_mntvnodes.le_next;
    406 		/*
    407 		 * Step 4: invalidate all inactive vnodes.
    408 		 */
    409 		if (vrecycle(vp, &mntvnode_slock, p))
    410 			goto loop;
    411 		/*
    412 		 * Step 5: invalidate all cached file data.
    413 		 */
    414 		simple_lock(&vp->v_interlock);
    415 		simple_unlock(&mntvnode_slock);
    416 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
    417 			goto loop;
    418 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
    419 			panic("ffs_reload: dirty2");
    420 		/*
    421 		 * Step 6: re-read inode data for all active vnodes.
    422 		 */
    423 		ip = VTOI(vp);
    424 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
    425 			      (int)fs->fs_bsize, NOCRED, &bp);
    426 		if (error) {
    427 			vput(vp);
    428 			return (error);
    429 		}
    430 #ifdef FFS_EI
    431 		if (UFS_MPNEEDSWAP(mountp))
    432 			ffs_dinode_swap((struct dinode *)bp->b_data +
    433 				ino_to_fsbo(fs, ip->i_number), &ip->i_din.ffs_din);
    434 		else
    435 #endif
    436 			ip->i_din.ffs_din = *((struct dinode *)bp->b_data +
    437 		    	ino_to_fsbo(fs, ip->i_number));
    438 		brelse(bp);
    439 		vput(vp);
    440 		simple_lock(&mntvnode_slock);
    441 	}
    442 	simple_unlock(&mntvnode_slock);
    443 	return (0);
    444 }
    445 
    446 /*
    447  * Common code for mount and mountroot
    448  */
    449 int
    450 ffs_mountfs(devvp, mp, p)
    451 	register struct vnode *devvp;
    452 	struct mount *mp;
    453 	struct proc *p;
    454 {
    455 	struct ufsmount *ump;
    456 	struct buf *bp;
    457 	struct fs *fs;
    458 	dev_t dev;
    459 	struct partinfo dpart;
    460 	caddr_t base, space;
    461 	int blks;
    462 	int error, i, size, ronly, needswap;
    463 	int32_t *lp;
    464 	struct ucred *cred;
    465 	extern struct vnode *rootvp;
    466 	u_int64_t maxfilesize;					/* XXX */
    467 	u_int32_t sbsize;
    468 
    469 	dev = devvp->v_rdev;
    470 	cred = p ? p->p_ucred : NOCRED;
    471 	/*
    472 	 * Disallow multiple mounts of the same device.
    473 	 * Disallow mounting of a device that is currently in use
    474 	 * (except for root, which might share swap device for miniroot).
    475 	 * Flush out any old buffers remaining from a previous use.
    476 	 */
    477 	if ((error = vfs_mountedon(devvp)) != 0)
    478 		return (error);
    479 	if (vcount(devvp) > 1 && devvp != rootvp)
    480 		return (EBUSY);
    481 	if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
    482 		return (error);
    483 
    484 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
    485 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
    486 	if (error)
    487 		return (error);
    488 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
    489 		size = DEV_BSIZE;
    490 	else
    491 		size = dpart.disklab->d_secsize;
    492 
    493 	bp = NULL;
    494 	ump = NULL;
    495 	error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, cred, &bp);
    496 	if (error)
    497 		goto out;
    498 
    499 	fs = (struct fs*)bp->b_data;
    500 	if (fs->fs_magic == FS_MAGIC) {
    501 		needswap = 0;
    502 		sbsize = fs->fs_sbsize;
    503 #ifdef FFS_EI
    504 	} else if (fs->fs_magic == bswap32(FS_MAGIC)) {
    505 		needswap = 1;
    506 		sbsize = bswap32(fs->fs_sbsize);
    507 #endif
    508 	} else {
    509 		error = EINVAL;
    510 		goto out;
    511 
    512 	}
    513 
    514 	fs = malloc((u_long)sbsize, M_UFSMNT, M_WAITOK);
    515 	bcopy(bp->b_data, fs, sbsize);
    516 #ifdef FFS_EI
    517 	if (needswap)
    518 		ffs_sb_swap((struct fs*)bp->b_data, fs, 0);
    519 #endif
    520 
    521 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
    522 	    fs->fs_bsize < sizeof(struct fs)) {
    523 		error = EINVAL;		/* XXX needs translation */
    524 		goto out2;
    525 	}
    526 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
    527 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
    528 		error = EROFS;		/* XXX what should be returned? */
    529 		goto out2;
    530 	}
    531 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
    532 	bzero((caddr_t)ump, sizeof *ump);
    533 	ump->um_fs = fs;
    534 	if (fs->fs_sbsize < SBSIZE)
    535 		bp->b_flags |= B_INVAL;
    536 	brelse(bp);
    537 	bp = NULL;
    538 	fs->fs_ronly = ronly;
    539 	if (ronly == 0) {
    540 		fs->fs_clean <<= 1;
    541 		fs->fs_fmod = 1;
    542 	}
    543 	size = fs->fs_cssize;
    544 	blks = howmany(size, fs->fs_fsize);
    545 	if (fs->fs_contigsumsize > 0)
    546 		size += fs->fs_ncg * sizeof(int32_t);
    547 	base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
    548 	for (i = 0; i < blks; i += fs->fs_frag) {
    549 		size = fs->fs_bsize;
    550 		if (i + fs->fs_frag > blks)
    551 			size = (blks - i) * fs->fs_fsize;
    552 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
    553 			      cred, &bp);
    554 		if (error) {
    555 			free(base, M_UFSMNT);
    556 			goto out2;
    557 		}
    558 #ifdef FFS_EI
    559 		if (needswap)
    560 			ffs_csum_swap((struct csum*)bp->b_data,
    561 				(struct csum*)space, size);
    562 		else
    563 #endif
    564 			bcopy(bp->b_data, space, (u_int)size);
    565 
    566 		fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
    567 		space += size;
    568 		brelse(bp);
    569 		bp = NULL;
    570 	}
    571 	if (fs->fs_contigsumsize > 0) {
    572 		fs->fs_maxcluster = lp = (int32_t *)space;
    573 		for (i = 0; i < fs->fs_ncg; i++)
    574 			*lp++ = fs->fs_contigsumsize;
    575 	}
    576 	mp->mnt_data = (qaddr_t)ump;
    577 	mp->mnt_stat.f_fsid.val[0] = (long)dev;
    578 	mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS);
    579 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
    580 	mp->mnt_flag |= MNT_LOCAL;
    581 #ifdef FFS_EI
    582 	if (needswap)
    583 		ump->um_flags |= UFS_NEEDSWAP;
    584 #endif
    585 	ump->um_mountp = mp;
    586 	ump->um_dev = dev;
    587 	ump->um_devvp = devvp;
    588 	ump->um_nindir = fs->fs_nindir;
    589 	ump->um_bptrtodb = fs->fs_fsbtodb;
    590 	ump->um_seqinc = fs->fs_frag;
    591 	for (i = 0; i < MAXQUOTAS; i++)
    592 		ump->um_quotas[i] = NULLVP;
    593 	devvp->v_specflags |= SI_MOUNTEDON;
    594 	ffs_oldfscompat(fs);
    595 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
    596 	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;	/* XXX */
    597 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
    598 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
    599 	return (0);
    600 out2:
    601 	free(fs, M_UFSMNT);
    602 out:
    603 	if (bp)
    604 		brelse(bp);
    605 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
    606 	if (ump) {
    607 		free(ump, M_UFSMNT);
    608 		mp->mnt_data = (qaddr_t)0;
    609 	}
    610 	return (error);
    611 }
    612 
    613 /*
    614  * Sanity checks for old file systems.
    615  *
    616  * XXX - goes away some day.
    617  */
    618 int
    619 ffs_oldfscompat(fs)
    620 	struct fs *fs;
    621 {
    622 	int i;
    623 
    624 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);	/* XXX */
    625 	fs->fs_interleave = max(fs->fs_interleave, 1);		/* XXX */
    626 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
    627 		fs->fs_nrpos = 8;				/* XXX */
    628 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
    629 		u_int64_t sizepb = fs->fs_bsize;		/* XXX */
    630 								/* XXX */
    631 		fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1;	/* XXX */
    632 		for (i = 0; i < NIADDR; i++) {			/* XXX */
    633 			sizepb *= NINDIR(fs);			/* XXX */
    634 			fs->fs_maxfilesize += sizepb;		/* XXX */
    635 		}						/* XXX */
    636 		fs->fs_qbmask = ~fs->fs_bmask;			/* XXX */
    637 		fs->fs_qfmask = ~fs->fs_fmask;			/* XXX */
    638 	}							/* XXX */
    639 	return (0);
    640 }
    641 
    642 /*
    643  * unmount system call
    644  */
    645 int
    646 ffs_unmount(mp, mntflags, p)
    647 	struct mount *mp;
    648 	int mntflags;
    649 	struct proc *p;
    650 {
    651 	register struct ufsmount *ump;
    652 	register struct fs *fs;
    653 	int error, flags;
    654 
    655 	flags = 0;
    656 	if (mntflags & MNT_FORCE)
    657 		flags |= FORCECLOSE;
    658 	if ((error = ffs_flushfiles(mp, flags, p)) != 0)
    659 		return (error);
    660 	ump = VFSTOUFS(mp);
    661 	fs = ump->um_fs;
    662 	if (fs->fs_ronly == 0 &&
    663 	    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
    664 	    fs->fs_clean & FS_WASCLEAN) {
    665 		fs->fs_clean = FS_ISCLEAN;
    666 		(void) ffs_sbupdate(ump, MNT_WAIT);
    667 	}
    668 	ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
    669 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
    670 		NOCRED, p);
    671 	vrele(ump->um_devvp);
    672 	free(fs->fs_csp[0], M_UFSMNT);
    673 	free(fs, M_UFSMNT);
    674 	free(ump, M_UFSMNT);
    675 	mp->mnt_data = (qaddr_t)0;
    676 	mp->mnt_flag &= ~MNT_LOCAL;
    677 	return (error);
    678 }
    679 
    680 /*
    681  * Flush out all the files in a filesystem.
    682  */
    683 int
    684 ffs_flushfiles(mp, flags, p)
    685 	register struct mount *mp;
    686 	int flags;
    687 	struct proc *p;
    688 {
    689 	extern int doforce;
    690 	register struct ufsmount *ump;
    691 	int error;
    692 
    693 	if (!doforce)
    694 		flags &= ~FORCECLOSE;
    695 	ump = VFSTOUFS(mp);
    696 #ifdef QUOTA
    697 	if (mp->mnt_flag & MNT_QUOTA) {
    698 		int i;
    699 		if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0)
    700 			return (error);
    701 		for (i = 0; i < MAXQUOTAS; i++) {
    702 			if (ump->um_quotas[i] == NULLVP)
    703 				continue;
    704 			quotaoff(p, mp, i);
    705 		}
    706 		/*
    707 		 * Here we fall through to vflush again to ensure
    708 		 * that we have gotten rid of all the system vnodes.
    709 		 */
    710 	}
    711 #endif
    712 	error = vflush(mp, NULLVP, flags);
    713 	return (error);
    714 }
    715 
    716 /*
    717  * Get file system statistics.
    718  */
    719 int
    720 ffs_statfs(mp, sbp, p)
    721 	struct mount *mp;
    722 	register struct statfs *sbp;
    723 	struct proc *p;
    724 {
    725 	register struct ufsmount *ump;
    726 	register struct fs *fs;
    727 
    728 	ump = VFSTOUFS(mp);
    729 	fs = ump->um_fs;
    730 	if (fs->fs_magic != FS_MAGIC)
    731 		panic("ffs_statfs");
    732 #ifdef COMPAT_09
    733 	sbp->f_type = 1;
    734 #else
    735 	sbp->f_type = 0;
    736 #endif
    737 	sbp->f_bsize = fs->fs_fsize;
    738 	sbp->f_iosize = fs->fs_bsize;
    739 	sbp->f_blocks = fs->fs_dsize;
    740 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
    741 		fs->fs_cstotal.cs_nffree;
    742 	sbp->f_bavail = (long) (((u_int64_t) fs->fs_dsize * (u_int64_t)
    743 		(100 - fs->fs_minfree) / (u_int64_t) 100) -
    744 		(u_int64_t) (fs->fs_dsize - sbp->f_bfree));
    745 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
    746 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
    747 	if (sbp != &mp->mnt_stat) {
    748 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
    749 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
    750 	}
    751 	strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
    752 	return (0);
    753 }
    754 
    755 /*
    756  * Go through the disk queues to initiate sandbagged IO;
    757  * go through the inodes to write those that have been modified;
    758  * initiate the writing of the super block if it has been modified.
    759  *
    760  * Note: we are always called with the filesystem marked `MPBUSY'.
    761  */
    762 int
    763 ffs_sync(mp, waitfor, cred, p)
    764 	struct mount *mp;
    765 	int waitfor;
    766 	struct ucred *cred;
    767 	struct proc *p;
    768 {
    769 	struct vnode *vp, *nvp;
    770 	struct inode *ip;
    771 	struct ufsmount *ump = VFSTOUFS(mp);
    772 	struct fs *fs;
    773 	int error, allerror = 0;
    774 
    775 	fs = ump->um_fs;
    776 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
    777 		printf("fs = %s\n", fs->fs_fsmnt);
    778 		panic("update: rofs mod");
    779 	}
    780 	/*
    781 	 * Write back each (modified) inode.
    782 	 */
    783 	simple_lock(&mntvnode_slock);
    784 loop:
    785 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
    786 		/*
    787 		 * If the vnode that we are about to sync is no longer
    788 		 * associated with this mount point, start over.
    789 		 */
    790 		if (vp->v_mount != mp)
    791 			goto loop;
    792 		simple_lock(&vp->v_interlock);
    793 		nvp = vp->v_mntvnodes.le_next;
    794 		ip = VTOI(vp);
    795 		if ((ip->i_flag &
    796 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
    797 		    vp->v_dirtyblkhd.lh_first == NULL) {
    798 			simple_unlock(&vp->v_interlock);
    799 			continue;
    800 		}
    801 		simple_unlock(&mntvnode_slock);
    802 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
    803 		if (error) {
    804 			simple_lock(&mntvnode_slock);
    805 			if (error == ENOENT)
    806 				goto loop;
    807 			continue;
    808 		}
    809 		if ((error = VOP_FSYNC(vp, cred,
    810 		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, p)) != 0)
    811 			allerror = error;
    812 		vput(vp);
    813 		simple_lock(&mntvnode_slock);
    814 	}
    815 	simple_unlock(&mntvnode_slock);
    816 	/*
    817 	 * Force stale file system control information to be flushed.
    818 	 */
    819 	if ((error = VOP_FSYNC(ump->um_devvp, cred,
    820 	    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, p)) != 0)
    821 		allerror = error;
    822 #ifdef QUOTA
    823 	qsync(mp);
    824 #endif
    825 	/*
    826 	 * Write back modified superblock.
    827 	 */
    828 	if (fs->fs_fmod != 0) {
    829 		fs->fs_fmod = 0;
    830 		fs->fs_time = time.tv_sec;
    831 		allerror = ffs_cgupdate(ump, waitfor);
    832 	}
    833 	return (allerror);
    834 }
    835 
    836 /*
    837  * Look up a FFS dinode number to find its incore vnode, otherwise read it
    838  * in from disk.  If it is in core, wait for the lock bit to clear, then
    839  * return the inode locked.  Detection and handling of mount points must be
    840  * done by the calling routine.
    841  */
    842 int
    843 ffs_vget(mp, ino, vpp)
    844 	struct mount *mp;
    845 	ino_t ino;
    846 	struct vnode **vpp;
    847 {
    848 	struct fs *fs;
    849 	struct inode *ip;
    850 	struct ufsmount *ump;
    851 	struct buf *bp;
    852 	struct vnode *vp;
    853 	dev_t dev;
    854 	int type, error;
    855 
    856 	ump = VFSTOUFS(mp);
    857 	dev = ump->um_dev;
    858 	do {
    859 		if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
    860 			return (0);
    861 	} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));
    862 
    863 	/* Allocate a new vnode/inode. */
    864 	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
    865 		*vpp = NULL;
    866 		lockmgr(&ufs_hashlock, LK_RELEASE, 0);
    867 		return (error);
    868 	}
    869 	type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
    870 	MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
    871 	bzero((caddr_t)ip, sizeof(struct inode));
    872 	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
    873 	vp->v_data = ip;
    874 	ip->i_vnode = vp;
    875 	ip->i_fs = fs = ump->um_fs;
    876 	ip->i_dev = dev;
    877 	ip->i_number = ino;
    878 #ifdef QUOTA
    879 	{
    880 		int i;
    881 
    882 		for (i = 0; i < MAXQUOTAS; i++)
    883 			ip->i_dquot[i] = NODQUOT;
    884 	}
    885 #endif
    886 	/*
    887 	 * Put it onto its hash chain and lock it so that other requests for
    888 	 * this inode will block if they arrive while we are sleeping waiting
    889 	 * for old data structures to be purged or for the contents of the
    890 	 * disk portion of this inode to be read.
    891 	 */
    892 	ufs_ihashins(ip);
    893 	lockmgr(&ufs_hashlock, LK_RELEASE, 0);
    894 
    895 	/* Read in the disk contents for the inode, copy into the inode. */
    896 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
    897 		      (int)fs->fs_bsize, NOCRED, &bp);
    898 	if (error) {
    899 		/*
    900 		 * The inode does not contain anything useful, so it would
    901 		 * be misleading to leave it on its hash chain. With mode
    902 		 * still zero, it will be unlinked and returned to the free
    903 		 * list by vput().
    904 		 */
    905 		vput(vp);
    906 		brelse(bp);
    907 		*vpp = NULL;
    908 		return (error);
    909 	}
    910 #ifdef FFS_EI
    911 	if (UFS_MPNEEDSWAP(mp))
    912 		ffs_dinode_swap((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino),
    913 			&(ip->i_din.ffs_din));
    914 	else
    915 #endif
    916 		ip->i_din.ffs_din =
    917 			*((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
    918 	brelse(bp);
    919 
    920 	/*
    921 	 * Initialize the vnode from the inode, check for aliases.
    922 	 * Note that the underlying vnode may have changed.
    923 	 */
    924 	error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp);
    925 	if (error) {
    926 		vput(vp);
    927 		*vpp = NULL;
    928 		return (error);
    929 	}
    930 	/*
    931 	 * Finish inode initialization now that aliasing has been resolved.
    932 	 */
    933 	ip->i_devvp = ump->um_devvp;
    934 	VREF(ip->i_devvp);
    935 	/*
    936 	 * Ensure that uid and gid are correct. This is a temporary
    937 	 * fix until fsck has been changed to do the update.
    938 	 */
    939 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
    940 		ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid;		/* XXX */
    941 		ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid;		/* XXX */
    942 	}						/* XXX */
    943 
    944 	*vpp = vp;
    945 	return (0);
    946 }
    947 
    948 /*
    949  * File handle to vnode
    950  *
    951  * Have to be really careful about stale file handles:
    952  * - check that the inode number is valid
    953  * - call ffs_vget() to get the locked inode
    954  * - check for an unallocated inode (i_mode == 0)
    955  * - check that the given client host has export rights and return
    956  *   those rights via. exflagsp and credanonp
    957  */
    958 int
    959 ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
    960 	register struct mount *mp;
    961 	struct fid *fhp;
    962 	struct mbuf *nam;
    963 	struct vnode **vpp;
    964 	int *exflagsp;
    965 	struct ucred **credanonp;
    966 {
    967 	register struct ufid *ufhp;
    968 	struct fs *fs;
    969 
    970 	ufhp = (struct ufid *)fhp;
    971 	fs = VFSTOUFS(mp)->um_fs;
    972 	if (ufhp->ufid_ino < ROOTINO ||
    973 	    ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
    974 		return (ESTALE);
    975 	return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp));
    976 }
    977 
    978 /*
    979  * Vnode pointer to File handle
    980  */
    981 /* ARGSUSED */
    982 int
    983 ffs_vptofh(vp, fhp)
    984 	struct vnode *vp;
    985 	struct fid *fhp;
    986 {
    987 	register struct inode *ip;
    988 	register struct ufid *ufhp;
    989 
    990 	ip = VTOI(vp);
    991 	ufhp = (struct ufid *)fhp;
    992 	ufhp->ufid_len = sizeof(struct ufid);
    993 	ufhp->ufid_ino = ip->i_number;
    994 	ufhp->ufid_gen = ip->i_ffs_gen;
    995 	return (0);
    996 }
    997 
/*
 * FFS initialization entry point; all of the work is handed to
 * ufs_init().
 */
void
ffs_init()
{

	ufs_init();
}
   1003 
   1004 int
   1005 ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
   1006 	int *name;
   1007 	u_int namelen;
   1008 	void *oldp;
   1009 	size_t *oldlenp;
   1010 	void *newp;
   1011 	size_t newlen;
   1012 	struct proc *p;
   1013 {
   1014 	extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
   1015 
   1016 	/* all sysctl names at this level are terminal */
   1017 	if (namelen != 1)
   1018 		return (ENOTDIR);		/* overloaded */
   1019 
   1020 	switch (name[0]) {
   1021 	case FFS_CLUSTERREAD:
   1022 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1023 		    &doclusterread));
   1024 	case FFS_CLUSTERWRITE:
   1025 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1026 		    &doclusterwrite));
   1027 	case FFS_REALLOCBLKS:
   1028 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1029 		    &doreallocblks));
   1030 	case FFS_ASYNCFREE:
   1031 		return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
   1032 	default:
   1033 		return (EOPNOTSUPP);
   1034 	}
   1035 	/* NOTREACHED */
   1036 }
   1037 
   1038 /*
   1039  * Write a superblock and associated information back to disk.
   1040  */
   1041 int
   1042 ffs_sbupdate(mp, waitfor)
   1043 	struct ufsmount *mp;
   1044 	int waitfor;
   1045 {
   1046 	register struct fs *fs = mp->um_fs;
   1047 	register struct buf *bp;
   1048 	int i, error = 0;
   1049 	int32_t saved_nrpos = fs->fs_nrpos;
   1050 	int64_t saved_qbmask = fs->fs_qbmask;
   1051 	int64_t saved_qfmask = fs->fs_qfmask;
   1052 	u_int64_t saved_maxfilesize = fs->fs_maxfilesize;
   1053 
   1054 	/* Restore compatibility to old file systems.		   XXX */
   1055 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
   1056 		fs->fs_nrpos = -1;		/* XXX */
   1057 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
   1058 		int32_t *lp, tmp;				/* XXX */
   1059 								/* XXX */
   1060 		lp = (int32_t *)&fs->fs_qbmask;		/* XXX nuke qfmask too */
   1061 		tmp = lp[4];					/* XXX */
   1062 		for (i = 4; i > 0; i--)				/* XXX */
   1063 			lp[i] = lp[i-1];			/* XXX */
   1064 		lp[0] = tmp;					/* XXX */
   1065 	}							/* XXX */
   1066 	fs->fs_maxfilesize = mp->um_savedmaxfilesize;	/* XXX */
   1067 
   1068 	bp = getblk(mp->um_devvp, SBOFF >> (fs->fs_fshift - fs->fs_fsbtodb),
   1069 	    (int)fs->fs_sbsize, 0, 0);
   1070 	bcopy(fs, bp->b_data, fs->fs_sbsize);
   1071 #ifdef FFS_EI
   1072 	if (mp->um_flags & UFS_NEEDSWAP)
   1073 		ffs_sb_swap(fs, (struct fs*)bp->b_data, 1);
   1074 #endif
   1075 
   1076 	fs->fs_nrpos = saved_nrpos; /* XXX */
   1077 	fs->fs_qbmask = saved_qbmask; /* XXX */
   1078 	fs->fs_qfmask = saved_qfmask; /* XXX */
   1079 	fs->fs_maxfilesize = saved_maxfilesize; /* XXX */
   1080 
   1081 	if (waitfor == MNT_WAIT)
   1082 		error = bwrite(bp);
   1083 	else
   1084 		bawrite(bp);
   1085 	return (error);
   1086 }
   1087 
   1088 int
   1089 ffs_cgupdate(mp, waitfor)
   1090 	struct ufsmount *mp;
   1091 	int waitfor;
   1092 {
   1093 	register struct fs *fs = mp->um_fs;
   1094 	register struct buf *bp;
   1095 	int blks;
   1096 	caddr_t space;
   1097 	int i, size, error = 0, allerror = 0;
   1098 
   1099 	allerror = ffs_sbupdate(mp, waitfor);
   1100 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
   1101 	space = (caddr_t)fs->fs_csp[0];
   1102 	for (i = 0; i < blks; i += fs->fs_frag) {
   1103 		size = fs->fs_bsize;
   1104 		if (i + fs->fs_frag > blks)
   1105 			size = (blks - i) * fs->fs_fsize;
   1106 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
   1107 		    size, 0, 0);
   1108 #ifdef FFS_EI
   1109 		if (mp->um_flags & UFS_NEEDSWAP)
   1110 			ffs_csum_swap((struct csum*)space,
   1111 				(struct csum*)bp->b_data, size);
   1112 		else
   1113 #endif
   1114 			bcopy(space, bp->b_data, (u_int)size);
   1115 		space += size;
   1116 		if (waitfor == MNT_WAIT)
   1117 			error = bwrite(bp);
   1118 		else
   1119 			bawrite(bp);
   1120 	}
   1121 	if (!allerror && error)
   1122 		allerror = error;
   1123 	return (allerror);
   1124 }
   1125