Home | History | Annotate | Line # | Download | only in dev
vnd.c revision 1.36
      1  1.35        pk /*	$NetBSD: vnd.c,v 1.36 1997/05/25 19:37:36 pk Exp $	*/
      2  1.11       cgd 
      3   1.1    brezak /*
      4   1.1    brezak  * Copyright (c) 1988 University of Utah.
      5   1.1    brezak  * Copyright (c) 1990, 1993
      6   1.1    brezak  *	The Regents of the University of California.  All rights reserved.
      7   1.1    brezak  *
      8   1.1    brezak  * This code is derived from software contributed to Berkeley by
      9   1.1    brezak  * the Systems Programming Group of the University of Utah Computer
     10   1.1    brezak  * Science Department.
     11   1.1    brezak  *
     12   1.1    brezak  * Redistribution and use in source and binary forms, with or without
     13   1.1    brezak  * modification, are permitted provided that the following conditions
     14   1.1    brezak  * are met:
     15   1.1    brezak  * 1. Redistributions of source code must retain the above copyright
     16   1.1    brezak  *    notice, this list of conditions and the following disclaimer.
     17   1.1    brezak  * 2. Redistributions in binary form must reproduce the above copyright
     18   1.1    brezak  *    notice, this list of conditions and the following disclaimer in the
     19   1.1    brezak  *    documentation and/or other materials provided with the distribution.
     20   1.1    brezak  * 3. All advertising materials mentioning features or use of this software
     21   1.1    brezak  *    must display the following acknowledgement:
     22   1.1    brezak  *	This product includes software developed by the University of
     23   1.1    brezak  *	California, Berkeley and its contributors.
     24   1.1    brezak  * 4. Neither the name of the University nor the names of its contributors
     25   1.1    brezak  *    may be used to endorse or promote products derived from this software
     26   1.1    brezak  *    without specific prior written permission.
     27   1.1    brezak  *
     28   1.1    brezak  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29   1.1    brezak  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30   1.1    brezak  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31   1.1    brezak  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32   1.1    brezak  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33   1.1    brezak  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34   1.1    brezak  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35   1.1    brezak  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36   1.1    brezak  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37   1.1    brezak  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38   1.1    brezak  * SUCH DAMAGE.
     39   1.1    brezak  *
     40   1.5       cgd  * from: Utah $Hdr: vn.c 1.13 94/04/02$
     41   1.1    brezak  *
     42   1.5       cgd  *	@(#)vn.c	8.6 (Berkeley) 4/1/94
     43   1.1    brezak  */
     44   1.1    brezak 
     45   1.1    brezak /*
     46   1.1    brezak  * Vnode disk driver.
     47   1.1    brezak  *
     48   1.1    brezak  * Block/character interface to a vnode.  Allows one to treat a file
     49   1.1    brezak  * as a disk (e.g. build a filesystem in it, mount it, etc.).
     50   1.1    brezak  *
     51   1.1    brezak  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
     52   1.1    brezak  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
     53   1.1    brezak  * local buffer cache.
     54   1.1    brezak  *
     55   1.1    brezak  * NOTE 2: There is a security issue involved with this driver.
     56   1.1    brezak  * Once mounted all access to the contents of the "mapped" file via
     57   1.1    brezak  * the special file is controlled by the permissions on the special
     58   1.1    brezak  * file, the protection of the mapped file is ignored (effectively,
     59   1.1    brezak  * by using root credentials in all transactions).
     60   1.1    brezak  *
     61   1.1    brezak  * NOTE 3: Doesn't interact with leases, should it?
     62   1.1    brezak  */
     63   1.1    brezak 
     64   1.1    brezak #include <sys/param.h>
     65   1.1    brezak #include <sys/systm.h>
     66   1.1    brezak #include <sys/namei.h>
     67   1.1    brezak #include <sys/proc.h>
     68   1.1    brezak #include <sys/errno.h>
     69   1.1    brezak #include <sys/buf.h>
     70   1.1    brezak #include <sys/malloc.h>
     71   1.1    brezak #include <sys/ioctl.h>
     72  1.16       cgd #include <sys/disklabel.h>
     73  1.22   thorpej #include <sys/device.h>
     74  1.22   thorpej #include <sys/disk.h>
     75  1.22   thorpej #include <sys/stat.h>
     76   1.1    brezak #include <sys/mount.h>
     77   1.1    brezak #include <sys/vnode.h>
     78   1.1    brezak #include <sys/file.h>
     79   1.1    brezak #include <sys/uio.h>
     80  1.26  christos #include <sys/conf.h>
     81   1.1    brezak 
     82   1.1    brezak #include <miscfs/specfs/specdev.h>
     83   1.1    brezak 
     84  1.17       cgd #include <dev/vndioctl.h>
     85   1.1    brezak 
#ifdef DEBUG
/* When cleared, vndstrategy() discards the read-ahead count from VOP_BMAP. */
int dovndcluster = 1;
/* Bitmask of the VDB_* values below; selects which debug printfs fire. */
int vnddebug = 0x00;
#define VDB_FOLLOW	0x01	/* trace entry into the driver routines */
#define VDB_INIT	0x02	/* report VNDIOCSET/VNDIOCCLR outcomes */
#define VDB_IO		0x04	/* trace individual transfers */
#endif

/*
 * vndstrategy() stores the block number here right before disksort();
 * the field is simply an alias for b_resid.
 */
#define b_cylin	b_resid

/* Translate a device number into an index into vnd_softc[]. */
#define	vndunit(x)	DISKUNIT(x)
     97  1.18       cgd 
     98  1.36        pk struct vndxfer {
     99  1.36        pk 	struct buf	*vx_bp;		/* Pointer to parent buffer */
    100  1.36        pk 	int		vx_error;
    101  1.36        pk 	int		vx_pending;	/* # of pending aux buffers */
    102  1.36        pk };
    103  1.36        pk 
    104  1.18       cgd struct vndbuf {
    105  1.18       cgd 	struct buf	vb_buf;
    106  1.36        pk 	struct vndxfer	*vb_xfer;
    107  1.18       cgd };
    108   1.1    brezak 
    109  1.36        pk #define	getvndxfer()	\
    110  1.36        pk 	((struct vndxfer *)malloc(sizeof(struct vndxfer), M_DEVBUF, M_WAITOK))
    111  1.36        pk #define putvndxfer(vnx)	\
    112  1.36        pk 	free((caddr_t)(vnx), M_DEVBUF)
    113  1.17       cgd #define	getvndbuf()	\
    114  1.18       cgd 	((struct vndbuf *)malloc(sizeof(struct vndbuf), M_DEVBUF, M_WAITOK))
    115  1.18       cgd #define putvndbuf(vbp)	\
    116  1.18       cgd 	free((caddr_t)(vbp), M_DEVBUF)
    117   1.1    brezak 
    118  1.17       cgd struct vnd_softc {
    119   1.1    brezak 	int		 sc_flags;	/* flags */
    120  1.17       cgd 	size_t		 sc_size;	/* size of vnd */
    121   1.1    brezak 	struct vnode	*sc_vp;		/* vnode */
    122   1.1    brezak 	struct ucred	*sc_cred;	/* credentials */
    123   1.1    brezak 	int		 sc_maxactive;	/* max # of active requests */
    124   1.1    brezak 	struct buf	 sc_tab;	/* transfer queue */
    125  1.23   thorpej 	char		 sc_xname[8];	/* XXX external name */
    126  1.23   thorpej 	struct disk	 sc_dkdev;	/* generic disk device info */
    127   1.1    brezak };
    128   1.1    brezak 
    129   1.1    brezak /* sc_flags */
    130   1.1    brezak #define	VNF_ALIVE	0x01
    131   1.1    brezak #define VNF_INITED	0x02
    132  1.22   thorpej #define VNF_WANTED	0x40
    133  1.22   thorpej #define VNF_LOCKED	0x80
    134   1.1    brezak 
    135  1.17       cgd struct vnd_softc *vnd_softc;
    136  1.22   thorpej int numvnd = 0;
    137  1.22   thorpej 
    138  1.22   thorpej /* called by main() at boot time */
    139  1.22   thorpej void	vndattach __P((int));
    140   1.1    brezak 
    141  1.17       cgd void	vndclear __P((struct vnd_softc *));
    142  1.17       cgd void	vndstart __P((struct vnd_softc *));
    143  1.17       cgd int	vndsetcred __P((struct vnd_softc *, struct ucred *));
    144  1.17       cgd void	vndthrottle __P((struct vnd_softc *, struct vnode *));
    145  1.24  christos void	vndiodone __P((struct buf *));
    146  1.24  christos void	vndshutdown __P((void));
    147  1.16       cgd 
    148  1.22   thorpej static	int vndlock __P((struct vnd_softc *));
    149  1.22   thorpej static	void vndunlock __P((struct vnd_softc *));
    150  1.22   thorpej 
    151   1.1    brezak void
    152  1.17       cgd vndattach(num)
    153   1.1    brezak 	int num;
    154   1.1    brezak {
    155   1.1    brezak 	char *mem;
    156   1.1    brezak 	register u_long size;
    157   1.1    brezak 
    158   1.1    brezak 	if (num <= 0)
    159   1.1    brezak 		return;
    160  1.17       cgd 	size = num * sizeof(struct vnd_softc);
    161   1.1    brezak 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
    162   1.1    brezak 	if (mem == NULL) {
    163  1.30  christos 		printf("WARNING: no memory for vnode disks\n");
    164   1.1    brezak 		return;
    165   1.1    brezak 	}
    166   1.1    brezak 	bzero(mem, size);
    167  1.17       cgd 	vnd_softc = (struct vnd_softc *)mem;
    168   1.1    brezak 	numvnd = num;
    169   1.1    brezak }
    170   1.1    brezak 
    171   1.1    brezak int
    172  1.17       cgd vndopen(dev, flags, mode, p)
    173   1.1    brezak 	dev_t dev;
    174   1.1    brezak 	int flags, mode;
    175   1.1    brezak 	struct proc *p;
    176   1.1    brezak {
    177  1.17       cgd 	int unit = vndunit(dev);
    178  1.22   thorpej 	struct vnd_softc *sc;
    179  1.22   thorpej 	int error = 0, part, pmask;
    180  1.22   thorpej 
    181  1.22   thorpej 	/*
    182  1.22   thorpej 	 * XXX Should support disklabels.
    183  1.22   thorpej 	 */
    184   1.1    brezak 
    185   1.1    brezak #ifdef DEBUG
    186  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    187  1.30  christos 		printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
    188   1.1    brezak #endif
    189   1.1    brezak 	if (unit >= numvnd)
    190  1.22   thorpej 		return (ENXIO);
    191  1.22   thorpej 	sc = &vnd_softc[unit];
    192  1.22   thorpej 
    193  1.24  christos 	if ((error = vndlock(sc)) != 0)
    194  1.22   thorpej 		return (error);
    195  1.22   thorpej 
    196  1.22   thorpej 	part = DISKPART(dev);
    197  1.22   thorpej 	pmask = (1 << part);
    198  1.22   thorpej 
    199  1.22   thorpej 	/* Prevent our unit from being unconfigured while open. */
    200  1.22   thorpej 	switch (mode) {
    201  1.22   thorpej 	case S_IFCHR:
    202  1.22   thorpej 		sc->sc_dkdev.dk_copenmask |= pmask;
    203  1.22   thorpej 		break;
    204  1.22   thorpej 
    205  1.22   thorpej 	case S_IFBLK:
    206  1.22   thorpej 		sc->sc_dkdev.dk_bopenmask |= pmask;
    207  1.22   thorpej 		break;
    208  1.22   thorpej 	}
    209  1.22   thorpej 	sc->sc_dkdev.dk_openmask =
    210  1.22   thorpej 	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
    211  1.22   thorpej 
    212  1.22   thorpej 	vndunlock(sc);
    213  1.22   thorpej 	return (0);
    214   1.1    brezak }
    215   1.1    brezak 
    216   1.4   deraadt int
    217  1.17       cgd vndclose(dev, flags, mode, p)
    218   1.4   deraadt 	dev_t dev;
    219   1.4   deraadt 	int flags, mode;
    220   1.4   deraadt 	struct proc *p;
    221   1.4   deraadt {
    222  1.22   thorpej 	int unit = vndunit(dev);
    223  1.22   thorpej 	struct vnd_softc *sc;
    224  1.22   thorpej 	int error = 0, part;
    225  1.22   thorpej 
    226   1.4   deraadt #ifdef DEBUG
    227  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    228  1.30  christos 		printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
    229   1.4   deraadt #endif
    230  1.22   thorpej 
    231  1.22   thorpej 	if (unit >= numvnd)
    232  1.22   thorpej 		return (ENXIO);
    233  1.22   thorpej 	sc = &vnd_softc[unit];
    234  1.22   thorpej 
    235  1.24  christos 	if ((error = vndlock(sc)) != 0)
    236  1.22   thorpej 		return (error);
    237  1.22   thorpej 
    238  1.22   thorpej 	part = DISKPART(dev);
    239  1.22   thorpej 
    240  1.22   thorpej 	/* ...that much closer to allowing unconfiguration... */
    241  1.22   thorpej 	switch (mode) {
    242  1.22   thorpej 	case S_IFCHR:
    243  1.22   thorpej 		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
    244  1.22   thorpej 		break;
    245  1.22   thorpej 
    246  1.22   thorpej 	case S_IFBLK:
    247  1.22   thorpej 		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
    248  1.22   thorpej 		break;
    249  1.22   thorpej 	}
    250  1.22   thorpej 	sc->sc_dkdev.dk_openmask =
    251  1.22   thorpej 	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
    252  1.22   thorpej 
    253  1.22   thorpej 	vndunlock(sc);
    254  1.22   thorpej 	return (0);
    255   1.4   deraadt }
    256   1.4   deraadt 
    257   1.1    brezak /*
    258   1.1    brezak  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
    259   1.1    brezak  * Note that this driver can only be used for swapping over NFS on the hp
    260   1.1    brezak  * since nfs_strategy on the vax cannot handle u-areas and page tables.
    261   1.1    brezak  */
    262   1.6   mycroft void
    263  1.17       cgd vndstrategy(bp)
    264   1.1    brezak 	register struct buf *bp;
    265   1.1    brezak {
    266  1.17       cgd 	int unit = vndunit(bp->b_dev);
    267  1.17       cgd 	register struct vnd_softc *vnd = &vnd_softc[unit];
    268  1.18       cgd 	register struct vndbuf *nbp;
    269  1.36        pk 	struct vndxfer *vnx;
    270   1.1    brezak 	register int bn, bsize, resid;
    271   1.1    brezak 	register caddr_t addr;
    272   1.5       cgd 	int sz, flags, error;
    273   1.1    brezak 
    274   1.1    brezak #ifdef DEBUG
    275  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    276  1.30  christos 		printf("vndstrategy(%p): unit %d\n", bp, unit);
    277   1.1    brezak #endif
    278  1.17       cgd 	if ((vnd->sc_flags & VNF_INITED) == 0) {
    279   1.1    brezak 		bp->b_error = ENXIO;
    280   1.1    brezak 		bp->b_flags |= B_ERROR;
    281   1.1    brezak 		biodone(bp);
    282   1.1    brezak 		return;
    283   1.1    brezak 	}
    284   1.1    brezak 	bn = bp->b_blkno;
    285   1.1    brezak 	sz = howmany(bp->b_bcount, DEV_BSIZE);
    286   1.1    brezak 	bp->b_resid = bp->b_bcount;
    287  1.17       cgd 	if (bn < 0 || bn + sz > vnd->sc_size) {
    288  1.17       cgd 		if (bn != vnd->sc_size) {
    289   1.1    brezak 			bp->b_error = EINVAL;
    290   1.1    brezak 			bp->b_flags |= B_ERROR;
    291   1.1    brezak 		}
    292   1.1    brezak 		biodone(bp);
    293   1.1    brezak 		return;
    294   1.1    brezak 	}
    295   1.1    brezak 	bn = dbtob(bn);
    296  1.17       cgd  	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
    297   1.5       cgd 	addr = bp->b_data;
    298   1.1    brezak 	flags = bp->b_flags | B_CALL;
    299  1.36        pk 
    300  1.36        pk 	/* Allocate a header for this transfer and link it to the buffer */
    301  1.36        pk 	vnx = getvndxfer();
    302  1.36        pk 	vnx->vx_error = 0;
    303  1.36        pk 	vnx->vx_pending = 0;
    304  1.36        pk 	vnx->vx_bp = bp;
    305  1.36        pk 
    306   1.1    brezak 	for (resid = bp->b_resid; resid; resid -= sz) {
    307   1.1    brezak 		struct vnode *vp;
    308   1.1    brezak 		daddr_t nbn;
    309   1.5       cgd 		int off, s, nra;
    310   1.1    brezak 
    311   1.5       cgd 		nra = 0;
    312  1.21   mycroft 		VOP_LOCK(vnd->sc_vp);
    313  1.17       cgd 		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
    314  1.21   mycroft 		VOP_UNLOCK(vnd->sc_vp);
    315   1.5       cgd 		if (error == 0 && (long)nbn == -1)
    316   1.5       cgd 			error = EIO;
    317  1.36        pk 
    318  1.36        pk 		/*
    319  1.36        pk 		 * If there was an error or a hole in the file...punt.
    320  1.36        pk 		 * Note that we may have to wait for any operations
    321  1.36        pk 		 * that we have already fired off before releasing
    322  1.36        pk 		 * the buffer.
    323  1.36        pk 		 *
    324  1.36        pk 		 * XXX we could deal with holes here but it would be
    325  1.36        pk 		 * a hassle (in the write case).
    326  1.36        pk 		 */
    327  1.36        pk 		if (error) {
    328  1.36        pk 			vnx->vx_error = error;
    329  1.36        pk 			if (vnx->vx_pending == 0) {
    330  1.36        pk 				bp->b_error = error;
    331  1.36        pk 				bp->b_flags |= B_ERROR;
    332  1.36        pk 				putvndxfer(vnx);
    333  1.36        pk 				biodone(bp);
    334  1.36        pk 			}
    335  1.36        pk 			return;
    336  1.36        pk 		}
    337  1.36        pk 
    338   1.5       cgd #ifdef DEBUG
    339  1.17       cgd 		if (!dovndcluster)
    340   1.5       cgd 			nra = 0;
    341   1.1    brezak #endif
    342   1.5       cgd 
    343  1.24  christos 		if ((off = bn % bsize) != 0)
    344   1.5       cgd 			sz = bsize - off;
    345   1.5       cgd 		else
    346   1.5       cgd 			sz = (1 + nra) * bsize;
    347   1.5       cgd 		if (resid < sz)
    348   1.5       cgd 			sz = resid;
    349   1.1    brezak #ifdef DEBUG
    350  1.17       cgd 		if (vnddebug & VDB_IO)
    351  1.30  christos 			printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n",
    352  1.29  christos 			    vnd->sc_vp, vp, bn, nbn, sz);
    353   1.1    brezak #endif
    354   1.5       cgd 
    355  1.17       cgd 		nbp = getvndbuf();
    356  1.18       cgd 		nbp->vb_buf.b_flags = flags;
    357  1.18       cgd 		nbp->vb_buf.b_bcount = sz;
    358  1.18       cgd 		nbp->vb_buf.b_bufsize = bp->b_bufsize;
    359  1.18       cgd 		nbp->vb_buf.b_error = 0;
    360   1.1    brezak 		if (vp->v_type == VBLK || vp->v_type == VCHR)
    361  1.18       cgd 			nbp->vb_buf.b_dev = vp->v_rdev;
    362   1.1    brezak 		else
    363  1.18       cgd 			nbp->vb_buf.b_dev = NODEV;
    364  1.18       cgd 		nbp->vb_buf.b_data = addr;
    365  1.18       cgd 		nbp->vb_buf.b_blkno = nbn + btodb(off);
    366  1.18       cgd 		nbp->vb_buf.b_proc = bp->b_proc;
    367  1.18       cgd 		nbp->vb_buf.b_iodone = vndiodone;
    368  1.18       cgd 		nbp->vb_buf.b_vp = vp;
    369  1.18       cgd 		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
    370  1.18       cgd 		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
    371  1.33        pk 		if (bp->b_dirtyend == 0) {
    372  1.33        pk 			nbp->vb_buf.b_dirtyoff = 0;
    373  1.33        pk 			nbp->vb_buf.b_dirtyend = sz;
    374  1.33        pk 		} else {
    375  1.33        pk 			nbp->vb_buf.b_dirtyoff =
    376  1.33        pk 			    max(0, bp->b_dirtyoff - (bp->b_bcount - resid));
    377  1.33        pk 			nbp->vb_buf.b_dirtyend =
    378  1.34        pk 			    min(sz,
    379  1.34        pk 				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
    380  1.33        pk 		}
    381  1.33        pk 		if (bp->b_validend == 0) {
    382  1.33        pk 			nbp->vb_buf.b_validoff = 0;
    383  1.33        pk 			nbp->vb_buf.b_validend = sz;
    384  1.33        pk 		} else {
    385  1.33        pk 			nbp->vb_buf.b_validoff =
    386  1.33        pk 			    max(0, bp->b_validoff - (bp->b_bcount - resid));
    387  1.33        pk 			nbp->vb_buf.b_validend =
    388  1.34        pk 			    min(sz,
    389  1.34        pk 				max(0, bp->b_validend - (bp->b_bcount-resid)));
    390  1.33        pk 		}
    391  1.18       cgd 
    392  1.36        pk 		nbp->vb_xfer = vnx;
    393  1.36        pk 		vnx->vx_pending++;
    394  1.18       cgd 
    395   1.1    brezak 		/*
    396   1.1    brezak 		 * Just sort by block number
    397   1.1    brezak 		 */
    398  1.18       cgd 		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
    399   1.1    brezak 		s = splbio();
    400  1.18       cgd 		disksort(&vnd->sc_tab, &nbp->vb_buf);
    401  1.17       cgd 		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
    402  1.17       cgd 			vnd->sc_tab.b_active++;
    403  1.17       cgd 			vndstart(vnd);
    404   1.1    brezak 		}
    405   1.1    brezak 		splx(s);
    406   1.1    brezak 		bn += sz;
    407   1.1    brezak 		addr += sz;
    408   1.1    brezak 	}
    409   1.1    brezak }
    410   1.1    brezak 
    411   1.1    brezak /*
    412   1.1    brezak  * Feed requests sequentially.
    413   1.1    brezak  * We do it this way to keep from flooding NFS servers if we are connected
    414   1.1    brezak  * to an NFS file.  This places the burden on the client rather than the
    415   1.1    brezak  * server.
    416   1.1    brezak  */
    417  1.16       cgd void
    418  1.17       cgd vndstart(vnd)
    419  1.17       cgd 	register struct vnd_softc *vnd;
    420   1.1    brezak {
    421   1.1    brezak 	register struct buf *bp;
    422   1.1    brezak 
    423   1.1    brezak 	/*
    424   1.1    brezak 	 * Dequeue now since lower level strategy routine might
    425   1.1    brezak 	 * queue using same links
    426   1.1    brezak 	 */
    427  1.17       cgd 	bp = vnd->sc_tab.b_actf;
    428  1.17       cgd 	vnd->sc_tab.b_actf = bp->b_actf;
    429   1.1    brezak #ifdef DEBUG
    430  1.17       cgd 	if (vnddebug & VDB_IO)
    431  1.30  christos 		printf("vndstart(%ld): bp %p vp %p blkno %x addr %p cnt %lx\n",
    432  1.28  christos 		    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
    433  1.28  christos 		    bp->b_data, bp->b_bcount);
    434   1.1    brezak #endif
    435  1.23   thorpej 
    436  1.23   thorpej 	/* Instrumentation. */
    437  1.23   thorpej 	disk_busy(&vnd->sc_dkdev);
    438  1.23   thorpej 
    439   1.1    brezak 	if ((bp->b_flags & B_READ) == 0)
    440   1.1    brezak 		bp->b_vp->v_numoutput++;
    441   1.1    brezak 	VOP_STRATEGY(bp);
    442   1.1    brezak }
    443   1.1    brezak 
    444   1.1    brezak void
    445  1.24  christos vndiodone(bp)
    446  1.24  christos 	struct buf *bp;
    447   1.1    brezak {
    448  1.24  christos 	register struct vndbuf *vbp = (struct vndbuf *) bp;
    449  1.36        pk 	register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
    450  1.36        pk 	register struct buf *pbp = vnx->vx_bp;
    451  1.17       cgd 	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
    452  1.36        pk 	int s, resid;
    453   1.1    brezak 
    454   1.1    brezak 	s = splbio();
    455   1.1    brezak #ifdef DEBUG
    456  1.17       cgd 	if (vnddebug & VDB_IO)
    457  1.30  christos 		printf("vndiodone(%ld): vbp %p vp %p blkno %x addr %p cnt %lx\n",
    458  1.28  christos 		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
    459  1.28  christos 		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
    460  1.28  christos 		    vbp->vb_buf.b_bcount);
    461   1.1    brezak #endif
    462  1.23   thorpej 
    463  1.36        pk 	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
    464  1.36        pk 	pbp->b_resid -= resid;
    465  1.36        pk 	disk_unbusy(&vnd->sc_dkdev, resid);
    466  1.36        pk 	vnx->vx_pending--;
    467  1.36        pk 
    468  1.18       cgd 	if (vbp->vb_buf.b_error) {
    469   1.1    brezak #ifdef DEBUG
    470  1.17       cgd 		if (vnddebug & VDB_IO)
    471  1.30  christos 			printf("vndiodone: vbp %p error %d\n", vbp,
    472  1.18       cgd 			    vbp->vb_buf.b_error);
    473   1.1    brezak #endif
    474  1.36        pk 		vnx->vx_error = vbp->vb_buf.b_error;
    475   1.1    brezak 	}
    476  1.18       cgd 	putvndbuf(vbp);
    477  1.36        pk 
    478  1.36        pk 	/*
    479  1.36        pk 	 * Wrap up this transaction if it has run to completion or, in
    480  1.36        pk 	 * case of an error, when all auxiliary buffers have returned.
    481  1.36        pk 	 */
    482  1.36        pk 	if (pbp->b_resid == 0 || (vnx->vx_error && vnx->vx_pending == 0)) {
    483  1.36        pk 
    484  1.36        pk 		if (vnx->vx_error != 0) {
    485  1.36        pk 			pbp->b_flags |= B_ERROR;
    486  1.36        pk 			pbp->b_error = vnx->vx_error;
    487  1.36        pk 		}
    488  1.36        pk 		putvndxfer(vnx);
    489   1.1    brezak #ifdef DEBUG
    490  1.17       cgd 		if (vnddebug & VDB_IO)
    491  1.30  christos 			printf("vndiodone: pbp %p iodone\n", pbp);
    492   1.1    brezak #endif
    493   1.1    brezak 		biodone(pbp);
    494   1.1    brezak 	}
    495  1.36        pk 
    496  1.17       cgd 	if (vnd->sc_tab.b_actf)
    497  1.17       cgd 		vndstart(vnd);
    498   1.1    brezak 	else
    499  1.17       cgd 		vnd->sc_tab.b_active--;
    500   1.1    brezak 	splx(s);
    501  1.20   mycroft }
    502  1.20   mycroft 
    503  1.22   thorpej /* ARGSUSED */
    504  1.20   mycroft int
    505  1.22   thorpej vndread(dev, uio, flags)
    506  1.20   mycroft 	dev_t dev;
    507  1.20   mycroft 	struct uio *uio;
    508  1.22   thorpej 	int flags;
    509  1.20   mycroft {
    510  1.22   thorpej 	int unit = vndunit(dev);
    511  1.22   thorpej 	struct vnd_softc *sc;
    512  1.20   mycroft 
    513  1.20   mycroft #ifdef DEBUG
    514  1.20   mycroft 	if (vnddebug & VDB_FOLLOW)
    515  1.30  christos 		printf("vndread(%x, %p)\n", dev, uio);
    516  1.20   mycroft #endif
    517  1.22   thorpej 
    518  1.22   thorpej 	if (unit >= numvnd)
    519  1.22   thorpej 		return (ENXIO);
    520  1.22   thorpej 	sc = &vnd_softc[unit];
    521  1.22   thorpej 
    522  1.22   thorpej 	if ((sc->sc_flags & VNF_INITED) == 0)
    523  1.22   thorpej 		return (ENXIO);
    524  1.22   thorpej 
    525  1.20   mycroft 	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
    526  1.20   mycroft }
    527  1.20   mycroft 
    528  1.22   thorpej /* ARGSUSED */
    529  1.20   mycroft int
    530  1.22   thorpej vndwrite(dev, uio, flags)
    531  1.20   mycroft 	dev_t dev;
    532  1.20   mycroft 	struct uio *uio;
    533  1.22   thorpej 	int flags;
    534  1.20   mycroft {
    535  1.22   thorpej 	int unit = vndunit(dev);
    536  1.22   thorpej 	struct vnd_softc *sc;
    537  1.20   mycroft 
    538  1.20   mycroft #ifdef DEBUG
    539  1.20   mycroft 	if (vnddebug & VDB_FOLLOW)
    540  1.30  christos 		printf("vndwrite(%x, %p)\n", dev, uio);
    541  1.20   mycroft #endif
    542  1.22   thorpej 
    543  1.22   thorpej 	if (unit >= numvnd)
    544  1.22   thorpej 		return (ENXIO);
    545  1.22   thorpej 	sc = &vnd_softc[unit];
    546  1.22   thorpej 
    547  1.22   thorpej 	if ((sc->sc_flags & VNF_INITED) == 0)
    548  1.22   thorpej 		return (ENXIO);
    549  1.22   thorpej 
    550  1.20   mycroft 	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
    551   1.1    brezak }
    552   1.1    brezak 
    553   1.1    brezak /* ARGSUSED */
    554  1.16       cgd int
    555  1.17       cgd vndioctl(dev, cmd, data, flag, p)
    556   1.1    brezak 	dev_t dev;
    557   1.1    brezak 	u_long cmd;
    558   1.1    brezak 	caddr_t data;
    559   1.1    brezak 	int flag;
    560   1.1    brezak 	struct proc *p;
    561   1.1    brezak {
    562  1.17       cgd 	int unit = vndunit(dev);
    563  1.17       cgd 	register struct vnd_softc *vnd;
    564  1.17       cgd 	struct vnd_ioctl *vio;
    565   1.1    brezak 	struct vattr vattr;
    566   1.1    brezak 	struct nameidata nd;
    567  1.32   mycroft 	int error, part, pmask;
    568   1.1    brezak 
    569   1.1    brezak #ifdef DEBUG
    570  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    571  1.30  christos 		printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
    572   1.5       cgd 		    dev, cmd, data, flag, p, unit);
    573   1.1    brezak #endif
    574   1.1    brezak 	error = suser(p->p_ucred, &p->p_acflag);
    575   1.1    brezak 	if (error)
    576   1.1    brezak 		return (error);
    577   1.1    brezak 	if (unit >= numvnd)
    578   1.1    brezak 		return (ENXIO);
    579   1.1    brezak 
    580  1.17       cgd 	vnd = &vnd_softc[unit];
    581  1.17       cgd 	vio = (struct vnd_ioctl *)data;
    582   1.1    brezak 	switch (cmd) {
    583   1.1    brezak 
    584  1.17       cgd 	case VNDIOCSET:
    585  1.17       cgd 		if (vnd->sc_flags & VNF_INITED)
    586  1.22   thorpej 			return (EBUSY);
    587  1.22   thorpej 
    588  1.24  christos 		if ((error = vndlock(vnd)) != 0)
    589  1.22   thorpej 			return (error);
    590  1.22   thorpej 
    591   1.1    brezak 		/*
    592   1.1    brezak 		 * Always open for read and write.
    593   1.1    brezak 		 * This is probably bogus, but it lets vn_open()
    594   1.1    brezak 		 * weed out directories, sockets, etc. so we don't
    595   1.1    brezak 		 * have to worry about them.
    596   1.1    brezak 		 */
    597  1.17       cgd 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
    598  1.24  christos 		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
    599  1.22   thorpej 			vndunlock(vnd);
    600   1.1    brezak 			return(error);
    601  1.22   thorpej 		}
    602  1.24  christos 		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
    603  1.24  christos 		if (error) {
    604   1.1    brezak 			VOP_UNLOCK(nd.ni_vp);
    605   1.1    brezak 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
    606  1.22   thorpej 			vndunlock(vnd);
    607   1.1    brezak 			return(error);
    608   1.1    brezak 		}
    609   1.1    brezak 		VOP_UNLOCK(nd.ni_vp);
    610  1.17       cgd 		vnd->sc_vp = nd.ni_vp;
    611  1.17       cgd 		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */
    612  1.24  christos 		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
    613   1.5       cgd 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
    614  1.22   thorpej 			vndunlock(vnd);
    615   1.1    brezak 			return(error);
    616   1.1    brezak 		}
    617  1.17       cgd 		vndthrottle(vnd, vnd->sc_vp);
    618  1.17       cgd 		vio->vnd_size = dbtob(vnd->sc_size);
    619  1.17       cgd 		vnd->sc_flags |= VNF_INITED;
    620  1.17       cgd #ifdef DEBUG
    621  1.17       cgd 		if (vnddebug & VDB_INIT)
    622  1.30  christos 			printf("vndioctl: SET vp %p size %lx\n",
    623  1.28  christos 			    vnd->sc_vp, (unsigned long) vnd->sc_size);
    624   1.1    brezak #endif
    625  1.22   thorpej 
    626  1.23   thorpej 		/* Attach the disk. */
    627  1.23   thorpej 		bzero(vnd->sc_xname, sizeof(vnd->sc_xname));	/* XXX */
    628  1.30  christos 		sprintf(vnd->sc_xname, "vnd%d", unit);		/* XXX */
    629  1.23   thorpej 		vnd->sc_dkdev.dk_name = vnd->sc_xname;
    630  1.23   thorpej 		disk_attach(&vnd->sc_dkdev);
    631  1.23   thorpej 
    632  1.22   thorpej 		vndunlock(vnd);
    633  1.22   thorpej 
    634   1.1    brezak 		break;
    635   1.1    brezak 
    636  1.17       cgd 	case VNDIOCCLR:
    637  1.17       cgd 		if ((vnd->sc_flags & VNF_INITED) == 0)
    638  1.22   thorpej 			return (ENXIO);
    639  1.22   thorpej 
    640  1.24  christos 		if ((error = vndlock(vnd)) != 0)
    641  1.22   thorpej 			return (error);
    642  1.22   thorpej 
    643  1.22   thorpej 		/*
    644  1.22   thorpej 		 * Don't unconfigure if any other partitions are open
    645  1.22   thorpej 		 * or if both the character and block flavors of this
    646  1.22   thorpej 		 * partition are open.
    647  1.22   thorpej 		 */
    648  1.22   thorpej 		part = DISKPART(dev);
    649  1.22   thorpej 		pmask = (1 << part);
    650  1.22   thorpej 		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
    651  1.22   thorpej 		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
    652  1.22   thorpej 		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
    653  1.22   thorpej 			vndunlock(vnd);
    654  1.22   thorpej 			return (EBUSY);
    655  1.22   thorpej 		}
    656  1.22   thorpej 
    657  1.17       cgd 		vndclear(vnd);
    658   1.1    brezak #ifdef DEBUG
    659  1.17       cgd 		if (vnddebug & VDB_INIT)
    660  1.30  christos 			printf("vndioctl: CLRed\n");
    661   1.1    brezak #endif
    662  1.23   thorpej 
    663  1.23   thorpej 		/* Detach the disk. */
    664  1.24  christos 		disk_detach(&vnd->sc_dkdev);
    665  1.22   thorpej 
    666  1.22   thorpej 		vndunlock(vnd);
    667  1.22   thorpej 
    668   1.1    brezak 		break;
    669   1.1    brezak 
    670  1.22   thorpej 	/*
    671  1.22   thorpej 	 * XXX Should support disklabels.
    672  1.22   thorpej 	 */
    673  1.22   thorpej 
    674   1.1    brezak 	default:
    675  1.12       cgd 		return(ENOTTY);
    676   1.1    brezak 	}
    677  1.22   thorpej 
    678  1.22   thorpej 	return (0);
    679   1.1    brezak }
    680   1.1    brezak 
    681   1.1    brezak /*
    682   1.1    brezak  * Duplicate the current processes' credentials.  Since we are called only
    683   1.1    brezak  * as the result of a SET ioctl and only root can do that, any future access
    684   1.1    brezak  * to this "disk" is essentially as root.  Note that credentials may change
    685   1.1    brezak  * if some other uid can write directly to the mapped file (NFS).
    686   1.1    brezak  */
    687  1.16       cgd int
    688  1.17       cgd vndsetcred(vnd, cred)
    689  1.17       cgd 	register struct vnd_softc *vnd;
    690   1.1    brezak 	struct ucred *cred;
    691   1.1    brezak {
    692   1.1    brezak 	struct uio auio;
    693   1.1    brezak 	struct iovec aiov;
    694   1.5       cgd 	char *tmpbuf;
    695   1.5       cgd 	int error;
    696   1.1    brezak 
    697  1.17       cgd 	vnd->sc_cred = crdup(cred);
    698   1.5       cgd 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
    699   1.5       cgd 
    700   1.1    brezak 	/* XXX: Horrible kludge to establish credentials for NFS */
    701   1.1    brezak 	aiov.iov_base = tmpbuf;
    702  1.17       cgd 	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
    703   1.1    brezak 	auio.uio_iov = &aiov;
    704   1.1    brezak 	auio.uio_iovcnt = 1;
    705   1.1    brezak 	auio.uio_offset = 0;
    706   1.1    brezak 	auio.uio_rw = UIO_READ;
    707   1.1    brezak 	auio.uio_segflg = UIO_SYSSPACE;
    708   1.1    brezak 	auio.uio_resid = aiov.iov_len;
    709  1.21   mycroft 	VOP_LOCK(vnd->sc_vp);
    710  1.17       cgd 	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
    711  1.21   mycroft 	VOP_UNLOCK(vnd->sc_vp);
    712   1.5       cgd 
    713   1.5       cgd 	free(tmpbuf, M_TEMP);
    714   1.5       cgd 	return (error);
    715   1.1    brezak }
    716   1.1    brezak 
    717   1.1    brezak /*
    718   1.1    brezak  * Set maxactive based on FS type
    719   1.1    brezak  */
    720  1.16       cgd void
    721  1.17       cgd vndthrottle(vnd, vp)
    722  1.17       cgd 	register struct vnd_softc *vnd;
    723   1.1    brezak 	struct vnode *vp;
    724   1.1    brezak {
    725  1.31   thorpej #ifdef NFS
    726  1.24  christos 	extern int (**nfsv2_vnodeop_p) __P((void *));
    727   1.5       cgd 
    728   1.2    brezak 	if (vp->v_op == nfsv2_vnodeop_p)
    729  1.17       cgd 		vnd->sc_maxactive = 2;
    730   1.1    brezak 	else
    731   1.1    brezak #endif
    732  1.17       cgd 		vnd->sc_maxactive = 8;
    733   1.1    brezak 
    734  1.17       cgd 	if (vnd->sc_maxactive < 1)
    735  1.17       cgd 		vnd->sc_maxactive = 1;
    736   1.1    brezak }
    737   1.1    brezak 
    738  1.16       cgd void
    739  1.17       cgd vndshutdown()
    740   1.1    brezak {
    741  1.17       cgd 	register struct vnd_softc *vnd;
    742   1.1    brezak 
    743  1.17       cgd 	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
    744  1.17       cgd 		if (vnd->sc_flags & VNF_INITED)
    745  1.17       cgd 			vndclear(vnd);
    746   1.1    brezak }
    747   1.1    brezak 
    748  1.16       cgd void
    749  1.17       cgd vndclear(vnd)
    750  1.17       cgd 	register struct vnd_softc *vnd;
    751   1.1    brezak {
    752  1.17       cgd 	register struct vnode *vp = vnd->sc_vp;
    753   1.1    brezak 	struct proc *p = curproc;		/* XXX */
    754   1.1    brezak 
    755   1.1    brezak #ifdef DEBUG
    756  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    757  1.30  christos 		printf("vndclear(%p): vp %p\n", vnd, vp);
    758   1.1    brezak #endif
    759  1.17       cgd 	vnd->sc_flags &= ~VNF_INITED;
    760   1.1    brezak 	if (vp == (struct vnode *)0)
    761  1.17       cgd 		panic("vndioctl: null vp");
    762  1.17       cgd 	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
    763  1.17       cgd 	crfree(vnd->sc_cred);
    764  1.17       cgd 	vnd->sc_vp = (struct vnode *)0;
    765  1.17       cgd 	vnd->sc_cred = (struct ucred *)0;
    766  1.17       cgd 	vnd->sc_size = 0;
    767   1.1    brezak }
    768   1.1    brezak 
    769  1.16       cgd int
    770  1.17       cgd vndsize(dev)
    771   1.1    brezak 	dev_t dev;
    772   1.1    brezak {
    773  1.17       cgd 	int unit = vndunit(dev);
    774  1.17       cgd 	register struct vnd_softc *vnd = &vnd_softc[unit];
    775   1.1    brezak 
    776  1.17       cgd 	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
    777   1.1    brezak 		return(-1);
    778  1.17       cgd 	return(vnd->sc_size);
    779   1.1    brezak }
    780   1.1    brezak 
    781  1.16       cgd int
    782  1.19       cgd vnddump(dev, blkno, va, size)
    783  1.16       cgd 	dev_t dev;
    784  1.19       cgd 	daddr_t blkno;
    785  1.19       cgd 	caddr_t va;
    786  1.19       cgd 	size_t size;
    787   1.1    brezak {
    788  1.16       cgd 
    789  1.19       cgd 	/* Not implemented. */
    790  1.19       cgd 	return ENXIO;
    791   1.1    brezak }
    792  1.22   thorpej 
    793  1.22   thorpej /*
    794  1.22   thorpej  * Wait interruptibly for an exclusive lock.
    795  1.22   thorpej  *
    796  1.22   thorpej  * XXX
    797  1.22   thorpej  * Several drivers do this; it should be abstracted and made MP-safe.
    798  1.22   thorpej  */
    799  1.22   thorpej static int
    800  1.22   thorpej vndlock(sc)
    801  1.22   thorpej 	struct vnd_softc *sc;
    802  1.22   thorpej {
    803  1.22   thorpej 	int error;
    804  1.22   thorpej 
    805  1.22   thorpej 	while ((sc->sc_flags & VNF_LOCKED) != 0) {
    806  1.22   thorpej 		sc->sc_flags |= VNF_WANTED;
    807  1.22   thorpej 		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
    808  1.22   thorpej 			return (error);
    809  1.22   thorpej 	}
    810  1.22   thorpej 	sc->sc_flags |= VNF_LOCKED;
    811  1.22   thorpej 	return (0);
    812  1.22   thorpej }
    813  1.22   thorpej 
    814  1.22   thorpej /*
    815  1.22   thorpej  * Unlock and wake up any waiters.
    816  1.22   thorpej  */
    817  1.22   thorpej static void
    818  1.22   thorpej vndunlock(sc)
    819  1.22   thorpej 	struct vnd_softc *sc;
    820  1.22   thorpej {
    821  1.22   thorpej 
    822  1.22   thorpej 	sc->sc_flags &= ~VNF_LOCKED;
    823  1.22   thorpej 	if ((sc->sc_flags & VNF_WANTED) != 0) {
    824  1.22   thorpej 		sc->sc_flags &= ~VNF_WANTED;
    825  1.22   thorpej 		wakeup(sc);
    826  1.22   thorpej 	}
    827  1.22   thorpej }
    828