Home | History | Annotate | Line # | Download | only in dev
vnd.c revision 1.37
      1  1.37        pk /*	$NetBSD: vnd.c,v 1.37 1997/05/25 22:27:16 pk Exp $	*/
      2  1.11       cgd 
      3   1.1    brezak /*
      4   1.1    brezak  * Copyright (c) 1988 University of Utah.
      5   1.1    brezak  * Copyright (c) 1990, 1993
      6   1.1    brezak  *	The Regents of the University of California.  All rights reserved.
      7   1.1    brezak  *
      8   1.1    brezak  * This code is derived from software contributed to Berkeley by
      9   1.1    brezak  * the Systems Programming Group of the University of Utah Computer
     10   1.1    brezak  * Science Department.
     11   1.1    brezak  *
     12   1.1    brezak  * Redistribution and use in source and binary forms, with or without
     13   1.1    brezak  * modification, are permitted provided that the following conditions
     14   1.1    brezak  * are met:
     15   1.1    brezak  * 1. Redistributions of source code must retain the above copyright
     16   1.1    brezak  *    notice, this list of conditions and the following disclaimer.
     17   1.1    brezak  * 2. Redistributions in binary form must reproduce the above copyright
     18   1.1    brezak  *    notice, this list of conditions and the following disclaimer in the
     19   1.1    brezak  *    documentation and/or other materials provided with the distribution.
     20   1.1    brezak  * 3. All advertising materials mentioning features or use of this software
     21   1.1    brezak  *    must display the following acknowledgement:
     22   1.1    brezak  *	This product includes software developed by the University of
     23   1.1    brezak  *	California, Berkeley and its contributors.
     24   1.1    brezak  * 4. Neither the name of the University nor the names of its contributors
     25   1.1    brezak  *    may be used to endorse or promote products derived from this software
     26   1.1    brezak  *    without specific prior written permission.
     27   1.1    brezak  *
     28   1.1    brezak  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29   1.1    brezak  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30   1.1    brezak  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31   1.1    brezak  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32   1.1    brezak  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33   1.1    brezak  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34   1.1    brezak  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35   1.1    brezak  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36   1.1    brezak  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37   1.1    brezak  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38   1.1    brezak  * SUCH DAMAGE.
     39   1.1    brezak  *
     40   1.5       cgd  * from: Utah $Hdr: vn.c 1.13 94/04/02$
     41   1.1    brezak  *
     42   1.5       cgd  *	@(#)vn.c	8.6 (Berkeley) 4/1/94
     43   1.1    brezak  */
     44   1.1    brezak 
     45   1.1    brezak /*
     46   1.1    brezak  * Vnode disk driver.
     47   1.1    brezak  *
     48   1.1    brezak  * Block/character interface to a vnode.  Allows one to treat a file
     49   1.1    brezak  * as a disk (e.g. build a filesystem in it, mount it, etc.).
     50   1.1    brezak  *
     51   1.1    brezak  * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode
     52   1.1    brezak  * instead of a simple VOP_RDWR.  We do this to avoid distorting the
     53   1.1    brezak  * local buffer cache.
     54   1.1    brezak  *
     55   1.1    brezak  * NOTE 2: There is a security issue involved with this driver.
     56   1.1    brezak  * Once mounted all access to the contents of the "mapped" file via
     57   1.1    brezak  * the special file is controlled by the permissions on the special
     58   1.1    brezak  * file, the protection of the mapped file is ignored (effectively,
     59   1.1    brezak  * by using root credentials in all transactions).
     60   1.1    brezak  *
     61   1.1    brezak  * NOTE 3: Doesn't interact with leases, should it?
     62   1.1    brezak  */
     63   1.1    brezak 
     64   1.1    brezak #include <sys/param.h>
     65   1.1    brezak #include <sys/systm.h>
     66   1.1    brezak #include <sys/namei.h>
     67   1.1    brezak #include <sys/proc.h>
     68   1.1    brezak #include <sys/errno.h>
     69   1.1    brezak #include <sys/buf.h>
     70   1.1    brezak #include <sys/malloc.h>
     71   1.1    brezak #include <sys/ioctl.h>
     72  1.16       cgd #include <sys/disklabel.h>
     73  1.22   thorpej #include <sys/device.h>
     74  1.22   thorpej #include <sys/disk.h>
     75  1.22   thorpej #include <sys/stat.h>
     76   1.1    brezak #include <sys/mount.h>
     77   1.1    brezak #include <sys/vnode.h>
     78   1.1    brezak #include <sys/file.h>
     79   1.1    brezak #include <sys/uio.h>
     80  1.26  christos #include <sys/conf.h>
     81   1.1    brezak 
     82   1.1    brezak #include <miscfs/specfs/specdev.h>
     83   1.1    brezak 
     84  1.17       cgd #include <dev/vndioctl.h>
     85   1.1    brezak 
#ifdef DEBUG
/* Bits tested against vnddebug to enable the driver's trace printf()s. */
#define VDB_FOLLOW	0x01	/* trace entry into the driver routines */
#define VDB_INIT	0x02	/* trace VNDIOCSET / VNDIOCCLR */
#define VDB_IO		0x04	/* trace individual buffers */

int vnddebug = 0x00;	/* mask of VDB_* bits; 0 keeps the driver quiet */
int dovndcluster = 1;	/* 0: vndstrategy() discards VOP_BMAP's run count */
#endif

/*
 * disksort() key.  vndstrategy() stores the target block number here
 * before queueing; the name aliases the buffer's b_resid field.
 */
#define b_cylin	b_resid

/* Unit number encoded in a vnd dev_t. */
#define	vndunit(x)	DISKUNIT(x)
     97  1.18       cgd 
/*
 * Per-request bookkeeping.  vndstrategy() allocates one of these for
 * every buffer it is handed and points each auxiliary buffer at it;
 * vndiodone() uses it to decide when the parent buffer can be
 * completed.
 */
struct vndxfer {
	struct buf	*vx_bp;		/* the parent (caller's) buffer */
	int		vx_error;	/* error to report on the parent, 0 if none */
	int		vx_pending;	/* auxiliary buffers not yet returned */
};
    103  1.36        pk 
    104  1.18       cgd struct vndbuf {
    105  1.18       cgd 	struct buf	vb_buf;
    106  1.36        pk 	struct vndxfer	*vb_xfer;
    107  1.18       cgd };
    108   1.1    brezak 
/*
 * Allocation helpers for the two per-I/O structures.  Both "get"
 * macros call malloc() with M_WAITOK and return the result unchecked;
 * the "put" macros hand the memory back to the M_DEVBUF pool.
 */
#define	getvndxfer()						\
	((struct vndxfer *)malloc(sizeof(struct vndxfer),	\
	    M_DEVBUF, M_WAITOK))
#define	putvndxfer(vnx)						\
	free((caddr_t)(vnx), M_DEVBUF)

#define	getvndbuf()						\
	((struct vndbuf *)malloc(sizeof(struct vndbuf),		\
	    M_DEVBUF, M_WAITOK))
#define	putvndbuf(vbp)						\
	free((caddr_t)(vbp), M_DEVBUF)
    117   1.1    brezak 
    118  1.17       cgd struct vnd_softc {
    119   1.1    brezak 	int		 sc_flags;	/* flags */
    120  1.17       cgd 	size_t		 sc_size;	/* size of vnd */
    121   1.1    brezak 	struct vnode	*sc_vp;		/* vnode */
    122   1.1    brezak 	struct ucred	*sc_cred;	/* credentials */
    123   1.1    brezak 	int		 sc_maxactive;	/* max # of active requests */
    124   1.1    brezak 	struct buf	 sc_tab;	/* transfer queue */
    125  1.23   thorpej 	char		 sc_xname[8];	/* XXX external name */
    126  1.23   thorpej 	struct disk	 sc_dkdev;	/* generic disk device info */
    127   1.1    brezak };
    128   1.1    brezak 
    129   1.1    brezak /* sc_flags */
    130   1.1    brezak #define	VNF_ALIVE	0x01
    131   1.1    brezak #define VNF_INITED	0x02
    132  1.22   thorpej #define VNF_WANTED	0x40
    133  1.22   thorpej #define VNF_LOCKED	0x80
    134   1.1    brezak 
    135  1.17       cgd struct vnd_softc *vnd_softc;
    136  1.22   thorpej int numvnd = 0;
    137  1.22   thorpej 
    138  1.22   thorpej /* called by main() at boot time */
    139  1.22   thorpej void	vndattach __P((int));
    140   1.1    brezak 
    141  1.17       cgd void	vndclear __P((struct vnd_softc *));
    142  1.17       cgd void	vndstart __P((struct vnd_softc *));
    143  1.17       cgd int	vndsetcred __P((struct vnd_softc *, struct ucred *));
    144  1.17       cgd void	vndthrottle __P((struct vnd_softc *, struct vnode *));
    145  1.24  christos void	vndiodone __P((struct buf *));
    146  1.24  christos void	vndshutdown __P((void));
    147  1.16       cgd 
    148  1.22   thorpej static	int vndlock __P((struct vnd_softc *));
    149  1.22   thorpej static	void vndunlock __P((struct vnd_softc *));
    150  1.22   thorpej 
    151   1.1    brezak void
    152  1.17       cgd vndattach(num)
    153   1.1    brezak 	int num;
    154   1.1    brezak {
    155   1.1    brezak 	char *mem;
    156   1.1    brezak 	register u_long size;
    157   1.1    brezak 
    158   1.1    brezak 	if (num <= 0)
    159   1.1    brezak 		return;
    160  1.17       cgd 	size = num * sizeof(struct vnd_softc);
    161   1.1    brezak 	mem = malloc(size, M_DEVBUF, M_NOWAIT);
    162   1.1    brezak 	if (mem == NULL) {
    163  1.30  christos 		printf("WARNING: no memory for vnode disks\n");
    164   1.1    brezak 		return;
    165   1.1    brezak 	}
    166   1.1    brezak 	bzero(mem, size);
    167  1.17       cgd 	vnd_softc = (struct vnd_softc *)mem;
    168   1.1    brezak 	numvnd = num;
    169   1.1    brezak }
    170   1.1    brezak 
    171   1.1    brezak int
    172  1.17       cgd vndopen(dev, flags, mode, p)
    173   1.1    brezak 	dev_t dev;
    174   1.1    brezak 	int flags, mode;
    175   1.1    brezak 	struct proc *p;
    176   1.1    brezak {
    177  1.17       cgd 	int unit = vndunit(dev);
    178  1.22   thorpej 	struct vnd_softc *sc;
    179  1.22   thorpej 	int error = 0, part, pmask;
    180  1.22   thorpej 
    181  1.22   thorpej 	/*
    182  1.22   thorpej 	 * XXX Should support disklabels.
    183  1.22   thorpej 	 */
    184   1.1    brezak 
    185   1.1    brezak #ifdef DEBUG
    186  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    187  1.30  christos 		printf("vndopen(%x, %x, %x, %p)\n", dev, flags, mode, p);
    188   1.1    brezak #endif
    189   1.1    brezak 	if (unit >= numvnd)
    190  1.22   thorpej 		return (ENXIO);
    191  1.22   thorpej 	sc = &vnd_softc[unit];
    192  1.22   thorpej 
    193  1.24  christos 	if ((error = vndlock(sc)) != 0)
    194  1.22   thorpej 		return (error);
    195  1.22   thorpej 
    196  1.22   thorpej 	part = DISKPART(dev);
    197  1.22   thorpej 	pmask = (1 << part);
    198  1.22   thorpej 
    199  1.22   thorpej 	/* Prevent our unit from being unconfigured while open. */
    200  1.22   thorpej 	switch (mode) {
    201  1.22   thorpej 	case S_IFCHR:
    202  1.22   thorpej 		sc->sc_dkdev.dk_copenmask |= pmask;
    203  1.22   thorpej 		break;
    204  1.22   thorpej 
    205  1.22   thorpej 	case S_IFBLK:
    206  1.22   thorpej 		sc->sc_dkdev.dk_bopenmask |= pmask;
    207  1.22   thorpej 		break;
    208  1.22   thorpej 	}
    209  1.22   thorpej 	sc->sc_dkdev.dk_openmask =
    210  1.22   thorpej 	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
    211  1.22   thorpej 
    212  1.22   thorpej 	vndunlock(sc);
    213  1.22   thorpej 	return (0);
    214   1.1    brezak }
    215   1.1    brezak 
    216   1.4   deraadt int
    217  1.17       cgd vndclose(dev, flags, mode, p)
    218   1.4   deraadt 	dev_t dev;
    219   1.4   deraadt 	int flags, mode;
    220   1.4   deraadt 	struct proc *p;
    221   1.4   deraadt {
    222  1.22   thorpej 	int unit = vndunit(dev);
    223  1.22   thorpej 	struct vnd_softc *sc;
    224  1.22   thorpej 	int error = 0, part;
    225  1.22   thorpej 
    226   1.4   deraadt #ifdef DEBUG
    227  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    228  1.30  christos 		printf("vndclose(%x, %x, %x, %p)\n", dev, flags, mode, p);
    229   1.4   deraadt #endif
    230  1.22   thorpej 
    231  1.22   thorpej 	if (unit >= numvnd)
    232  1.22   thorpej 		return (ENXIO);
    233  1.22   thorpej 	sc = &vnd_softc[unit];
    234  1.22   thorpej 
    235  1.24  christos 	if ((error = vndlock(sc)) != 0)
    236  1.22   thorpej 		return (error);
    237  1.22   thorpej 
    238  1.22   thorpej 	part = DISKPART(dev);
    239  1.22   thorpej 
    240  1.22   thorpej 	/* ...that much closer to allowing unconfiguration... */
    241  1.22   thorpej 	switch (mode) {
    242  1.22   thorpej 	case S_IFCHR:
    243  1.22   thorpej 		sc->sc_dkdev.dk_copenmask &= ~(1 << part);
    244  1.22   thorpej 		break;
    245  1.22   thorpej 
    246  1.22   thorpej 	case S_IFBLK:
    247  1.22   thorpej 		sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
    248  1.22   thorpej 		break;
    249  1.22   thorpej 	}
    250  1.22   thorpej 	sc->sc_dkdev.dk_openmask =
    251  1.22   thorpej 	    sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
    252  1.22   thorpej 
    253  1.22   thorpej 	vndunlock(sc);
    254  1.22   thorpej 	return (0);
    255   1.4   deraadt }
    256   1.4   deraadt 
    257   1.1    brezak /*
    258   1.1    brezak  * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
    259   1.1    brezak  * Note that this driver can only be used for swapping over NFS on the hp
    260   1.1    brezak  * since nfs_strategy on the vax cannot handle u-areas and page tables.
    261   1.1    brezak  */
    262   1.6   mycroft void
    263  1.17       cgd vndstrategy(bp)
    264   1.1    brezak 	register struct buf *bp;
    265   1.1    brezak {
    266  1.17       cgd 	int unit = vndunit(bp->b_dev);
    267  1.37        pk 	struct vnd_softc *vnd = &vnd_softc[unit];
    268  1.37        pk 	struct vndbuf *nbp;
    269  1.36        pk 	struct vndxfer *vnx;
    270  1.37        pk 	int bn, bsize, resid;
    271  1.37        pk 	caddr_t addr;
    272   1.5       cgd 	int sz, flags, error;
    273   1.1    brezak 
    274   1.1    brezak #ifdef DEBUG
    275  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    276  1.30  christos 		printf("vndstrategy(%p): unit %d\n", bp, unit);
    277   1.1    brezak #endif
    278  1.17       cgd 	if ((vnd->sc_flags & VNF_INITED) == 0) {
    279   1.1    brezak 		bp->b_error = ENXIO;
    280   1.1    brezak 		bp->b_flags |= B_ERROR;
    281   1.1    brezak 		biodone(bp);
    282   1.1    brezak 		return;
    283   1.1    brezak 	}
    284   1.1    brezak 	bn = bp->b_blkno;
    285   1.1    brezak 	sz = howmany(bp->b_bcount, DEV_BSIZE);
    286   1.1    brezak 	bp->b_resid = bp->b_bcount;
    287  1.17       cgd 	if (bn < 0 || bn + sz > vnd->sc_size) {
    288  1.17       cgd 		if (bn != vnd->sc_size) {
    289   1.1    brezak 			bp->b_error = EINVAL;
    290   1.1    brezak 			bp->b_flags |= B_ERROR;
    291   1.1    brezak 		}
    292   1.1    brezak 		biodone(bp);
    293   1.1    brezak 		return;
    294   1.1    brezak 	}
    295   1.1    brezak 	bn = dbtob(bn);
    296  1.17       cgd  	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
    297   1.5       cgd 	addr = bp->b_data;
    298   1.1    brezak 	flags = bp->b_flags | B_CALL;
    299  1.36        pk 
    300  1.36        pk 	/* Allocate a header for this transfer and link it to the buffer */
    301  1.36        pk 	vnx = getvndxfer();
    302  1.36        pk 	vnx->vx_error = 0;
    303  1.36        pk 	vnx->vx_pending = 0;
    304  1.36        pk 	vnx->vx_bp = bp;
    305  1.36        pk 
    306   1.1    brezak 	for (resid = bp->b_resid; resid; resid -= sz) {
    307   1.1    brezak 		struct vnode *vp;
    308   1.1    brezak 		daddr_t nbn;
    309   1.5       cgd 		int off, s, nra;
    310   1.1    brezak 
    311   1.5       cgd 		nra = 0;
    312  1.21   mycroft 		VOP_LOCK(vnd->sc_vp);
    313  1.17       cgd 		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
    314  1.21   mycroft 		VOP_UNLOCK(vnd->sc_vp);
    315  1.37        pk 
    316  1.37        pk #ifdef VND_FILLHOLES
    317  1.37        pk 		if (error == 0 && (long)nbn == -1) {
    318  1.37        pk 			int rw = (flags & B_READ) ? UIO_READ : UIO_WRITE;
    319  1.37        pk 			sz = resid;
    320  1.37        pk 			error = vn_rdwr(rw, vnd->sc_vp, addr, sz,
    321  1.37        pk 					dbtob(bp->b_blkno), UIO_SYSSPACE,
    322  1.37        pk 					IO_SYNC | IO_NODELOCKED,
    323  1.37        pk 					vnd->sc_cred, &resid, bp->b_proc);
    324  1.37        pk 			bp->b_resid -= (sz - resid);
    325  1.37        pk 		}
    326  1.37        pk #else
    327   1.5       cgd 		if (error == 0 && (long)nbn == -1)
    328   1.5       cgd 			error = EIO;
    329  1.37        pk #endif
    330  1.36        pk 
    331  1.36        pk 		/*
    332  1.36        pk 		 * If there was an error or a hole in the file...punt.
    333  1.36        pk 		 * Note that we may have to wait for any operations
    334  1.36        pk 		 * that we have already fired off before releasing
    335  1.36        pk 		 * the buffer.
    336  1.36        pk 		 *
    337  1.36        pk 		 * XXX we could deal with holes here but it would be
    338  1.36        pk 		 * a hassle (in the write case).
    339  1.36        pk 		 */
    340  1.37        pk 		if (error || (long)nbn == -1) {
    341  1.36        pk 			vnx->vx_error = error;
    342  1.36        pk 			if (vnx->vx_pending == 0) {
    343  1.37        pk 				if (error) {
    344  1.37        pk 					bp->b_error = error;
    345  1.37        pk 					bp->b_flags |= B_ERROR;
    346  1.37        pk 				}
    347  1.36        pk 				putvndxfer(vnx);
    348  1.36        pk 				biodone(bp);
    349  1.36        pk 			}
    350  1.36        pk 			return;
    351  1.36        pk 		}
    352  1.36        pk 
    353   1.5       cgd #ifdef DEBUG
    354  1.17       cgd 		if (!dovndcluster)
    355   1.5       cgd 			nra = 0;
    356   1.1    brezak #endif
    357   1.5       cgd 
    358  1.24  christos 		if ((off = bn % bsize) != 0)
    359   1.5       cgd 			sz = bsize - off;
    360   1.5       cgd 		else
    361   1.5       cgd 			sz = (1 + nra) * bsize;
    362   1.5       cgd 		if (resid < sz)
    363   1.5       cgd 			sz = resid;
    364   1.1    brezak #ifdef DEBUG
    365  1.17       cgd 		if (vnddebug & VDB_IO)
    366  1.30  christos 			printf("vndstrategy: vp %p/%p bn %x/%x sz %x\n",
    367  1.29  christos 			    vnd->sc_vp, vp, bn, nbn, sz);
    368   1.1    brezak #endif
    369   1.5       cgd 
    370  1.17       cgd 		nbp = getvndbuf();
    371  1.18       cgd 		nbp->vb_buf.b_flags = flags;
    372  1.18       cgd 		nbp->vb_buf.b_bcount = sz;
    373  1.18       cgd 		nbp->vb_buf.b_bufsize = bp->b_bufsize;
    374  1.18       cgd 		nbp->vb_buf.b_error = 0;
    375   1.1    brezak 		if (vp->v_type == VBLK || vp->v_type == VCHR)
    376  1.18       cgd 			nbp->vb_buf.b_dev = vp->v_rdev;
    377   1.1    brezak 		else
    378  1.18       cgd 			nbp->vb_buf.b_dev = NODEV;
    379  1.18       cgd 		nbp->vb_buf.b_data = addr;
    380  1.18       cgd 		nbp->vb_buf.b_blkno = nbn + btodb(off);
    381  1.18       cgd 		nbp->vb_buf.b_proc = bp->b_proc;
    382  1.18       cgd 		nbp->vb_buf.b_iodone = vndiodone;
    383  1.18       cgd 		nbp->vb_buf.b_vp = vp;
    384  1.18       cgd 		nbp->vb_buf.b_rcred = vnd->sc_cred;	/* XXX crdup? */
    385  1.18       cgd 		nbp->vb_buf.b_wcred = vnd->sc_cred;	/* XXX crdup? */
    386  1.33        pk 		if (bp->b_dirtyend == 0) {
    387  1.33        pk 			nbp->vb_buf.b_dirtyoff = 0;
    388  1.33        pk 			nbp->vb_buf.b_dirtyend = sz;
    389  1.33        pk 		} else {
    390  1.33        pk 			nbp->vb_buf.b_dirtyoff =
    391  1.33        pk 			    max(0, bp->b_dirtyoff - (bp->b_bcount - resid));
    392  1.33        pk 			nbp->vb_buf.b_dirtyend =
    393  1.34        pk 			    min(sz,
    394  1.34        pk 				max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
    395  1.33        pk 		}
    396  1.33        pk 		if (bp->b_validend == 0) {
    397  1.33        pk 			nbp->vb_buf.b_validoff = 0;
    398  1.33        pk 			nbp->vb_buf.b_validend = sz;
    399  1.33        pk 		} else {
    400  1.33        pk 			nbp->vb_buf.b_validoff =
    401  1.33        pk 			    max(0, bp->b_validoff - (bp->b_bcount - resid));
    402  1.33        pk 			nbp->vb_buf.b_validend =
    403  1.34        pk 			    min(sz,
    404  1.34        pk 				max(0, bp->b_validend - (bp->b_bcount-resid)));
    405  1.33        pk 		}
    406  1.18       cgd 
    407  1.36        pk 		nbp->vb_xfer = vnx;
    408  1.36        pk 		vnx->vx_pending++;
    409  1.18       cgd 
    410   1.1    brezak 		/*
    411   1.1    brezak 		 * Just sort by block number
    412   1.1    brezak 		 */
    413  1.18       cgd 		nbp->vb_buf.b_cylin = nbp->vb_buf.b_blkno;
    414   1.1    brezak 		s = splbio();
    415  1.18       cgd 		disksort(&vnd->sc_tab, &nbp->vb_buf);
    416  1.17       cgd 		if (vnd->sc_tab.b_active < vnd->sc_maxactive) {
    417  1.17       cgd 			vnd->sc_tab.b_active++;
    418  1.17       cgd 			vndstart(vnd);
    419   1.1    brezak 		}
    420   1.1    brezak 		splx(s);
    421   1.1    brezak 		bn += sz;
    422   1.1    brezak 		addr += sz;
    423   1.1    brezak 	}
    424   1.1    brezak }
    425   1.1    brezak 
    426   1.1    brezak /*
    427   1.1    brezak  * Feed requests sequentially.
    428   1.1    brezak  * We do it this way to keep from flooding NFS servers if we are connected
    429   1.1    brezak  * to an NFS file.  This places the burden on the client rather than the
    430   1.1    brezak  * server.
    431   1.1    brezak  */
    432  1.16       cgd void
    433  1.17       cgd vndstart(vnd)
    434  1.17       cgd 	register struct vnd_softc *vnd;
    435   1.1    brezak {
    436   1.1    brezak 	register struct buf *bp;
    437   1.1    brezak 
    438   1.1    brezak 	/*
    439   1.1    brezak 	 * Dequeue now since lower level strategy routine might
    440   1.1    brezak 	 * queue using same links
    441   1.1    brezak 	 */
    442  1.17       cgd 	bp = vnd->sc_tab.b_actf;
    443  1.17       cgd 	vnd->sc_tab.b_actf = bp->b_actf;
    444   1.1    brezak #ifdef DEBUG
    445  1.17       cgd 	if (vnddebug & VDB_IO)
    446  1.30  christos 		printf("vndstart(%ld): bp %p vp %p blkno %x addr %p cnt %lx\n",
    447  1.28  christos 		    (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno,
    448  1.28  christos 		    bp->b_data, bp->b_bcount);
    449   1.1    brezak #endif
    450  1.23   thorpej 
    451  1.23   thorpej 	/* Instrumentation. */
    452  1.23   thorpej 	disk_busy(&vnd->sc_dkdev);
    453  1.23   thorpej 
    454   1.1    brezak 	if ((bp->b_flags & B_READ) == 0)
    455   1.1    brezak 		bp->b_vp->v_numoutput++;
    456   1.1    brezak 	VOP_STRATEGY(bp);
    457   1.1    brezak }
    458   1.1    brezak 
    459   1.1    brezak void
    460  1.24  christos vndiodone(bp)
    461  1.24  christos 	struct buf *bp;
    462   1.1    brezak {
    463  1.24  christos 	register struct vndbuf *vbp = (struct vndbuf *) bp;
    464  1.36        pk 	register struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer;
    465  1.36        pk 	register struct buf *pbp = vnx->vx_bp;
    466  1.17       cgd 	register struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)];
    467  1.36        pk 	int s, resid;
    468   1.1    brezak 
    469   1.1    brezak 	s = splbio();
    470   1.1    brezak #ifdef DEBUG
    471  1.17       cgd 	if (vnddebug & VDB_IO)
    472  1.30  christos 		printf("vndiodone(%ld): vbp %p vp %p blkno %x addr %p cnt %lx\n",
    473  1.28  christos 		    (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp,
    474  1.28  christos 		    vbp->vb_buf.b_blkno, vbp->vb_buf.b_data,
    475  1.28  christos 		    vbp->vb_buf.b_bcount);
    476   1.1    brezak #endif
    477  1.23   thorpej 
    478  1.36        pk 	resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
    479  1.36        pk 	pbp->b_resid -= resid;
    480  1.36        pk 	disk_unbusy(&vnd->sc_dkdev, resid);
    481  1.36        pk 	vnx->vx_pending--;
    482  1.36        pk 
    483  1.18       cgd 	if (vbp->vb_buf.b_error) {
    484   1.1    brezak #ifdef DEBUG
    485  1.17       cgd 		if (vnddebug & VDB_IO)
    486  1.30  christos 			printf("vndiodone: vbp %p error %d\n", vbp,
    487  1.18       cgd 			    vbp->vb_buf.b_error);
    488   1.1    brezak #endif
    489  1.36        pk 		vnx->vx_error = vbp->vb_buf.b_error;
    490   1.1    brezak 	}
    491  1.18       cgd 	putvndbuf(vbp);
    492  1.36        pk 
    493  1.36        pk 	/*
    494  1.36        pk 	 * Wrap up this transaction if it has run to completion or, in
    495  1.36        pk 	 * case of an error, when all auxiliary buffers have returned.
    496  1.36        pk 	 */
    497  1.36        pk 	if (pbp->b_resid == 0 || (vnx->vx_error && vnx->vx_pending == 0)) {
    498  1.36        pk 
    499  1.36        pk 		if (vnx->vx_error != 0) {
    500  1.36        pk 			pbp->b_flags |= B_ERROR;
    501  1.36        pk 			pbp->b_error = vnx->vx_error;
    502  1.36        pk 		}
    503  1.36        pk 		putvndxfer(vnx);
    504   1.1    brezak #ifdef DEBUG
    505  1.17       cgd 		if (vnddebug & VDB_IO)
    506  1.30  christos 			printf("vndiodone: pbp %p iodone\n", pbp);
    507   1.1    brezak #endif
    508   1.1    brezak 		biodone(pbp);
    509   1.1    brezak 	}
    510  1.36        pk 
    511  1.17       cgd 	if (vnd->sc_tab.b_actf)
    512  1.17       cgd 		vndstart(vnd);
    513   1.1    brezak 	else
    514  1.17       cgd 		vnd->sc_tab.b_active--;
    515   1.1    brezak 	splx(s);
    516  1.20   mycroft }
    517  1.20   mycroft 
    518  1.22   thorpej /* ARGSUSED */
    519  1.20   mycroft int
    520  1.22   thorpej vndread(dev, uio, flags)
    521  1.20   mycroft 	dev_t dev;
    522  1.20   mycroft 	struct uio *uio;
    523  1.22   thorpej 	int flags;
    524  1.20   mycroft {
    525  1.22   thorpej 	int unit = vndunit(dev);
    526  1.22   thorpej 	struct vnd_softc *sc;
    527  1.20   mycroft 
    528  1.20   mycroft #ifdef DEBUG
    529  1.20   mycroft 	if (vnddebug & VDB_FOLLOW)
    530  1.30  christos 		printf("vndread(%x, %p)\n", dev, uio);
    531  1.20   mycroft #endif
    532  1.22   thorpej 
    533  1.22   thorpej 	if (unit >= numvnd)
    534  1.22   thorpej 		return (ENXIO);
    535  1.22   thorpej 	sc = &vnd_softc[unit];
    536  1.22   thorpej 
    537  1.22   thorpej 	if ((sc->sc_flags & VNF_INITED) == 0)
    538  1.22   thorpej 		return (ENXIO);
    539  1.22   thorpej 
    540  1.20   mycroft 	return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
    541  1.20   mycroft }
    542  1.20   mycroft 
    543  1.22   thorpej /* ARGSUSED */
    544  1.20   mycroft int
    545  1.22   thorpej vndwrite(dev, uio, flags)
    546  1.20   mycroft 	dev_t dev;
    547  1.20   mycroft 	struct uio *uio;
    548  1.22   thorpej 	int flags;
    549  1.20   mycroft {
    550  1.22   thorpej 	int unit = vndunit(dev);
    551  1.22   thorpej 	struct vnd_softc *sc;
    552  1.20   mycroft 
    553  1.20   mycroft #ifdef DEBUG
    554  1.20   mycroft 	if (vnddebug & VDB_FOLLOW)
    555  1.30  christos 		printf("vndwrite(%x, %p)\n", dev, uio);
    556  1.20   mycroft #endif
    557  1.22   thorpej 
    558  1.22   thorpej 	if (unit >= numvnd)
    559  1.22   thorpej 		return (ENXIO);
    560  1.22   thorpej 	sc = &vnd_softc[unit];
    561  1.22   thorpej 
    562  1.22   thorpej 	if ((sc->sc_flags & VNF_INITED) == 0)
    563  1.22   thorpej 		return (ENXIO);
    564  1.22   thorpej 
    565  1.20   mycroft 	return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
    566   1.1    brezak }
    567   1.1    brezak 
    568   1.1    brezak /* ARGSUSED */
    569  1.16       cgd int
    570  1.17       cgd vndioctl(dev, cmd, data, flag, p)
    571   1.1    brezak 	dev_t dev;
    572   1.1    brezak 	u_long cmd;
    573   1.1    brezak 	caddr_t data;
    574   1.1    brezak 	int flag;
    575   1.1    brezak 	struct proc *p;
    576   1.1    brezak {
    577  1.17       cgd 	int unit = vndunit(dev);
    578  1.17       cgd 	register struct vnd_softc *vnd;
    579  1.17       cgd 	struct vnd_ioctl *vio;
    580   1.1    brezak 	struct vattr vattr;
    581   1.1    brezak 	struct nameidata nd;
    582  1.32   mycroft 	int error, part, pmask;
    583   1.1    brezak 
    584   1.1    brezak #ifdef DEBUG
    585  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    586  1.30  christos 		printf("vndioctl(%x, %lx, %p, %x, %p): unit %d\n",
    587   1.5       cgd 		    dev, cmd, data, flag, p, unit);
    588   1.1    brezak #endif
    589   1.1    brezak 	error = suser(p->p_ucred, &p->p_acflag);
    590   1.1    brezak 	if (error)
    591   1.1    brezak 		return (error);
    592   1.1    brezak 	if (unit >= numvnd)
    593   1.1    brezak 		return (ENXIO);
    594   1.1    brezak 
    595  1.17       cgd 	vnd = &vnd_softc[unit];
    596  1.17       cgd 	vio = (struct vnd_ioctl *)data;
    597   1.1    brezak 	switch (cmd) {
    598   1.1    brezak 
    599  1.17       cgd 	case VNDIOCSET:
    600  1.17       cgd 		if (vnd->sc_flags & VNF_INITED)
    601  1.22   thorpej 			return (EBUSY);
    602  1.22   thorpej 
    603  1.24  christos 		if ((error = vndlock(vnd)) != 0)
    604  1.22   thorpej 			return (error);
    605  1.22   thorpej 
    606   1.1    brezak 		/*
    607   1.1    brezak 		 * Always open for read and write.
    608   1.1    brezak 		 * This is probably bogus, but it lets vn_open()
    609   1.1    brezak 		 * weed out directories, sockets, etc. so we don't
    610   1.1    brezak 		 * have to worry about them.
    611   1.1    brezak 		 */
    612  1.17       cgd 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p);
    613  1.24  christos 		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
    614  1.22   thorpej 			vndunlock(vnd);
    615   1.1    brezak 			return(error);
    616  1.22   thorpej 		}
    617  1.24  christos 		error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p);
    618  1.24  christos 		if (error) {
    619   1.1    brezak 			VOP_UNLOCK(nd.ni_vp);
    620   1.1    brezak 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
    621  1.22   thorpej 			vndunlock(vnd);
    622   1.1    brezak 			return(error);
    623   1.1    brezak 		}
    624   1.1    brezak 		VOP_UNLOCK(nd.ni_vp);
    625  1.17       cgd 		vnd->sc_vp = nd.ni_vp;
    626  1.17       cgd 		vnd->sc_size = btodb(vattr.va_size);	/* note truncation */
    627  1.24  christos 		if ((error = vndsetcred(vnd, p->p_ucred)) != 0) {
    628   1.5       cgd 			(void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p);
    629  1.22   thorpej 			vndunlock(vnd);
    630   1.1    brezak 			return(error);
    631   1.1    brezak 		}
    632  1.17       cgd 		vndthrottle(vnd, vnd->sc_vp);
    633  1.17       cgd 		vio->vnd_size = dbtob(vnd->sc_size);
    634  1.17       cgd 		vnd->sc_flags |= VNF_INITED;
    635  1.17       cgd #ifdef DEBUG
    636  1.17       cgd 		if (vnddebug & VDB_INIT)
    637  1.30  christos 			printf("vndioctl: SET vp %p size %lx\n",
    638  1.28  christos 			    vnd->sc_vp, (unsigned long) vnd->sc_size);
    639   1.1    brezak #endif
    640  1.22   thorpej 
    641  1.23   thorpej 		/* Attach the disk. */
    642  1.23   thorpej 		bzero(vnd->sc_xname, sizeof(vnd->sc_xname));	/* XXX */
    643  1.30  christos 		sprintf(vnd->sc_xname, "vnd%d", unit);		/* XXX */
    644  1.23   thorpej 		vnd->sc_dkdev.dk_name = vnd->sc_xname;
    645  1.23   thorpej 		disk_attach(&vnd->sc_dkdev);
    646  1.23   thorpej 
    647  1.22   thorpej 		vndunlock(vnd);
    648  1.22   thorpej 
    649   1.1    brezak 		break;
    650   1.1    brezak 
    651  1.17       cgd 	case VNDIOCCLR:
    652  1.17       cgd 		if ((vnd->sc_flags & VNF_INITED) == 0)
    653  1.22   thorpej 			return (ENXIO);
    654  1.22   thorpej 
    655  1.24  christos 		if ((error = vndlock(vnd)) != 0)
    656  1.22   thorpej 			return (error);
    657  1.22   thorpej 
    658  1.22   thorpej 		/*
    659  1.22   thorpej 		 * Don't unconfigure if any other partitions are open
    660  1.22   thorpej 		 * or if both the character and block flavors of this
    661  1.22   thorpej 		 * partition are open.
    662  1.22   thorpej 		 */
    663  1.22   thorpej 		part = DISKPART(dev);
    664  1.22   thorpej 		pmask = (1 << part);
    665  1.22   thorpej 		if ((vnd->sc_dkdev.dk_openmask & ~pmask) ||
    666  1.22   thorpej 		    ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
    667  1.22   thorpej 		    (vnd->sc_dkdev.dk_copenmask & pmask))) {
    668  1.22   thorpej 			vndunlock(vnd);
    669  1.22   thorpej 			return (EBUSY);
    670  1.22   thorpej 		}
    671  1.22   thorpej 
    672  1.17       cgd 		vndclear(vnd);
    673   1.1    brezak #ifdef DEBUG
    674  1.17       cgd 		if (vnddebug & VDB_INIT)
    675  1.30  christos 			printf("vndioctl: CLRed\n");
    676   1.1    brezak #endif
    677  1.23   thorpej 
    678  1.23   thorpej 		/* Detach the disk. */
    679  1.24  christos 		disk_detach(&vnd->sc_dkdev);
    680  1.22   thorpej 
    681  1.22   thorpej 		vndunlock(vnd);
    682  1.22   thorpej 
    683   1.1    brezak 		break;
    684   1.1    brezak 
    685  1.22   thorpej 	/*
    686  1.22   thorpej 	 * XXX Should support disklabels.
    687  1.22   thorpej 	 */
    688  1.22   thorpej 
    689   1.1    brezak 	default:
    690  1.12       cgd 		return(ENOTTY);
    691   1.1    brezak 	}
    692  1.22   thorpej 
    693  1.22   thorpej 	return (0);
    694   1.1    brezak }
    695   1.1    brezak 
    696   1.1    brezak /*
    697   1.1    brezak  * Duplicate the current process's credentials.  Since we are called only
    698   1.1    brezak  * as the result of a SET ioctl and only root can do that, any future access
    699   1.1    brezak  * to this "disk" is essentially as root.  Note that credentials may change
    700   1.1    brezak  * if some other uid can write directly to the mapped file (NFS).
                         *
                         * Stores a crdup() copy of `cred' in vnd->sc_cred, then reads up to one
                         * DEV_BSIZE block from offset 0 of vnd->sc_vp using that copy.  The
                         * data read is thrown away; only the status of the read is kept.
                         * vnd->sc_vp and vnd->sc_size must already be set, since both are
                         * read here.
                         *
                         * Returns the result of VOP_READ().
                         *
                         * NOTE(review): when the read fails, sc_cred still holds the
                         * duplicated credential.  The VNDIOCSET error path in vndioctl()
                         * closes the vnode but does not appear to crfree() it -- confirm
                         * whether this leaks a credential reference.
    701   1.1    brezak  */
    702  1.16       cgd int
    703  1.17       cgd vndsetcred(vnd, cred)
    704  1.17       cgd 	register struct vnd_softc *vnd;
    705   1.1    brezak 	struct ucred *cred;
    706   1.1    brezak {
    707   1.1    brezak 	struct uio auio;
    708   1.1    brezak 	struct iovec aiov;
    709   1.5       cgd 	char *tmpbuf;
    710   1.5       cgd 	int error;
    711   1.1    brezak 
    712  1.17       cgd 	vnd->sc_cred = crdup(cred);
    713   1.5       cgd 	tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
    714   1.5       cgd 
    715   1.1    brezak 	/* XXX: Horrible kludge to establish credentials for NFS */
    716   1.1    brezak 	aiov.iov_base = tmpbuf;
                        	/* Read at most one block; less when the backing object is smaller. */
    717  1.17       cgd 	aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
    718   1.1    brezak 	auio.uio_iov = &aiov;
    719   1.1    brezak 	auio.uio_iovcnt = 1;
    720   1.1    brezak 	auio.uio_offset = 0;
    721   1.1    brezak 	auio.uio_rw = UIO_READ;
    722   1.1    brezak 	auio.uio_segflg = UIO_SYSSPACE;
    723   1.1    brezak 	auio.uio_resid = aiov.iov_len;
                        	/* The vnode is held locked only for the duration of the read. */
    724  1.21   mycroft 	VOP_LOCK(vnd->sc_vp);
    725  1.17       cgd 	error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
    726  1.21   mycroft 	VOP_UNLOCK(vnd->sc_vp);
    727   1.5       cgd 
                        	/* The scratch buffer is released on both success and failure. */
    728   1.5       cgd 	free(tmpbuf, M_TEMP);
    729   1.5       cgd 	return (error);
    730   1.1    brezak }
    731   1.1    brezak 
    732   1.1    brezak /*
    733   1.1    brezak  * Set maxactive based on FS type
                         *
                         * When the kernel is built with NFS and `vp' uses the NFSv2 vnode
                         * operations vector, vnd->sc_maxactive is set to 2; in every other
                         * case it is set to 8.
                         *
                         * NOTE(review): sc_maxactive presumably bounds the number of
                         * outstanding I/O requests per unit -- confirm against the code that
                         * reads it, which is outside this section.
    734   1.1    brezak  */
    735  1.16       cgd void
    736  1.17       cgd vndthrottle(vnd, vp)
    737  1.17       cgd 	register struct vnd_softc *vnd;
    738   1.1    brezak 	struct vnode *vp;
    739   1.1    brezak {
    740  1.31   thorpej #ifdef NFS
    741  1.24  christos 	extern int (**nfsv2_vnodeop_p) __P((void *));
    742   1.5       cgd 
                        	/* The filesystem type is identified by comparing operation vectors. */
    743   1.2    brezak 	if (vp->v_op == nfsv2_vnodeop_p)
    744  1.17       cgd 		vnd->sc_maxactive = 2;
    745   1.1    brezak 	else
    746   1.1    brezak #endif
                        		/* Without NFS the `else' is compiled out and this is unconditional. */
    747  1.17       cgd 		vnd->sc_maxactive = 8;
    748   1.1    brezak 
                        	/* Lower bound of 1; cannot trigger with the constants used above. */
    749  1.17       cgd 	if (vnd->sc_maxactive < 1)
    750  1.17       cgd 		vnd->sc_maxactive = 1;
    751   1.1    brezak }
    752   1.1    brezak 
                        /*
                         * Unconfigure every vnd unit that is currently marked VNF_INITED by
                         * calling vndclear() on it.  Units that are not configured are left
                         * untouched.
                         *
                         * NOTE(review): unlike the VNDIOCCLR ioctl path, this neither takes
                         * the per-unit lock nor calls disk_detach() -- presumably acceptable
                         * because it runs at shutdown; confirm against the caller.
                         */
    753  1.16       cgd void
    754  1.17       cgd vndshutdown()
    755   1.1    brezak {
    756  1.17       cgd 	register struct vnd_softc *vnd;
    757   1.1    brezak 
                        	/* Walk all numvnd entries of the vnd_softc array. */
    758  1.17       cgd 	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
    759  1.17       cgd 		if (vnd->sc_flags & VNF_INITED)
    760  1.17       cgd 			vndclear(vnd);
    761   1.1    brezak }
    762   1.1    brezak 
                        /*
                         * Break the association between a vnd unit and its backing vnode.
                         *
                         * Clears VNF_INITED, closes vnd->sc_vp with FREAD|FWRITE using
                         * vnd->sc_cred and the current process, releases the credential, and
                         * resets sc_vp, sc_cred and sc_size to null/zero.
                         *
                         * Panics if sc_vp is null, so callers must only pass a unit that has
                         * a backing vnode.  Both visible callers check VNF_INITED first.
                         */
    763  1.16       cgd void
    764  1.17       cgd vndclear(vnd)
    765  1.17       cgd 	register struct vnd_softc *vnd;
    766   1.1    brezak {
    767  1.17       cgd 	register struct vnode *vp = vnd->sc_vp;
    768   1.1    brezak 	struct proc *p = curproc;		/* XXX */
    769   1.1    brezak 
    770   1.1    brezak #ifdef DEBUG
    771  1.17       cgd 	if (vnddebug & VDB_FOLLOW)
    772  1.30  christos 		printf("vndclear(%p): vp %p\n", vnd, vp);
    773   1.1    brezak #endif
                        	/* The unit is marked unconfigured before the vnode is released. */
    774  1.17       cgd 	vnd->sc_flags &= ~VNF_INITED;
                        	/* Note: the panic text names vndioctl although this is vndclear(). */
    775   1.1    brezak 	if (vp == (struct vnode *)0)
    776  1.17       cgd 		panic("vndioctl: null vp");
                        	/* The result of vn_close() is deliberately ignored. */
    777  1.17       cgd 	(void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p);
    778  1.17       cgd 	crfree(vnd->sc_cred);
    779  1.17       cgd 	vnd->sc_vp = (struct vnode *)0;
    780  1.17       cgd 	vnd->sc_cred = (struct ucred *)0;
    781  1.17       cgd 	vnd->sc_size = 0;
    782   1.1    brezak }
    783   1.1    brezak 
                        /*
                         * Return the size recorded for the unit addressed by `dev'.
                         *
                         * The value is sc_size, which the VNDIOCSET path sets to btodb() of
                         * the backing file's size, i.e. it is a count of disk blocks rather
                         * than bytes.  Returns -1 when the unit number is not below numvnd or
                         * the unit is not marked VNF_INITED.
                         *
                         * NOTE(review): the softc address is formed from `unit' before the
                         * range check.  It is only dereferenced when unit < numvnd because
                         * of the short-circuit ||, but computing it first for an out-of-range
                         * unit is worth confirming as intentional.
                         */
    784  1.16       cgd int
    785  1.17       cgd vndsize(dev)
    786   1.1    brezak 	dev_t dev;
    787   1.1    brezak {
    788  1.17       cgd 	int unit = vndunit(dev);
    789  1.17       cgd 	register struct vnd_softc *vnd = &vnd_softc[unit];
    790   1.1    brezak 
    791  1.17       cgd 	if (unit >= numvnd || (vnd->sc_flags & VNF_INITED) == 0)
    792   1.1    brezak 		return(-1);
    793  1.17       cgd 	return(vnd->sc_size);
    794   1.1    brezak }
    795   1.1    brezak 
                        /*
                         * Dump routine for vnd devices.  Dumping to a vnd is not implemented:
                         * all four arguments are ignored and ENXIO is always returned.
                         */
    796  1.16       cgd int
    797  1.19       cgd vnddump(dev, blkno, va, size)
    798  1.16       cgd 	dev_t dev;
    799  1.19       cgd 	daddr_t blkno;
    800  1.19       cgd 	caddr_t va;
    801  1.19       cgd 	size_t size;
    802   1.1    brezak {
    803  1.16       cgd 
    804  1.19       cgd 	/* Not implemented. */
    805  1.19       cgd 	return ENXIO;
    806   1.1    brezak }
    807  1.22   thorpej 
    808  1.22   thorpej /*
    809  1.22   thorpej  * Wait interruptibly for an exclusive lock.
    810  1.22   thorpej  *
    811  1.22   thorpej  * XXX
    812  1.22   thorpej  * Several drivers do this; it should be abstracted and made MP-safe.
                         *
                         * The lock is the VNF_LOCKED bit in sc->sc_flags.  While another
                         * holder has it set, VNF_WANTED is raised and the caller sleeps on the
                         * softc address with no timeout.
                         *
                         * Returns 0 with VNF_LOCKED set on success.  If tsleep() returns
                         * non-zero (PCATCH is passed, so the sleep may be interrupted), that
                         * value is returned and the lock is NOT held.
                         *
                         * NOTE(review): VNF_WANTED is left set on the error return; the next
                         * vndunlock() clears it and issues a wakeup regardless.
    813  1.22   thorpej  */
    814  1.22   thorpej static int
    815  1.22   thorpej vndlock(sc)
    816  1.22   thorpej 	struct vnd_softc *sc;
    817  1.22   thorpej {
    818  1.22   thorpej 	int error;
    819  1.22   thorpej 
                        	/* Re-test the flag after every wakeup; another waiter may have won. */
    820  1.22   thorpej 	while ((sc->sc_flags & VNF_LOCKED) != 0) {
    821  1.22   thorpej 		sc->sc_flags |= VNF_WANTED;
    822  1.22   thorpej 		if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
    823  1.22   thorpej 			return (error);
    824  1.22   thorpej 	}
    825  1.22   thorpej 	sc->sc_flags |= VNF_LOCKED;
    826  1.22   thorpej 	return (0);
    827  1.22   thorpej }
    828  1.22   thorpej 
    829  1.22   thorpej /*
    830  1.22   thorpej  * Unlock and wake up any waiters.
                         *
                         * Clears VNF_LOCKED in sc->sc_flags.  If VNF_WANTED is set, it is
                         * cleared as well and wakeup() is issued on the softc address, which
                         * is the same channel vndlock() sleeps on.
    831  1.22   thorpej  */
    832  1.22   thorpej static void
    833  1.22   thorpej vndunlock(sc)
    834  1.22   thorpej 	struct vnd_softc *sc;
    835  1.22   thorpej {
    836  1.22   thorpej 
    837  1.22   thorpej 	sc->sc_flags &= ~VNF_LOCKED;
                        	/* The wakeup is skipped entirely when nobody asked for the lock. */
    838  1.22   thorpej 	if ((sc->sc_flags & VNF_WANTED) != 0) {
    839  1.22   thorpej 		sc->sc_flags &= ~VNF_WANTED;
    840  1.22   thorpej 		wakeup(sc);
    841  1.22   thorpej 	}
    842  1.22   thorpej }
    843