Home | History | Annotate | Line # | Download | only in rumpvfs
rumpblk.c revision 1.9
      1  1.9   pooka /*	$NetBSD: rumpblk.c,v 1.9 2009/03/18 15:39:27 pooka Exp $	*/
      2  1.1   pooka 
      3  1.1   pooka /*
      4  1.1   pooka  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
      5  1.1   pooka  *
      6  1.1   pooka  * Development of this software was supported by the
      7  1.1   pooka  * Finnish Cultural Foundation.
      8  1.1   pooka  *
      9  1.1   pooka  * Redistribution and use in source and binary forms, with or without
     10  1.1   pooka  * modification, are permitted provided that the following conditions
     11  1.1   pooka  * are met:
     12  1.1   pooka  * 1. Redistributions of source code must retain the above copyright
     13  1.1   pooka  *    notice, this list of conditions and the following disclaimer.
     14  1.1   pooka  * 2. Redistributions in binary form must reproduce the above copyright
     15  1.1   pooka  *    notice, this list of conditions and the following disclaimer in the
     16  1.1   pooka  *    documentation and/or other materials provided with the distribution.
     17  1.1   pooka  *
     18  1.1   pooka  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     19  1.1   pooka  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  1.1   pooka  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  1.1   pooka  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     22  1.1   pooka  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  1.1   pooka  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     24  1.1   pooka  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  1.1   pooka  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  1.1   pooka  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  1.1   pooka  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  1.1   pooka  * SUCH DAMAGE.
     29  1.1   pooka  */
     30  1.1   pooka 
     31  1.1   pooka /*
     32  1.1   pooka  * Block device emulation.  Presents a block device interface and
     33  1.1   pooka  * uses rumpuser system calls to satisfy I/O requests.
     34  1.1   pooka  */
     35  1.1   pooka 
     36  1.1   pooka #include <sys/cdefs.h>
     37  1.9   pooka __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.9 2009/03/18 15:39:27 pooka Exp $");
     38  1.1   pooka 
     39  1.1   pooka #include <sys/param.h>
     40  1.1   pooka #include <sys/buf.h>
     41  1.1   pooka #include <sys/conf.h>
     42  1.1   pooka #include <sys/disklabel.h>
     43  1.1   pooka #include <sys/fcntl.h>
     44  1.1   pooka #include <sys/kmem.h>
     45  1.1   pooka #include <sys/malloc.h>
     46  1.1   pooka #include <sys/stat.h>
     47  1.1   pooka 
     48  1.1   pooka #include <rump/rumpuser.h>
     49  1.1   pooka 
     50  1.1   pooka #include "rump_private.h"
     51  1.1   pooka #include "rump_vfs_private.h"
     52  1.1   pooka 
     53  1.1   pooka #define RUMPBLK_SIZE 16
     54  1.1   pooka static struct rblkdev {
     55  1.1   pooka 	char *rblk_path;
     56  1.1   pooka 	int rblk_fd;
     57  1.9   pooka 	uint8_t *rblk_mem;
     58  1.9   pooka 	size_t rblk_size;
     59  1.1   pooka 
     60  1.1   pooka 	struct partition *rblk_curpi;
     61  1.1   pooka 	struct partition rblk_pi;
     62  1.1   pooka 	struct disklabel rblk_dl;
     63  1.1   pooka } minors[RUMPBLK_SIZE];
     64  1.1   pooka 
/*
 * Entry point prototypes, declared through the dev_type_*() macros.
 */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);
dev_type_ioctl(rumpblk_ioctl);
dev_type_strategy(rumpblk_strategy);
dev_type_strategy(rumpblk_strategy_fail);
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
     74  1.1   pooka 
     75  1.1   pooka static const struct bdevsw rumpblk_bdevsw = {
     76  1.1   pooka 	rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
     77  1.1   pooka 	nodump, nosize, D_DISK
     78  1.1   pooka };
     79  1.1   pooka 
     80  1.3   pooka static const struct bdevsw rumpblk_bdevsw_fail = {
     81  1.3   pooka 	rumpblk_open, rumpblk_close, rumpblk_strategy_fail, rumpblk_ioctl,
     82  1.3   pooka 	nodump, nosize, D_DISK
     83  1.3   pooka };
     84  1.3   pooka 
     85  1.1   pooka static const struct cdevsw rumpblk_cdevsw = {
     86  1.1   pooka 	rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
     87  1.1   pooka 	rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
     88  1.1   pooka };
     89  1.1   pooka 
/*
 * Fault injection state: blkfail out of every BLKFAIL_MAX I/O requests
 * are failed.  randstate is the state of the private random number
 * generator that picks the victims.
 */
#define BLKFAIL_MAX 10000
static int blkfail;
static unsigned randstate;
     94  1.1   pooka 
     95  1.1   pooka int
     96  1.8  cegger rumpblk_init(void)
     97  1.1   pooka {
     98  1.3   pooka 	char buf[64];
     99  1.1   pooka 	int rumpblk = RUMPBLK;
    100  1.3   pooka 	int error;
    101  1.3   pooka 
    102  1.3   pooka 	if (rumpuser_getenv("RUMP_BLKFAIL", buf, sizeof(buf), &error) == 0) {
    103  1.3   pooka 		blkfail = strtoul(buf, NULL, 10);
    104  1.3   pooka 		/* fail everything */
    105  1.3   pooka 		if (blkfail > BLKFAIL_MAX)
    106  1.3   pooka 			blkfail = BLKFAIL_MAX;
    107  1.3   pooka 		if (rumpuser_getenv("RUMP_BLKFAIL_SEED", buf, sizeof(buf),
    108  1.3   pooka 		    &error) == 0) {
    109  1.3   pooka 			randstate = strtoul(buf, NULL, 10);
    110  1.3   pooka 		} else {
    111  1.3   pooka 			randstate = arc4random(); /* XXX: not enough entropy */
    112  1.3   pooka 		}
    113  1.3   pooka 		printf("rumpblk: FAULT INJECTION ACTIVE!  every %d out of"
    114  1.3   pooka 		    " %d I/O will fail.  key %u\n", blkfail, BLKFAIL_MAX,
    115  1.3   pooka 		    randstate);
    116  1.3   pooka 	} else {
    117  1.3   pooka 		blkfail = 0;
    118  1.3   pooka 	}
    119  1.1   pooka 
    120  1.3   pooka 	if (blkfail) {
    121  1.3   pooka 		return devsw_attach("rumpblk", &rumpblk_bdevsw_fail, &rumpblk,
    122  1.3   pooka 		    &rumpblk_cdevsw, &rumpblk);
    123  1.3   pooka 	} else {
    124  1.3   pooka 		return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
    125  1.3   pooka 		    &rumpblk_cdevsw, &rumpblk);
    126  1.3   pooka 	}
    127  1.1   pooka }
    128  1.1   pooka 
    129  1.1   pooka int
    130  1.1   pooka rumpblk_register(const char *path)
    131  1.1   pooka {
    132  1.1   pooka 	size_t len;
    133  1.1   pooka 	int i;
    134  1.1   pooka 
    135  1.1   pooka 	for (i = 0; i < RUMPBLK_SIZE; i++)
    136  1.1   pooka 		if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
    137  1.1   pooka 			return i;
    138  1.1   pooka 
    139  1.1   pooka 	for (i = 0; i < RUMPBLK_SIZE; i++)
    140  1.1   pooka 		if (minors[i].rblk_path == NULL)
    141  1.1   pooka 			break;
    142  1.1   pooka 	if (i == RUMPBLK_SIZE)
    143  1.1   pooka 		return -1;
    144  1.1   pooka 
    145  1.1   pooka 	len = strlen(path);
    146  1.1   pooka 	minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
    147  1.1   pooka 	strcpy(minors[i].rblk_path, path);
    148  1.1   pooka 	minors[i].rblk_fd = -1;
    149  1.1   pooka 	return i;
    150  1.1   pooka }
    151  1.1   pooka 
    152  1.1   pooka int
    153  1.1   pooka rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
    154  1.1   pooka {
    155  1.1   pooka 	struct rblkdev *rblk = &minors[minor(dev)];
    156  1.9   pooka 	uint8_t *mem = NULL;
    157  1.5   pooka 	uint64_t fsize;
    158  1.9   pooka 	int ft, dummy;
    159  1.1   pooka 	int error, fd;
    160  1.1   pooka 
    161  1.1   pooka 	KASSERT(rblk->rblk_fd == -1);
    162  1.1   pooka 	fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
    163  1.1   pooka 	if (error)
    164  1.1   pooka 		return error;
    165  1.1   pooka 
    166  1.9   pooka 	if (rumpuser_getfileinfo(rblk->rblk_path, &fsize, &ft, &error) == -1) {
    167  1.9   pooka 		rumpuser_close(fd, &dummy);
    168  1.9   pooka 		return error;
    169  1.9   pooka 	}
    170  1.9   pooka 
    171  1.9   pooka 	if (ft == RUMPUSER_FT_REG) {
    172  1.1   pooka 		/*
    173  1.9   pooka 		 * Try to mmap the file if it's size is max. half of
    174  1.9   pooka 		 * the address space.  If mmap fails due to e.g. limits,
    175  1.9   pooka 		 * we fall back to the read/write path.  This test is only
    176  1.9   pooka 		 * to prevent size_t vs. off_t wraparounds.
    177  1.1   pooka 		 */
    178  1.9   pooka 		if (fsize < 1<<(sizeof(void *)*8 - 1)) {
    179  1.9   pooka 			int mmflags;
    180  1.9   pooka 
    181  1.9   pooka 			mmflags = 0;
    182  1.9   pooka 			if (flag & FREAD)
    183  1.9   pooka 				mmflags |= RUMPUSER_FILEMMAP_READ;
    184  1.9   pooka 			if (flag & FWRITE)
    185  1.9   pooka 				mmflags |= RUMPUSER_FILEMMAP_WRITE;
    186  1.9   pooka 			mem = rumpuser_filemmap(fd, 0, fsize, mmflags, &error);
    187  1.9   pooka 		}
    188  1.9   pooka 
    189  1.1   pooka 		memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
    190  1.1   pooka 
    191  1.9   pooka 		rblk->rblk_size = fsize;
    192  1.9   pooka 		rblk->rblk_pi.p_size = fsize >> DEV_BSHIFT;
    193  1.9   pooka 		rblk->rblk_dl.d_secsize = DEV_BSIZE;
    194  1.9   pooka 		rblk->rblk_curpi = &rblk->rblk_pi;
    195  1.9   pooka 	} else {
    196  1.9   pooka 		if (rumpuser_ioctl(fd,DIOCGDINFO, &rblk->rblk_dl, &error)!=-1) {
    197  1.1   pooka 			rumpuser_close(fd, &dummy);
    198  1.1   pooka 			return error;
    199  1.1   pooka 		}
    200  1.9   pooka 
    201  1.9   pooka 		rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
    202  1.1   pooka 	}
    203  1.1   pooka 	rblk->rblk_fd = fd;
    204  1.9   pooka 	rblk->rblk_mem = mem;
    205  1.1   pooka 
    206  1.1   pooka 	return 0;
    207  1.1   pooka }
    208  1.1   pooka 
    209  1.1   pooka int
    210  1.1   pooka rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
    211  1.1   pooka {
    212  1.1   pooka 	struct rblkdev *rblk = &minors[minor(dev)];
    213  1.1   pooka 	int dummy;
    214  1.1   pooka 
    215  1.9   pooka 	if (rblk->rblk_mem) {
    216  1.9   pooka 		KASSERT(rblk->rblk_size);
    217  1.9   pooka 		rumpuser_memsync(rblk->rblk_mem, rblk->rblk_size, &dummy);
    218  1.9   pooka 		rumpuser_unmap(rblk->rblk_mem, rblk->rblk_size);
    219  1.9   pooka 		rblk->rblk_mem = NULL;
    220  1.9   pooka 	}
    221  1.1   pooka 	rumpuser_close(rblk->rblk_fd, &dummy);
    222  1.1   pooka 	rblk->rblk_fd = -1;
    223  1.1   pooka 
    224  1.1   pooka 	return 0;
    225  1.1   pooka }
    226  1.1   pooka 
    227  1.1   pooka int
    228  1.1   pooka rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
    229  1.1   pooka {
    230  1.1   pooka 	struct rblkdev *rblk = &minors[minor(dev)];
    231  1.1   pooka 	int rv, error;
    232  1.1   pooka 
    233  1.1   pooka 	if (xfer == DIOCGPART) {
    234  1.1   pooka 		struct partinfo *pi = (struct partinfo *)addr;
    235  1.1   pooka 
    236  1.1   pooka 		pi->part = rblk->rblk_curpi;
    237  1.1   pooka 		pi->disklab = &rblk->rblk_dl;
    238  1.1   pooka 
    239  1.1   pooka 		return 0;
    240  1.1   pooka 	}
    241  1.1   pooka 
    242  1.1   pooka 	rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
    243  1.1   pooka 	if (rv == -1)
    244  1.1   pooka 		return error;
    245  1.1   pooka 
    246  1.1   pooka 	return 0;
    247  1.1   pooka }
    248  1.1   pooka 
    249  1.1   pooka int
    250  1.1   pooka rumpblk_read(dev_t dev, struct uio *uio, int flags)
    251  1.1   pooka {
    252  1.1   pooka 
    253  1.1   pooka 	panic("%s: unimplemented", __func__);
    254  1.1   pooka }
    255  1.1   pooka 
    256  1.1   pooka int
    257  1.1   pooka rumpblk_write(dev_t dev, struct uio *uio, int flags)
    258  1.1   pooka {
    259  1.1   pooka 
    260  1.1   pooka 	panic("%s: unimplemented", __func__);
    261  1.1   pooka }
    262  1.1   pooka 
    263  1.3   pooka static void
    264  1.3   pooka dostrategy(struct buf *bp)
    265  1.1   pooka {
    266  1.1   pooka 	struct rblkdev *rblk = &minors[minor(bp->b_dev)];
    267  1.1   pooka 	off_t off;
    268  1.9   pooka 	int async, error;
    269  1.1   pooka 
    270  1.1   pooka 	off = bp->b_blkno << DEV_BSHIFT;
    271  1.9   pooka 	async = bp->b_flags & B_ASYNC;
    272  1.1   pooka 	DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
    273  1.1   pooka 	    " (0x%" PRIx64 " - 0x%" PRIx64")\n",
    274  1.1   pooka 	    bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
    275  1.1   pooka 	    off, off, (off + bp->b_bcount)));
    276  1.1   pooka 
    277  1.9   pooka 	/* mem optimization?  handle here and return */
    278  1.9   pooka 	if (rblk->rblk_mem) {
    279  1.9   pooka 		uint8_t *ioaddr = rblk->rblk_mem + off;
    280  1.9   pooka 		if (BUF_ISREAD(bp)) {
    281  1.9   pooka 			memcpy(bp->b_data, ioaddr, bp->b_bcount);
    282  1.9   pooka 		} else {
    283  1.9   pooka 			memcpy(ioaddr, bp->b_data, bp->b_bcount);
    284  1.9   pooka 		}
    285  1.9   pooka 
    286  1.9   pooka 		/* synchronous write, sync necessary bits back to disk */
    287  1.9   pooka 		if (BUF_ISWRITE(bp) && !async) {
    288  1.9   pooka 			rumpuser_memsync(ioaddr, bp->b_bcount, &error);
    289  1.9   pooka 		}
    290  1.9   pooka 		rump_biodone(bp, bp->b_bcount, 0);
    291  1.9   pooka 
    292  1.9   pooka 		return;
    293  1.9   pooka 	}
    294  1.9   pooka 
    295  1.1   pooka 	/*
    296  1.1   pooka 	 * Do I/O.  We have different paths for async and sync I/O.
    297  1.1   pooka 	 * Async I/O is done by passing a request to rumpuser where
    298  1.1   pooka 	 * it is executed.  The rumpuser routine then calls
    299  1.1   pooka 	 * biodone() to signal any waiters in the kernel.  I/O's are
    300  1.1   pooka 	 * executed in series.  Technically executing them in parallel
    301  1.1   pooka 	 * would produce better results, but then we'd need either
    302  1.1   pooka 	 * more threads or posix aio.  Maybe worth investigating
    303  1.1   pooka 	 * this later.
    304  1.1   pooka 	 *
    305  1.1   pooka 	 * Synchronous I/O is done directly in the context mainly to
    306  1.1   pooka 	 * avoid unnecessary scheduling with the I/O thread.
    307  1.1   pooka 	 */
    308  1.1   pooka 	if (async && rump_threads) {
    309  1.1   pooka 		struct rumpuser_aio *rua;
    310  1.1   pooka 
    311  1.1   pooka 		rumpuser_mutex_enter(&rumpuser_aio_mtx);
    312  1.1   pooka 		/*
    313  1.1   pooka 		 * Check if our buffer is full.  Doing it this way
    314  1.1   pooka 		 * throttles the I/O a bit if we have a massive
    315  1.1   pooka 		 * async I/O burst.
    316  1.1   pooka 		 */
    317  1.1   pooka 		if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
    318  1.1   pooka 			rumpuser_mutex_exit(&rumpuser_aio_mtx);
    319  1.1   pooka 			goto syncfallback;
    320  1.1   pooka 		}
    321  1.1   pooka 
    322  1.2   pooka 		rua = &rumpuser_aios[rumpuser_aio_head];
    323  1.2   pooka 		KASSERT(rua->rua_bp == NULL);
    324  1.2   pooka 		rua->rua_fd = rblk->rblk_fd;
    325  1.2   pooka 		rua->rua_data = bp->b_data;
    326  1.2   pooka 		rua->rua_dlen = bp->b_bcount;
    327  1.2   pooka 		rua->rua_off = off;
    328  1.2   pooka 		rua->rua_bp = bp;
    329  1.2   pooka 		rua->rua_op = BUF_ISREAD(bp);
    330  1.2   pooka 
    331  1.1   pooka 		/* insert into queue & signal */
    332  1.6   pooka 		rumpuser_aio_head = (rumpuser_aio_head+1) % N_AIOS;
    333  1.1   pooka 		rumpuser_cv_signal(&rumpuser_aio_cv);
    334  1.1   pooka 		rumpuser_mutex_exit(&rumpuser_aio_mtx);
    335  1.1   pooka 	} else {
    336  1.1   pooka  syncfallback:
    337  1.1   pooka 		if (BUF_ISREAD(bp)) {
    338  1.1   pooka 			rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
    339  1.1   pooka 			    bp->b_bcount, off, rump_biodone, bp);
    340  1.1   pooka 		} else {
    341  1.1   pooka 			rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
    342  1.1   pooka 			    bp->b_bcount, off, rump_biodone, bp);
    343  1.1   pooka 		}
    344  1.1   pooka 		if (!async) {
    345  1.1   pooka 			if (BUF_ISWRITE(bp))
    346  1.1   pooka 				rumpuser_fsync(rblk->rblk_fd, &error);
    347  1.1   pooka 		}
    348  1.1   pooka 	}
    349  1.1   pooka }
    350  1.3   pooka 
/*
 * Regular strategy routine: every request goes straight to
 * dostrategy().
 */
void
rumpblk_strategy(struct buf *bp)
{
	dostrategy(bp);
}
    357  1.3   pooka 
    358  1.3   pooka /*
    359  1.4   pooka  * Simple random number generator.  This is private so that we can
    360  1.4   pooka  * very repeatedly control which blocks will fail.
    361  1.4   pooka  *
    362  1.3   pooka  * <mlelstv> pooka, rand()
    363  1.3   pooka  * <mlelstv> [paste]
    364  1.3   pooka  */
    365  1.3   pooka static unsigned
    366  1.3   pooka gimmerand(void)
    367  1.3   pooka {
    368  1.3   pooka 
    369  1.3   pooka 	return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
    370  1.3   pooka }
    371  1.3   pooka 
    372  1.3   pooka /*
    373  1.3   pooka  * Block device with very simple fault injection.  Fails every
    374  1.3   pooka  * n out of BLKFAIL_MAX I/O with EIO.  n is determined by the env
    375  1.3   pooka  * variable RUMP_BLKFAIL.
    376  1.3   pooka  */
    377  1.3   pooka void
    378  1.3   pooka rumpblk_strategy_fail(struct buf *bp)
    379  1.3   pooka {
    380  1.3   pooka 
    381  1.3   pooka 	if (gimmerand() % BLKFAIL_MAX >= blkfail) {
    382  1.3   pooka 		dostrategy(bp);
    383  1.3   pooka 	} else {
    384  1.3   pooka 		printf("block fault injection: failing I/O on block %lld\n",
    385  1.3   pooka 		    (long long)bp->b_blkno);
    386  1.3   pooka 		bp->b_error = EIO;
    387  1.3   pooka 		biodone(bp);
    388  1.3   pooka 	}
    389  1.3   pooka }
    390