Home | History | Annotate | Line # | Download | only in rumpvfs
rumpblk.c revision 1.5
      1 /*	$NetBSD: rumpblk.c,v 1.5 2009/02/26 00:37:48 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by the
      7  * Finnish Cultural Foundation.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  * SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Block device emulation.  Presents a block device interface and
     33  * uses rumpuser system calls to satisfy I/O requests.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.5 2009/02/26 00:37:48 pooka Exp $");
     38 
     39 #include <sys/param.h>
     40 #include <sys/buf.h>
     41 #include <sys/conf.h>
     42 #include <sys/disklabel.h>
     43 #include <sys/fcntl.h>
     44 #include <sys/kmem.h>
     45 #include <sys/malloc.h>
     46 #include <sys/stat.h>
     47 
     48 #include <rump/rumpuser.h>
     49 
     50 #include "rump_private.h"
     51 #include "rump_vfs_private.h"
     52 
     53 #define RUMPBLK_SIZE 16
     54 static struct rblkdev {
     55 	char *rblk_path;
     56 	int rblk_fd;
     57 
     58 	struct partition *rblk_curpi;
     59 	struct partition rblk_pi;
     60 	struct disklabel rblk_dl;
     61 } minors[RUMPBLK_SIZE];
     62 
     63 dev_type_open(rumpblk_open);
     64 dev_type_close(rumpblk_close);
     65 dev_type_read(rumpblk_read);
     66 dev_type_write(rumpblk_write);
     67 dev_type_ioctl(rumpblk_ioctl);
     68 dev_type_strategy(rumpblk_strategy);
     69 dev_type_strategy(rumpblk_strategy_fail);
     70 dev_type_dump(rumpblk_dump);
     71 dev_type_size(rumpblk_size);
     72 
     73 static const struct bdevsw rumpblk_bdevsw = {
     74 	rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
     75 	nodump, nosize, D_DISK
     76 };
     77 
     78 static const struct bdevsw rumpblk_bdevsw_fail = {
     79 	rumpblk_open, rumpblk_close, rumpblk_strategy_fail, rumpblk_ioctl,
     80 	nodump, nosize, D_DISK
     81 };
     82 
     83 static const struct cdevsw rumpblk_cdevsw = {
     84 	rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
     85 	rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
     86 };
     87 
     88 /* fail every n out of BLKFAIL_MAX */
     89 #define BLKFAIL_MAX 10000
     90 static int blkfail;
     91 static unsigned randstate;
     92 
     93 int
     94 rumpblk_init()
     95 {
     96 	char buf[64];
     97 	int rumpblk = RUMPBLK;
     98 	int error;
     99 
    100 	if (rumpuser_getenv("RUMP_BLKFAIL", buf, sizeof(buf), &error) == 0) {
    101 		blkfail = strtoul(buf, NULL, 10);
    102 		/* fail everything */
    103 		if (blkfail > BLKFAIL_MAX)
    104 			blkfail = BLKFAIL_MAX;
    105 		if (rumpuser_getenv("RUMP_BLKFAIL_SEED", buf, sizeof(buf),
    106 		    &error) == 0) {
    107 			randstate = strtoul(buf, NULL, 10);
    108 		} else {
    109 			randstate = arc4random(); /* XXX: not enough entropy */
    110 		}
    111 		printf("rumpblk: FAULT INJECTION ACTIVE!  every %d out of"
    112 		    " %d I/O will fail.  key %u\n", blkfail, BLKFAIL_MAX,
    113 		    randstate);
    114 	} else {
    115 		blkfail = 0;
    116 	}
    117 
    118 	if (blkfail) {
    119 		return devsw_attach("rumpblk", &rumpblk_bdevsw_fail, &rumpblk,
    120 		    &rumpblk_cdevsw, &rumpblk);
    121 	} else {
    122 		return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
    123 		    &rumpblk_cdevsw, &rumpblk);
    124 	}
    125 }
    126 
    127 int
    128 rumpblk_register(const char *path)
    129 {
    130 	size_t len;
    131 	int i;
    132 
    133 	for (i = 0; i < RUMPBLK_SIZE; i++)
    134 		if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
    135 			return i;
    136 
    137 	for (i = 0; i < RUMPBLK_SIZE; i++)
    138 		if (minors[i].rblk_path == NULL)
    139 			break;
    140 	if (i == RUMPBLK_SIZE)
    141 		return -1;
    142 
    143 	len = strlen(path);
    144 	minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
    145 	strcpy(minors[i].rblk_path, path);
    146 	minors[i].rblk_fd = -1;
    147 	return i;
    148 }
    149 
    150 int
    151 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
    152 {
    153 	struct rblkdev *rblk = &minors[minor(dev)];
    154 	uint64_t fsize;
    155 	int ft;
    156 	int error, fd;
    157 
    158 	KASSERT(rblk->rblk_fd == -1);
    159 	fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
    160 	if (error)
    161 		return error;
    162 
    163 	/*
    164 	 * Setup partition info.  First try the usual. */
    165 	if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
    166 		/*
    167 		 * If that works, use it.  We still need to guess
    168 		 * which partition we are on.
    169 		 */
    170 		rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
    171 	} else {
    172 		/*
    173 		 * If that didn't work, assume were a regular file
    174 		 * and just try to fake the info the best we can.
    175 		 */
    176 		memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
    177 
    178 		if (rumpuser_getfileinfo(rblk->rblk_path, &fsize,
    179 		    &ft, &error) == -1) {
    180 			int dummy;
    181 
    182 			rumpuser_close(fd, &dummy);
    183 			return error;
    184 		}
    185 		rblk->rblk_pi.p_size = fsize >> DEV_BSHIFT;
    186 		rblk->rblk_dl.d_secsize = DEV_BSIZE;
    187 		rblk->rblk_curpi = &rblk->rblk_pi;
    188 	}
    189 	rblk->rblk_fd = fd;
    190 
    191 	return 0;
    192 }
    193 
    194 int
    195 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
    196 {
    197 	struct rblkdev *rblk = &minors[minor(dev)];
    198 	int dummy;
    199 
    200 	rumpuser_close(rblk->rblk_fd, &dummy);
    201 	rblk->rblk_fd = -1;
    202 
    203 	return 0;
    204 }
    205 
    206 int
    207 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
    208 {
    209 	struct rblkdev *rblk = &minors[minor(dev)];
    210 	int rv, error;
    211 
    212 	if (xfer == DIOCGPART) {
    213 		struct partinfo *pi = (struct partinfo *)addr;
    214 
    215 		pi->part = rblk->rblk_curpi;
    216 		pi->disklab = &rblk->rblk_dl;
    217 
    218 		return 0;
    219 	}
    220 
    221 	rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
    222 	if (rv == -1)
    223 		return error;
    224 
    225 	return 0;
    226 }
    227 
    228 int
    229 rumpblk_read(dev_t dev, struct uio *uio, int flags)
    230 {
    231 
    232 	panic("%s: unimplemented", __func__);
    233 }
    234 
    235 int
    236 rumpblk_write(dev_t dev, struct uio *uio, int flags)
    237 {
    238 
    239 	panic("%s: unimplemented", __func__);
    240 }
    241 
    242 static void
    243 dostrategy(struct buf *bp)
    244 {
    245 	struct rblkdev *rblk = &minors[minor(bp->b_dev)];
    246 	off_t off;
    247 	int async;
    248 
    249 	off = bp->b_blkno << DEV_BSHIFT;
    250 	DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
    251 	    " (0x%" PRIx64 " - 0x%" PRIx64")\n",
    252 	    bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
    253 	    off, off, (off + bp->b_bcount)));
    254 
    255 	/*
    256 	 * Do I/O.  We have different paths for async and sync I/O.
    257 	 * Async I/O is done by passing a request to rumpuser where
    258 	 * it is executed.  The rumpuser routine then calls
    259 	 * biodone() to signal any waiters in the kernel.  I/O's are
    260 	 * executed in series.  Technically executing them in parallel
    261 	 * would produce better results, but then we'd need either
    262 	 * more threads or posix aio.  Maybe worth investigating
    263 	 * this later.
    264 	 *
    265 	 * Synchronous I/O is done directly in the context mainly to
    266 	 * avoid unnecessary scheduling with the I/O thread.
    267 	 */
    268 	async = bp->b_flags & B_ASYNC;
    269 	if (async && rump_threads) {
    270 		struct rumpuser_aio *rua;
    271 
    272 		rumpuser_mutex_enter(&rumpuser_aio_mtx);
    273 		/*
    274 		 * Check if our buffer is full.  Doing it this way
    275 		 * throttles the I/O a bit if we have a massive
    276 		 * async I/O burst.
    277 		 *
    278 		 * XXX: this actually leads to deadlocks with spl()
    279 		 * (caller maybe be at splbio() legally for async I/O),
    280 		 * so for now set N_AIOS high and FIXXXME some day.
    281 		 */
    282 		if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
    283 			rumpuser_mutex_exit(&rumpuser_aio_mtx);
    284 			goto syncfallback;
    285 		}
    286 
    287 		rua = &rumpuser_aios[rumpuser_aio_head];
    288 		KASSERT(rua->rua_bp == NULL);
    289 		rua->rua_fd = rblk->rblk_fd;
    290 		rua->rua_data = bp->b_data;
    291 		rua->rua_dlen = bp->b_bcount;
    292 		rua->rua_off = off;
    293 		rua->rua_bp = bp;
    294 		rua->rua_op = BUF_ISREAD(bp);
    295 
    296 		/* insert into queue & signal */
    297 		rumpuser_aio_head = (rumpuser_aio_head+1) % (N_AIOS-1);
    298 		rumpuser_cv_signal(&rumpuser_aio_cv);
    299 		rumpuser_mutex_exit(&rumpuser_aio_mtx);
    300 	} else {
    301  syncfallback:
    302 		if (BUF_ISREAD(bp)) {
    303 			rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
    304 			    bp->b_bcount, off, rump_biodone, bp);
    305 		} else {
    306 			rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
    307 			    bp->b_bcount, off, rump_biodone, bp);
    308 		}
    309 		if (!async) {
    310 			int error;
    311 
    312 			if (BUF_ISWRITE(bp))
    313 				rumpuser_fsync(rblk->rblk_fd, &error);
    314 		}
    315 	}
    316 }
    317 
/*
 * Strategy entry point of the regular (non-failing) block device:
 * every request is handed to dostrategy() unchanged.
 */
void
rumpblk_strategy(struct buf *bp)
{
	dostrategy(bp);
}
    324 
    325 /*
    326  * Simple random number generator.  This is private so that we can
    327  * very repeatedly control which blocks will fail.
    328  *
    329  * <mlelstv> pooka, rand()
    330  * <mlelstv> [paste]
    331  */
    332 static unsigned
    333 gimmerand(void)
    334 {
    335 
    336 	return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
    337 }
    338 
    339 /*
    340  * Block device with very simple fault injection.  Fails every
    341  * n out of BLKFAIL_MAX I/O with EIO.  n is determined by the env
    342  * variable RUMP_BLKFAIL.
    343  */
    344 void
    345 rumpblk_strategy_fail(struct buf *bp)
    346 {
    347 
    348 	if (gimmerand() % BLKFAIL_MAX >= blkfail) {
    349 		dostrategy(bp);
    350 	} else {
    351 		printf("block fault injection: failing I/O on block %lld\n",
    352 		    (long long)bp->b_blkno);
    353 		bp->b_error = EIO;
    354 		biodone(bp);
    355 	}
    356 }
    357