Home | History | Annotate | Line # | Download | only in rumpvfs
rumpblk.c revision 1.2
      1 /*	$NetBSD: rumpblk.c,v 1.2 2009/01/27 09:14:01 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by the
      7  * Finnish Cultural Foundation.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     24  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  * SUCH DAMAGE.
     29  */
     30 
     31 /*
     32  * Block device emulation.  Presents a block device interface and
     33  * uses rumpuser system calls to satisfy I/O requests.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.2 2009/01/27 09:14:01 pooka Exp $");
     38 
     39 #include <sys/param.h>
     40 #include <sys/buf.h>
     41 #include <sys/conf.h>
     42 #include <sys/disklabel.h>
     43 #include <sys/fcntl.h>
     44 #include <sys/kmem.h>
     45 #include <sys/malloc.h>
     46 #include <sys/stat.h>
     47 
     48 #include <rump/rumpuser.h>
     49 
     50 #include "rump_private.h"
     51 #include "rump_vfs_private.h"
     52 
/* Number of entries in the minors[] table, i.e. how many paths can be registered. */
#define RUMPBLK_SIZE 16
/*
 * Per-minor device state.  The table is indexed by minor number; the
 * index handed out by rumpblk_register() is the slot used by the
 * open/close/ioctl/strategy routines via minor(dev).
 */
static struct rblkdev {
	/* Private copy of the registered path; NULL marks an unused slot. */
	char *rblk_path;
	/* Descriptor returned by rumpuser_open(); -1 while the device is closed. */
	int rblk_fd;

	/*
	 * Partition reported for DIOCGPART.  Set at open time to either
	 * rblk_dl.d_partitions[0] (DIOCGDINFO worked) or rblk_pi (faked).
	 */
	struct partition *rblk_curpi;
	/* Faked partition; only p_size is filled in, from the stat() size. */
	struct partition rblk_pi;
	/* Label obtained with DIOCGDINFO, or zeroed with d_secsize = DEV_BSIZE. */
	struct disklabel rblk_dl;
} minors[RUMPBLK_SIZE];
     62 
/*
 * Prototypes for the device entry points, generated by the dev_type_*()
 * macros.  rumpblk_dump and rumpblk_size are declared here, but the
 * block switch below uses nodump/nosize in those positions.
 */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);
dev_type_ioctl(rumpblk_ioctl);
dev_type_strategy(rumpblk_strategy);
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
     71 
/*
 * Block device switch: open, close, strategy and ioctl are implemented
 * in this file; dump and size use the nodump/nosize stubs.
 */
static const struct bdevsw rumpblk_bdevsw = {
	rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
	nodump, nosize, D_DISK
};
     76 
/*
 * Character device switch.  It shares open, close and ioctl with the
 * block switch; rumpblk_read and rumpblk_write are present but panic
 * when called.  The remaining entries are the no* stubs.
 */
static const struct cdevsw rumpblk_cdevsw = {
	rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
	rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};
     81 
     82 /* XXX: not mpsafe */
     83 
     84 int
     85 rumpblk_init()
     86 {
     87 	int rumpblk = RUMPBLK;
     88 
     89 	return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
     90 	    &rumpblk_cdevsw, &rumpblk);
     91 }
     92 
     93 int
     94 rumpblk_register(const char *path)
     95 {
     96 	size_t len;
     97 	int i;
     98 
     99 	for (i = 0; i < RUMPBLK_SIZE; i++)
    100 		if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
    101 			return i;
    102 
    103 	for (i = 0; i < RUMPBLK_SIZE; i++)
    104 		if (minors[i].rblk_path == NULL)
    105 			break;
    106 	if (i == RUMPBLK_SIZE)
    107 		return -1;
    108 
    109 	len = strlen(path);
    110 	minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
    111 	strcpy(minors[i].rblk_path, path);
    112 	minors[i].rblk_fd = -1;
    113 	return i;
    114 }
    115 
    116 int
    117 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
    118 {
    119 	struct rblkdev *rblk = &minors[minor(dev)];
    120 	struct stat sb;
    121 	int error, fd;
    122 
    123 	KASSERT(rblk->rblk_fd == -1);
    124 	fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
    125 	if (error)
    126 		return error;
    127 
    128 	/*
    129 	 * Setup partition info.  First try the usual. */
    130 	if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
    131 		/*
    132 		 * If that works, use it.  We still need to guess
    133 		 * which partition we are on.
    134 		 */
    135 		rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
    136 	} else {
    137 		/*
    138 		 * If that didn't work, assume were a regular file
    139 		 * and just try to fake the info the best we can.
    140 		 */
    141 		memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
    142 
    143 		if (rumpuser_stat(rblk->rblk_path, &sb, &error) == -1) {
    144 			int dummy;
    145 
    146 			rumpuser_close(fd, &dummy);
    147 			return error;
    148 		}
    149 		rblk->rblk_pi.p_size = sb.st_size >> DEV_BSHIFT;
    150 		rblk->rblk_dl.d_secsize = DEV_BSIZE;
    151 		rblk->rblk_curpi = &rblk->rblk_pi;
    152 	}
    153 	rblk->rblk_fd = fd;
    154 
    155 	return 0;
    156 }
    157 
    158 int
    159 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
    160 {
    161 	struct rblkdev *rblk = &minors[minor(dev)];
    162 	int dummy;
    163 
    164 	rumpuser_close(rblk->rblk_fd, &dummy);
    165 	rblk->rblk_fd = -1;
    166 
    167 	return 0;
    168 }
    169 
    170 int
    171 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
    172 {
    173 	struct rblkdev *rblk = &minors[minor(dev)];
    174 	int rv, error;
    175 
    176 	if (xfer == DIOCGPART) {
    177 		struct partinfo *pi = (struct partinfo *)addr;
    178 
    179 		pi->part = rblk->rblk_curpi;
    180 		pi->disklab = &rblk->rblk_dl;
    181 
    182 		return 0;
    183 	}
    184 
    185 	rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
    186 	if (rv == -1)
    187 		return error;
    188 
    189 	return 0;
    190 }
    191 
/*
 * Character device read entry point.  Not implemented: any call
 * panics with a message naming this function.
 */
int
rumpblk_read(dev_t dev, struct uio *uio, int flags)
{
	panic("%s: unimplemented", __func__);
}
    198 
/*
 * Character device write entry point.  Not implemented: any call
 * panics with a message naming this function.
 */
int
rumpblk_write(dev_t dev, struct uio *uio, int flags)
{
	panic("%s: unimplemented", __func__);
}
    205 
    206 void
    207 rumpblk_strategy(struct buf *bp)
    208 {
    209 	struct rblkdev *rblk = &minors[minor(bp->b_dev)];
    210 	off_t off;
    211 	int async;
    212 
    213 	off = bp->b_blkno << DEV_BSHIFT;
    214 	DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
    215 	    " (0x%" PRIx64 " - 0x%" PRIx64")\n",
    216 	    bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
    217 	    off, off, (off + bp->b_bcount)));
    218 
    219 	/*
    220 	 * Do I/O.  We have different paths for async and sync I/O.
    221 	 * Async I/O is done by passing a request to rumpuser where
    222 	 * it is executed.  The rumpuser routine then calls
    223 	 * biodone() to signal any waiters in the kernel.  I/O's are
    224 	 * executed in series.  Technically executing them in parallel
    225 	 * would produce better results, but then we'd need either
    226 	 * more threads or posix aio.  Maybe worth investigating
    227 	 * this later.
    228 	 *
    229 	 * Synchronous I/O is done directly in the context mainly to
    230 	 * avoid unnecessary scheduling with the I/O thread.
    231 	 */
    232 	async = bp->b_flags & B_ASYNC;
    233 	if (async && rump_threads) {
    234 		struct rumpuser_aio *rua;
    235 
    236 		rumpuser_mutex_enter(&rumpuser_aio_mtx);
    237 		/*
    238 		 * Check if our buffer is full.  Doing it this way
    239 		 * throttles the I/O a bit if we have a massive
    240 		 * async I/O burst.
    241 		 *
    242 		 * XXX: this actually leads to deadlocks with spl()
    243 		 * (caller maybe be at splbio() legally for async I/O),
    244 		 * so for now set N_AIOS high and FIXXXME some day.
    245 		 */
    246 		if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
    247 			rumpuser_mutex_exit(&rumpuser_aio_mtx);
    248 			goto syncfallback;
    249 		}
    250 
    251 		rua = &rumpuser_aios[rumpuser_aio_head];
    252 		KASSERT(rua->rua_bp == NULL);
    253 		rua->rua_fd = rblk->rblk_fd;
    254 		rua->rua_data = bp->b_data;
    255 		rua->rua_dlen = bp->b_bcount;
    256 		rua->rua_off = off;
    257 		rua->rua_bp = bp;
    258 		rua->rua_op = BUF_ISREAD(bp);
    259 
    260 		/* insert into queue & signal */
    261 		rumpuser_aio_head = (rumpuser_aio_head+1) % (N_AIOS-1);
    262 		rumpuser_cv_signal(&rumpuser_aio_cv);
    263 		rumpuser_mutex_exit(&rumpuser_aio_mtx);
    264 	} else {
    265  syncfallback:
    266 		if (BUF_ISREAD(bp)) {
    267 			rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
    268 			    bp->b_bcount, off, rump_biodone, bp);
    269 		} else {
    270 			rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
    271 			    bp->b_bcount, off, rump_biodone, bp);
    272 		}
    273 		if (!async) {
    274 			int error;
    275 
    276 			if (BUF_ISWRITE(bp))
    277 				rumpuser_fsync(rblk->rblk_fd, &error);
    278 		}
    279 	}
    280 }
    281