rumpblk.c revision 1.5 1 /* $NetBSD: rumpblk.c,v 1.5 2009/02/26 00:37:48 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by the
7 * Finnish Cultural Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 /*
32 * Block device emulation. Presents a block device interface and
33 * uses rumpuser system calls to satisfy I/O requests.
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.5 2009/02/26 00:37:48 pooka Exp $");
38
39 #include <sys/param.h>
40 #include <sys/buf.h>
41 #include <sys/conf.h>
42 #include <sys/disklabel.h>
43 #include <sys/fcntl.h>
44 #include <sys/kmem.h>
45 #include <sys/malloc.h>
46 #include <sys/stat.h>
47
48 #include <rump/rumpuser.h>
49
50 #include "rump_private.h"
51 #include "rump_vfs_private.h"
52
53 #define RUMPBLK_SIZE 16
54 static struct rblkdev {
55 char *rblk_path;
56 int rblk_fd;
57
58 struct partition *rblk_curpi;
59 struct partition rblk_pi;
60 struct disklabel rblk_dl;
61 } minors[RUMPBLK_SIZE];
62
/* Entry points shared by the block and character device switches. */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_ioctl(rumpblk_ioctl);

/* Character device only. */
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);

/* Block device only; the _fail variant adds fault injection. */
dev_type_strategy(rumpblk_strategy);
dev_type_strategy(rumpblk_strategy_fail);

/* Declared here, but the switch tables below use nodump/nosize instead. */
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
72
73 static const struct bdevsw rumpblk_bdevsw = {
74 rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
75 nodump, nosize, D_DISK
76 };
77
78 static const struct bdevsw rumpblk_bdevsw_fail = {
79 rumpblk_open, rumpblk_close, rumpblk_strategy_fail, rumpblk_ioctl,
80 nodump, nosize, D_DISK
81 };
82
83 static const struct cdevsw rumpblk_cdevsw = {
84 rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
85 rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
86 };
87
/*
 * Fault injection state: blkfail out of every BLKFAIL_MAX I/O requests
 * are failed.  randstate is the state of the private random number
 * generator which selects the requests to fail.
 */
#define BLKFAIL_MAX 10000
static int blkfail;
static unsigned int randstate;
92
93 int
94 rumpblk_init()
95 {
96 char buf[64];
97 int rumpblk = RUMPBLK;
98 int error;
99
100 if (rumpuser_getenv("RUMP_BLKFAIL", buf, sizeof(buf), &error) == 0) {
101 blkfail = strtoul(buf, NULL, 10);
102 /* fail everything */
103 if (blkfail > BLKFAIL_MAX)
104 blkfail = BLKFAIL_MAX;
105 if (rumpuser_getenv("RUMP_BLKFAIL_SEED", buf, sizeof(buf),
106 &error) == 0) {
107 randstate = strtoul(buf, NULL, 10);
108 } else {
109 randstate = arc4random(); /* XXX: not enough entropy */
110 }
111 printf("rumpblk: FAULT INJECTION ACTIVE! every %d out of"
112 " %d I/O will fail. key %u\n", blkfail, BLKFAIL_MAX,
113 randstate);
114 } else {
115 blkfail = 0;
116 }
117
118 if (blkfail) {
119 return devsw_attach("rumpblk", &rumpblk_bdevsw_fail, &rumpblk,
120 &rumpblk_cdevsw, &rumpblk);
121 } else {
122 return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
123 &rumpblk_cdevsw, &rumpblk);
124 }
125 }
126
127 int
128 rumpblk_register(const char *path)
129 {
130 size_t len;
131 int i;
132
133 for (i = 0; i < RUMPBLK_SIZE; i++)
134 if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
135 return i;
136
137 for (i = 0; i < RUMPBLK_SIZE; i++)
138 if (minors[i].rblk_path == NULL)
139 break;
140 if (i == RUMPBLK_SIZE)
141 return -1;
142
143 len = strlen(path);
144 minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
145 strcpy(minors[i].rblk_path, path);
146 minors[i].rblk_fd = -1;
147 return i;
148 }
149
150 int
151 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
152 {
153 struct rblkdev *rblk = &minors[minor(dev)];
154 uint64_t fsize;
155 int ft;
156 int error, fd;
157
158 KASSERT(rblk->rblk_fd == -1);
159 fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
160 if (error)
161 return error;
162
163 /*
164 * Setup partition info. First try the usual. */
165 if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
166 /*
167 * If that works, use it. We still need to guess
168 * which partition we are on.
169 */
170 rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
171 } else {
172 /*
173 * If that didn't work, assume were a regular file
174 * and just try to fake the info the best we can.
175 */
176 memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
177
178 if (rumpuser_getfileinfo(rblk->rblk_path, &fsize,
179 &ft, &error) == -1) {
180 int dummy;
181
182 rumpuser_close(fd, &dummy);
183 return error;
184 }
185 rblk->rblk_pi.p_size = fsize >> DEV_BSHIFT;
186 rblk->rblk_dl.d_secsize = DEV_BSIZE;
187 rblk->rblk_curpi = &rblk->rblk_pi;
188 }
189 rblk->rblk_fd = fd;
190
191 return 0;
192 }
193
194 int
195 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
196 {
197 struct rblkdev *rblk = &minors[minor(dev)];
198 int dummy;
199
200 rumpuser_close(rblk->rblk_fd, &dummy);
201 rblk->rblk_fd = -1;
202
203 return 0;
204 }
205
206 int
207 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
208 {
209 struct rblkdev *rblk = &minors[minor(dev)];
210 int rv, error;
211
212 if (xfer == DIOCGPART) {
213 struct partinfo *pi = (struct partinfo *)addr;
214
215 pi->part = rblk->rblk_curpi;
216 pi->disklab = &rblk->rblk_dl;
217
218 return 0;
219 }
220
221 rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
222 if (rv == -1)
223 return error;
224
225 return 0;
226 }
227
228 int
229 rumpblk_read(dev_t dev, struct uio *uio, int flags)
230 {
231
232 panic("%s: unimplemented", __func__);
233 }
234
235 int
236 rumpblk_write(dev_t dev, struct uio *uio, int flags)
237 {
238
239 panic("%s: unimplemented", __func__);
240 }
241
242 static void
243 dostrategy(struct buf *bp)
244 {
245 struct rblkdev *rblk = &minors[minor(bp->b_dev)];
246 off_t off;
247 int async;
248
249 off = bp->b_blkno << DEV_BSHIFT;
250 DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
251 " (0x%" PRIx64 " - 0x%" PRIx64")\n",
252 bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
253 off, off, (off + bp->b_bcount)));
254
255 /*
256 * Do I/O. We have different paths for async and sync I/O.
257 * Async I/O is done by passing a request to rumpuser where
258 * it is executed. The rumpuser routine then calls
259 * biodone() to signal any waiters in the kernel. I/O's are
260 * executed in series. Technically executing them in parallel
261 * would produce better results, but then we'd need either
262 * more threads or posix aio. Maybe worth investigating
263 * this later.
264 *
265 * Synchronous I/O is done directly in the context mainly to
266 * avoid unnecessary scheduling with the I/O thread.
267 */
268 async = bp->b_flags & B_ASYNC;
269 if (async && rump_threads) {
270 struct rumpuser_aio *rua;
271
272 rumpuser_mutex_enter(&rumpuser_aio_mtx);
273 /*
274 * Check if our buffer is full. Doing it this way
275 * throttles the I/O a bit if we have a massive
276 * async I/O burst.
277 *
278 * XXX: this actually leads to deadlocks with spl()
279 * (caller maybe be at splbio() legally for async I/O),
280 * so for now set N_AIOS high and FIXXXME some day.
281 */
282 if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
283 rumpuser_mutex_exit(&rumpuser_aio_mtx);
284 goto syncfallback;
285 }
286
287 rua = &rumpuser_aios[rumpuser_aio_head];
288 KASSERT(rua->rua_bp == NULL);
289 rua->rua_fd = rblk->rblk_fd;
290 rua->rua_data = bp->b_data;
291 rua->rua_dlen = bp->b_bcount;
292 rua->rua_off = off;
293 rua->rua_bp = bp;
294 rua->rua_op = BUF_ISREAD(bp);
295
296 /* insert into queue & signal */
297 rumpuser_aio_head = (rumpuser_aio_head+1) % (N_AIOS-1);
298 rumpuser_cv_signal(&rumpuser_aio_cv);
299 rumpuser_mutex_exit(&rumpuser_aio_mtx);
300 } else {
301 syncfallback:
302 if (BUF_ISREAD(bp)) {
303 rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
304 bp->b_bcount, off, rump_biodone, bp);
305 } else {
306 rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
307 bp->b_bcount, off, rump_biodone, bp);
308 }
309 if (!async) {
310 int error;
311
312 if (BUF_ISWRITE(bp))
313 rumpuser_fsync(rblk->rblk_fd, &error);
314 }
315 }
316 }
317
/*
 * Strategy routine of the regular block device switch: hands the
 * request straight to dostrategy().
 */
void
rumpblk_strategy(struct buf *bp)
{

	dostrategy(bp);
}
324
325 /*
326 * Simple random number generator. This is private so that we can
327 * very repeatedly control which blocks will fail.
328 *
329 * <mlelstv> pooka, rand()
330 * <mlelstv> [paste]
331 */
332 static unsigned
333 gimmerand(void)
334 {
335
336 return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
337 }
338
339 /*
340 * Block device with very simple fault injection. Fails every
341 * n out of BLKFAIL_MAX I/O with EIO. n is determined by the env
342 * variable RUMP_BLKFAIL.
343 */
344 void
345 rumpblk_strategy_fail(struct buf *bp)
346 {
347
348 if (gimmerand() % BLKFAIL_MAX >= blkfail) {
349 dostrategy(bp);
350 } else {
351 printf("block fault injection: failing I/O on block %lld\n",
352 (long long)bp->b_blkno);
353 bp->b_error = EIO;
354 biodone(bp);
355 }
356 }
357