/*	$NetBSD: rumpblk.c,v 1.55 2014/03/16 05:20:30 dholland Exp $	*/

/*
 * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by the
 * Finnish Cultural Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Block device emulation.  Presents a block device interface and
 * uses rumpuser system calls to satisfy I/O requests.
 *
 * We provide fault injection.  The driver can be made to fail
 * I/O occasionally.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.55 2014/03/16 05:20:30 dholland Exp $");

#include <sys/param.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/condvar.h>
#include <sys/disklabel.h>
#include <sys/evcnt.h>
#include <sys/fcntl.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/cprng.h>

#include <rump/rumpuser.h>

#include "rump_private.h"
#include "rump_vfs_private.h"

#if 0
#define DPRINTF(x) printf x
#else
#define DPRINTF(x)
#endif

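/* maximum number of rumpblk minor devices, i.e. attachable host files */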
#define RUMPBLK_SIZE 16
static struct rblkdev {
        char *rblk_path;
        int rblk_fd;
        int rblk_mode;

        uint64_t rblk_size;
        uint64_t rblk_hostoffset;
        uint64_t rblk_hostsize;
        int rblk_ftype;

        struct disklabel rblk_label;
} minors[RUMPBLK_SIZE];

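/* I/O statistics, attached as event counters in rumpblk_init() */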
static struct evcnt ev_io_total;
static struct evcnt ev_io_async;

static struct evcnt ev_bwrite_total;
static struct evcnt ev_bwrite_async;
static struct evcnt ev_bread_total;

dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);
dev_type_ioctl(rumpblk_ioctl);
dev_type_strategy(rumpblk_strategy);
dev_type_strategy(rumpblk_strategy_fail);
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);

static const struct bdevsw rumpblk_bdevsw = {
        .d_open = rumpblk_open,
        .d_close = rumpblk_close,
        .d_strategy = rumpblk_strategy,
        .d_ioctl = rumpblk_ioctl,
        .d_dump = nodump,
        .d_psize = nosize,
        .d_flag = D_DISK
};

static const struct bdevsw rumpblk_bdevsw_fail = {
        .d_open = rumpblk_open,
        .d_close = rumpblk_close,
        .d_strategy = rumpblk_strategy_fail,
        .d_ioctl = rumpblk_ioctl,
        .d_dump = nodump,
        .d_psize = nosize,
        .d_flag = D_DISK
};

static const struct cdevsw rumpblk_cdevsw = {
        .d_open = rumpblk_open,
        .d_close = rumpblk_close,
        .d_read = rumpblk_read,
        .d_write = rumpblk_write,
        .d_ioctl = rumpblk_ioctl,
        .d_stop = nostop,
        .d_tty = notty,
        .d_poll = nopoll,
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_flag = D_DISK
};

static int backend_open(struct rblkdev *, const char *);
static int backend_close(struct rblkdev *);

/* fail every n out of BLKFAIL_MAX */
#define BLKFAIL_MAX 10000
static int blkfail;
static unsigned randstate;
static kmutex_t rumpblk_lock;
static int sectshift = DEV_BSHIFT;

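/*
 * Fabricate a disklabel for the backing host object: one track, one
 * cylinder, with all of the usable sectors in partition "part".
 * There is no real disk behind us, so the geometry is fictitious.
 */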
static void
makedefaultlabel(struct disklabel *lp, off_t size, int part)
{
        int i;

        memset(lp, 0, sizeof(*lp));

        lp->d_secperunit = size;
        lp->d_secsize = 1 << sectshift;
        lp->d_nsectors = size >> sectshift;
        lp->d_ntracks = 1;
        lp->d_ncylinders = 1;
        lp->d_secpercyl = lp->d_nsectors;

        /* oh dear oh dear */
        strncpy(lp->d_typename, "rumpd", sizeof(lp->d_typename));
        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));

        lp->d_type = DTYPE_RUMPD;
        lp->d_rpm = 11;
        lp->d_interleave = 1;
        lp->d_flags = 0;

        /* XXX: RAW_PART handling? */
        for (i = 0; i < part; i++) {
                lp->d_partitions[i].p_fstype = FS_UNUSED;
        }
        lp->d_partitions[part].p_size = size >> sectshift;
        lp->d_npartitions = part+1;
        /* XXX: file system type? */

        lp->d_magic = DISKMAGIC;
        lp->d_magic2 = DISKMAGIC;
        lp->d_checksum = 0; /* XXX */
}

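/*
 * Module init: pick up the fault injection and sector size tunables
 * (RUMP_BLKFAIL, RUMP_BLKFAIL_SEED, RUMP_BLKSECTSHIFT) from the host
 * environment, attach the statistics counters and register our
 * block/character device switch entries.
 */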
int
rumpblk_init(void)
{
        char buf[64];
        devmajor_t rumpblkmaj = RUMPBLK_DEVMAJOR;
        unsigned tmp;
        int i;

        mutex_init(&rumpblk_lock, MUTEX_DEFAULT, IPL_NONE);

        if (rumpuser_getparam("RUMP_BLKFAIL", buf, sizeof(buf)) == 0) {
                blkfail = strtoul(buf, NULL, 10);
                /* fail everything */
                if (blkfail > BLKFAIL_MAX)
                        blkfail = BLKFAIL_MAX;
                if (rumpuser_getparam("RUMP_BLKFAIL_SEED",
                    buf, sizeof(buf)) == 0) {
                        randstate = strtoul(buf, NULL, 10);
                } else {
                        randstate = cprng_fast32();
                }
                printf("rumpblk: FAULT INJECTION ACTIVE! fail %d/%d. "
                    "seed %u\n", blkfail, BLKFAIL_MAX, randstate);
        } else {
                blkfail = 0;
        }

        if (rumpuser_getparam("RUMP_BLKSECTSHIFT", buf, sizeof(buf)) == 0) {
                printf("rumpblk: ");
                tmp = strtoul(buf, NULL, 10);
                if (tmp >= DEV_BSHIFT)
                        sectshift = tmp;
                else
                        printf("RUMP_BLKSECTSHIFT must be at least %d (now %d), ",
                            DEV_BSHIFT, tmp);
                printf("using %d for sector shift (size %d)\n",
                    sectshift, 1<<sectshift);
        }

        memset(minors, 0, sizeof(minors));
        for (i = 0; i < RUMPBLK_SIZE; i++) {
                minors[i].rblk_fd = -1;
        }

        evcnt_attach_dynamic(&ev_io_total, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "I/O reqs");
        evcnt_attach_dynamic(&ev_io_async, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "async I/O");

        evcnt_attach_dynamic(&ev_bread_total, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "bytes read");
        evcnt_attach_dynamic(&ev_bwrite_total, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "bytes written");
        evcnt_attach_dynamic(&ev_bwrite_async, EVCNT_TYPE_MISC, NULL,
            "rumpblk", "bytes written async");

        if (blkfail) {
                return devsw_attach("rumpblk",
                    &rumpblk_bdevsw_fail, &rumpblkmaj,
                    &rumpblk_cdevsw, &rumpblkmaj);
        } else {
                return devsw_attach("rumpblk",
                    &rumpblk_bdevsw, &rumpblkmaj,
                    &rumpblk_cdevsw, &rumpblkmaj);
        }
}

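/*
 * Register a host file or device as a rumpblk minor.  "offset" and
 * "size" select a window into the host object; size RUMPBLK_SIZENOTSET
 * means everything from offset to the end.  If the path is already
 * registered, the existing minor number is returned.
 */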
int
rumpblk_register(const char *path, devminor_t *dmin,
    uint64_t offset, uint64_t size)
{
        struct rblkdev *rblk;
        uint64_t flen;
        size_t len;
        int ftype, error, i;

        /* devices might not report correct size unless they're open */
        if ((error = rumpuser_getfileinfo(path, &flen, &ftype)) != 0)
                return error;

        /* verify host file is of supported type */
        if (!(ftype == RUMPUSER_FT_REG
            || ftype == RUMPUSER_FT_BLK
            || ftype == RUMPUSER_FT_CHR))
                return EINVAL;

        mutex_enter(&rumpblk_lock);
        for (i = 0; i < RUMPBLK_SIZE; i++) {
                if (minors[i].rblk_path &&
                    strcmp(minors[i].rblk_path, path) == 0) {
                        mutex_exit(&rumpblk_lock);
                        *dmin = i;
                        return 0;
                }
        }

        for (i = 0; i < RUMPBLK_SIZE; i++)
                if (minors[i].rblk_path == NULL)
                        break;
        if (i == RUMPBLK_SIZE) {
                mutex_exit(&rumpblk_lock);
                return EBUSY;
        }

        rblk = &minors[i];
        rblk->rblk_path = __UNCONST("taken");
        mutex_exit(&rumpblk_lock);

        len = strlen(path);
        rblk->rblk_path = malloc(len + 1, M_TEMP, M_WAITOK);
        strcpy(rblk->rblk_path, path);
        rblk->rblk_hostoffset = offset;
        if (size != RUMPBLK_SIZENOTSET) {
                KASSERT(size + offset <= flen);
                rblk->rblk_size = size;
        } else {
                KASSERT(offset < flen);
                rblk->rblk_size = flen - offset;
        }
        rblk->rblk_hostsize = flen;
        rblk->rblk_ftype = ftype;
        makedefaultlabel(&rblk->rblk_label, rblk->rblk_size, i);

        if ((error = backend_open(rblk, path)) != 0) {
                memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label));
                free(rblk->rblk_path, M_TEMP);
                rblk->rblk_path = NULL;
                return error;
        }

        *dmin = i;
        return 0;
}

/*
 * Unregister rumpblk.  It's the caller's responsibility to make
 * sure it's no longer in use.
 */
int
rumpblk_deregister(const char *path)
{
        struct rblkdev *rblk;
        int i;

        mutex_enter(&rumpblk_lock);
        for (i = 0; i < RUMPBLK_SIZE; i++) {
                if (minors[i].rblk_path &&
                    strcmp(minors[i].rblk_path, path) == 0) {
                        break;
                }
        }
        mutex_exit(&rumpblk_lock);

        if (i == RUMPBLK_SIZE)
                return ENOENT;

        rblk = &minors[i];
        backend_close(rblk);

        free(rblk->rblk_path, M_TEMP);
        memset(&rblk->rblk_label, 0, sizeof(rblk->rblk_label));
        rblk->rblk_path = NULL;

        return 0;
}

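/*
 * Open the backing host object.  Try read-write first and fall back
 * to read-only; the resulting access mode is remembered so that
 * rumpblk_open() can reject write opens on a read-only backend.
 */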
static int
backend_open(struct rblkdev *rblk, const char *path)
{
        int error, fd;

        KASSERT(rblk->rblk_fd == -1);
        error = rumpuser_open(path,
            RUMPUSER_OPEN_RDWR | RUMPUSER_OPEN_BIO, &fd);
        if (error) {
                error = rumpuser_open(path,
                    RUMPUSER_OPEN_RDONLY | RUMPUSER_OPEN_BIO, &fd);
                if (error)
                        return error;
                rblk->rblk_mode = FREAD;
        } else {
                rblk->rblk_mode = FREAD|FWRITE;
        }

        rblk->rblk_fd = fd;
        KASSERT(rblk->rblk_fd != -1);
        return 0;
}

static int
backend_close(struct rblkdev *rblk)
{

        rumpuser_close(rblk->rblk_fd);
        rblk->rblk_fd = -1;

        return 0;
}

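/*
 * Device open: the backend must already be registered, and the
 * requested access mode may not exceed what backend_open() managed
 * to get from the host.
 */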
int
rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
{
        struct rblkdev *rblk = &minors[minor(dev)];

        if (rblk->rblk_fd == -1)
                return ENXIO;

        if (((flag & (FREAD|FWRITE)) & ~rblk->rblk_mode) != 0) {
                return EACCES;
        }

        return 0;
}

int
rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
{

        return 0;
}

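/*
 * Handle the small set of ioctls file systems need from us: hand out
 * the fabricated disklabel and accept cache syncs.  Everything else
 * is ENOTTY.
 */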
int
rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
{
        devminor_t dmin = minor(dev);
        struct rblkdev *rblk = &minors[dmin];
        struct partinfo *pi;
        int error = 0;

        /* well, we should support a few more, but we don't for now */
        switch (xfer) {
        case DIOCGDINFO:
                *(struct disklabel *)addr = rblk->rblk_label;
                break;

        case DIOCGPART:
                pi = addr;
                pi->part = &rblk->rblk_label.d_partitions[DISKPART(dmin)];
                pi->disklab = &rblk->rblk_label;
                break;

        /* it's synced enough along the write path */
        case DIOCCACHESYNC:
                break;

        default:
                error = ENOTTY;
                break;
        }

        return error;
}

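/*
 * Raw device read/write: push the request through physio(), using the
 * fault-injecting strategy routine if fault injection is enabled.
 */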
static int
do_physio(dev_t dev, struct uio *uio, int which)
{
        void (*strat)(struct buf *);

        if (blkfail)
                strat = rumpblk_strategy_fail;
        else
                strat = rumpblk_strategy;

        return physio(strat, NULL, dev, which, minphys, uio);
}

int
rumpblk_read(dev_t dev, struct uio *uio, int flags)
{

        return do_physio(dev, uio, B_READ);
}

int
rumpblk_write(dev_t dev, struct uio *uio, int flags)
{

        return do_physio(dev, uio, B_WRITE);
}

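/*
 * Common strategy routine: validate the transfer size, update the
 * statistics counters, bounds-check the request against the device
 * size and hand the I/O to the host via rumpuser_bio().  Completion
 * is reported through rump_biodone().
 */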
static void
dostrategy(struct buf *bp)
{
        struct rblkdev *rblk = &minors[minor(bp->b_dev)];
        off_t off;
        int async = bp->b_flags & B_ASYNC;
        int op;

        if (bp->b_bcount % (1<<sectshift) != 0) {
                rump_biodone(bp, 0, EINVAL);
                return;
        }

        /* collect statistics */
        ev_io_total.ev_count++;
        if (async)
                ev_io_async.ev_count++;
        if (BUF_ISWRITE(bp)) {
                ev_bwrite_total.ev_count += bp->b_bcount;
                if (async)
                        ev_bwrite_async.ev_count += bp->b_bcount;
        } else {
                ev_bread_total.ev_count++;
        }

        /*
         * b_blkno is always in terms of DEV_BSIZE, and since we need
         * to translate to a byte offset for the host read, this
         * calculation does not need sectshift.
         */
        off = bp->b_blkno << DEV_BSHIFT;

        /*
         * Do bounds checking if we're working on a file.  Otherwise
         * invalid file systems might attempt to read beyond EOF.  This
         * is bad(tm) especially on mmapped images.  This is essentially
         * what the kernel bounds_check() routines do.
         */
        if (off + bp->b_bcount > rblk->rblk_size) {
                int64_t sz = rblk->rblk_size - off;

                /* EOF */
                if (sz == 0) {
                        rump_biodone(bp, 0, 0);
                        return;
                }
                /* beyond EOF ==> error */
                if (sz < 0) {
                        rump_biodone(bp, 0, EINVAL);
                        return;
                }

                /* truncate to device size */
                bp->b_bcount = sz;
        }

        off += rblk->rblk_hostoffset;
        DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
            " (0x%" PRIx64 " - 0x%" PRIx64 "), %ssync\n",
            bp->b_bcount, BUF_ISREAD(bp) ? "READ" : "WRITE",
            off, off, (off + bp->b_bcount), async ? "a" : ""));

        op = BUF_ISREAD(bp) ? RUMPUSER_BIO_READ : RUMPUSER_BIO_WRITE;
        if (BUF_ISWRITE(bp) && !async)
                op |= RUMPUSER_BIO_SYNC;

        rumpuser_bio(rblk->rblk_fd, op, bp->b_data, bp->b_bcount, off,
            rump_biodone, bp);
}

void
rumpblk_strategy(struct buf *bp)
{

        dostrategy(bp);
}

/*
 * Simple random number generator.  This is private so that we can
 * repeatably control which blocks will fail.
 *
 * <mlelstv> pooka, rand()
 * <mlelstv> [paste]
 */
static unsigned
gimmerand(void)
{

        return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
}

/*
 * Block device with very simple fault injection.  Fails n out of
 * every BLKFAIL_MAX I/O requests with EIO.  n is determined by the
 * env variable RUMP_BLKFAIL.
 */
void
rumpblk_strategy_fail(struct buf *bp)
{

        if (gimmerand() % BLKFAIL_MAX >= blkfail) {
                dostrategy(bp);
        } else {
                printf("block fault injection: failing I/O on block %lld\n",
                    (long long)bp->b_blkno);
                bp->b_error = EIO;
                biodone(bp);
        }
}