rumpblk.c revision 1.1.4.3 1 1.1.4.3 skrll /* $NetBSD: rumpblk.c,v 1.1.4.3 2009/03/03 18:34:30 skrll Exp $ */
2 1.1.4.2 skrll
3 1.1.4.2 skrll /*
4 1.1.4.2 skrll * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 1.1.4.2 skrll *
6 1.1.4.2 skrll * Development of this software was supported by the
7 1.1.4.2 skrll * Finnish Cultural Foundation.
8 1.1.4.2 skrll *
9 1.1.4.2 skrll * Redistribution and use in source and binary forms, with or without
10 1.1.4.2 skrll * modification, are permitted provided that the following conditions
11 1.1.4.2 skrll * are met:
12 1.1.4.2 skrll * 1. Redistributions of source code must retain the above copyright
13 1.1.4.2 skrll * notice, this list of conditions and the following disclaimer.
14 1.1.4.2 skrll * 2. Redistributions in binary form must reproduce the above copyright
15 1.1.4.2 skrll * notice, this list of conditions and the following disclaimer in the
16 1.1.4.2 skrll * documentation and/or other materials provided with the distribution.
17 1.1.4.2 skrll *
18 1.1.4.2 skrll * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19 1.1.4.2 skrll * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 1.1.4.2 skrll * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 1.1.4.2 skrll * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 1.1.4.2 skrll * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 1.1.4.2 skrll * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 1.1.4.2 skrll * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 1.1.4.2 skrll * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 1.1.4.2 skrll * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 1.1.4.2 skrll * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 1.1.4.2 skrll * SUCH DAMAGE.
29 1.1.4.2 skrll */
30 1.1.4.2 skrll
31 1.1.4.2 skrll /*
32 1.1.4.2 skrll * Block device emulation. Presents a block device interface and
33 1.1.4.2 skrll * uses rumpuser system calls to satisfy I/O requests.
34 1.1.4.2 skrll */
35 1.1.4.2 skrll
36 1.1.4.2 skrll #include <sys/cdefs.h>
37 1.1.4.3 skrll __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.1.4.3 2009/03/03 18:34:30 skrll Exp $");
38 1.1.4.2 skrll
39 1.1.4.2 skrll #include <sys/param.h>
40 1.1.4.2 skrll #include <sys/buf.h>
41 1.1.4.2 skrll #include <sys/conf.h>
42 1.1.4.2 skrll #include <sys/disklabel.h>
43 1.1.4.2 skrll #include <sys/fcntl.h>
44 1.1.4.2 skrll #include <sys/kmem.h>
45 1.1.4.2 skrll #include <sys/malloc.h>
46 1.1.4.2 skrll #include <sys/stat.h>
47 1.1.4.2 skrll
48 1.1.4.2 skrll #include <rump/rumpuser.h>
49 1.1.4.2 skrll
50 1.1.4.2 skrll #include "rump_private.h"
51 1.1.4.2 skrll #include "rump_vfs_private.h"
52 1.1.4.2 skrll
53 1.1.4.2 skrll #define RUMPBLK_SIZE 16
54 1.1.4.2 skrll static struct rblkdev {
55 1.1.4.2 skrll char *rblk_path;
56 1.1.4.2 skrll int rblk_fd;
57 1.1.4.2 skrll
58 1.1.4.2 skrll struct partition *rblk_curpi;
59 1.1.4.2 skrll struct partition rblk_pi;
60 1.1.4.2 skrll struct disklabel rblk_dl;
61 1.1.4.2 skrll } minors[RUMPBLK_SIZE];
62 1.1.4.2 skrll
/*
 * Prototypes for the device entry points, declared through the
 * dev_type_*() helper macros.  rumpblk_dump and rumpblk_size are only
 * declared here; the switch tables in this file use nodump / nosize.
 */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);
dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);
dev_type_ioctl(rumpblk_ioctl);
dev_type_strategy(rumpblk_strategy);
dev_type_strategy(rumpblk_strategy_fail);
dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
72 1.1.4.2 skrll
73 1.1.4.2 skrll static const struct bdevsw rumpblk_bdevsw = {
74 1.1.4.2 skrll rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
75 1.1.4.2 skrll nodump, nosize, D_DISK
76 1.1.4.2 skrll };
77 1.1.4.2 skrll
78 1.1.4.3 skrll static const struct bdevsw rumpblk_bdevsw_fail = {
79 1.1.4.3 skrll rumpblk_open, rumpblk_close, rumpblk_strategy_fail, rumpblk_ioctl,
80 1.1.4.3 skrll nodump, nosize, D_DISK
81 1.1.4.3 skrll };
82 1.1.4.3 skrll
83 1.1.4.2 skrll static const struct cdevsw rumpblk_cdevsw = {
84 1.1.4.2 skrll rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
85 1.1.4.2 skrll rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
86 1.1.4.2 skrll };
87 1.1.4.2 skrll
/*
 * Fault injection state.  blkfail is the number of I/O requests out of
 * every BLKFAIL_MAX that should fail (0 means injection is off), and
 * randstate is the state of the private random number generator that
 * picks the failing requests.
 */
#define BLKFAIL_MAX 10000
static int blkfail;
static unsigned randstate;
92 1.1.4.2 skrll
93 1.1.4.2 skrll int
94 1.1.4.2 skrll rumpblk_init()
95 1.1.4.2 skrll {
96 1.1.4.3 skrll char buf[64];
97 1.1.4.2 skrll int rumpblk = RUMPBLK;
98 1.1.4.3 skrll int error;
99 1.1.4.3 skrll
100 1.1.4.3 skrll if (rumpuser_getenv("RUMP_BLKFAIL", buf, sizeof(buf), &error) == 0) {
101 1.1.4.3 skrll blkfail = strtoul(buf, NULL, 10);
102 1.1.4.3 skrll /* fail everything */
103 1.1.4.3 skrll if (blkfail > BLKFAIL_MAX)
104 1.1.4.3 skrll blkfail = BLKFAIL_MAX;
105 1.1.4.3 skrll if (rumpuser_getenv("RUMP_BLKFAIL_SEED", buf, sizeof(buf),
106 1.1.4.3 skrll &error) == 0) {
107 1.1.4.3 skrll randstate = strtoul(buf, NULL, 10);
108 1.1.4.3 skrll } else {
109 1.1.4.3 skrll randstate = arc4random(); /* XXX: not enough entropy */
110 1.1.4.3 skrll }
111 1.1.4.3 skrll printf("rumpblk: FAULT INJECTION ACTIVE! every %d out of"
112 1.1.4.3 skrll " %d I/O will fail. key %u\n", blkfail, BLKFAIL_MAX,
113 1.1.4.3 skrll randstate);
114 1.1.4.3 skrll } else {
115 1.1.4.3 skrll blkfail = 0;
116 1.1.4.3 skrll }
117 1.1.4.2 skrll
118 1.1.4.3 skrll if (blkfail) {
119 1.1.4.3 skrll return devsw_attach("rumpblk", &rumpblk_bdevsw_fail, &rumpblk,
120 1.1.4.3 skrll &rumpblk_cdevsw, &rumpblk);
121 1.1.4.3 skrll } else {
122 1.1.4.3 skrll return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
123 1.1.4.3 skrll &rumpblk_cdevsw, &rumpblk);
124 1.1.4.3 skrll }
125 1.1.4.2 skrll }
126 1.1.4.2 skrll
127 1.1.4.2 skrll int
128 1.1.4.2 skrll rumpblk_register(const char *path)
129 1.1.4.2 skrll {
130 1.1.4.2 skrll size_t len;
131 1.1.4.2 skrll int i;
132 1.1.4.2 skrll
133 1.1.4.2 skrll for (i = 0; i < RUMPBLK_SIZE; i++)
134 1.1.4.2 skrll if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
135 1.1.4.2 skrll return i;
136 1.1.4.2 skrll
137 1.1.4.2 skrll for (i = 0; i < RUMPBLK_SIZE; i++)
138 1.1.4.2 skrll if (minors[i].rblk_path == NULL)
139 1.1.4.2 skrll break;
140 1.1.4.2 skrll if (i == RUMPBLK_SIZE)
141 1.1.4.2 skrll return -1;
142 1.1.4.2 skrll
143 1.1.4.2 skrll len = strlen(path);
144 1.1.4.2 skrll minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
145 1.1.4.2 skrll strcpy(minors[i].rblk_path, path);
146 1.1.4.2 skrll minors[i].rblk_fd = -1;
147 1.1.4.2 skrll return i;
148 1.1.4.2 skrll }
149 1.1.4.2 skrll
150 1.1.4.2 skrll int
151 1.1.4.2 skrll rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
152 1.1.4.2 skrll {
153 1.1.4.2 skrll struct rblkdev *rblk = &minors[minor(dev)];
154 1.1.4.3 skrll uint64_t fsize;
155 1.1.4.3 skrll int ft;
156 1.1.4.2 skrll int error, fd;
157 1.1.4.2 skrll
158 1.1.4.2 skrll KASSERT(rblk->rblk_fd == -1);
159 1.1.4.2 skrll fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
160 1.1.4.2 skrll if (error)
161 1.1.4.2 skrll return error;
162 1.1.4.2 skrll
163 1.1.4.2 skrll /*
164 1.1.4.2 skrll * Setup partition info. First try the usual. */
165 1.1.4.2 skrll if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
166 1.1.4.2 skrll /*
167 1.1.4.2 skrll * If that works, use it. We still need to guess
168 1.1.4.2 skrll * which partition we are on.
169 1.1.4.2 skrll */
170 1.1.4.2 skrll rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
171 1.1.4.2 skrll } else {
172 1.1.4.2 skrll /*
173 1.1.4.2 skrll * If that didn't work, assume were a regular file
174 1.1.4.2 skrll * and just try to fake the info the best we can.
175 1.1.4.2 skrll */
176 1.1.4.2 skrll memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
177 1.1.4.2 skrll
178 1.1.4.3 skrll if (rumpuser_getfileinfo(rblk->rblk_path, &fsize,
179 1.1.4.3 skrll &ft, &error) == -1) {
180 1.1.4.2 skrll int dummy;
181 1.1.4.2 skrll
182 1.1.4.2 skrll rumpuser_close(fd, &dummy);
183 1.1.4.2 skrll return error;
184 1.1.4.2 skrll }
185 1.1.4.3 skrll rblk->rblk_pi.p_size = fsize >> DEV_BSHIFT;
186 1.1.4.2 skrll rblk->rblk_dl.d_secsize = DEV_BSIZE;
187 1.1.4.2 skrll rblk->rblk_curpi = &rblk->rblk_pi;
188 1.1.4.2 skrll }
189 1.1.4.2 skrll rblk->rblk_fd = fd;
190 1.1.4.2 skrll
191 1.1.4.2 skrll return 0;
192 1.1.4.2 skrll }
193 1.1.4.2 skrll
194 1.1.4.2 skrll int
195 1.1.4.2 skrll rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
196 1.1.4.2 skrll {
197 1.1.4.2 skrll struct rblkdev *rblk = &minors[minor(dev)];
198 1.1.4.2 skrll int dummy;
199 1.1.4.2 skrll
200 1.1.4.2 skrll rumpuser_close(rblk->rblk_fd, &dummy);
201 1.1.4.2 skrll rblk->rblk_fd = -1;
202 1.1.4.2 skrll
203 1.1.4.2 skrll return 0;
204 1.1.4.2 skrll }
205 1.1.4.2 skrll
206 1.1.4.2 skrll int
207 1.1.4.2 skrll rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
208 1.1.4.2 skrll {
209 1.1.4.2 skrll struct rblkdev *rblk = &minors[minor(dev)];
210 1.1.4.2 skrll int rv, error;
211 1.1.4.2 skrll
212 1.1.4.2 skrll if (xfer == DIOCGPART) {
213 1.1.4.2 skrll struct partinfo *pi = (struct partinfo *)addr;
214 1.1.4.2 skrll
215 1.1.4.2 skrll pi->part = rblk->rblk_curpi;
216 1.1.4.2 skrll pi->disklab = &rblk->rblk_dl;
217 1.1.4.2 skrll
218 1.1.4.2 skrll return 0;
219 1.1.4.2 skrll }
220 1.1.4.2 skrll
221 1.1.4.2 skrll rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
222 1.1.4.2 skrll if (rv == -1)
223 1.1.4.2 skrll return error;
224 1.1.4.2 skrll
225 1.1.4.2 skrll return 0;
226 1.1.4.2 skrll }
227 1.1.4.2 skrll
228 1.1.4.2 skrll int
229 1.1.4.2 skrll rumpblk_read(dev_t dev, struct uio *uio, int flags)
230 1.1.4.2 skrll {
231 1.1.4.2 skrll
232 1.1.4.2 skrll panic("%s: unimplemented", __func__);
233 1.1.4.2 skrll }
234 1.1.4.2 skrll
235 1.1.4.2 skrll int
236 1.1.4.2 skrll rumpblk_write(dev_t dev, struct uio *uio, int flags)
237 1.1.4.2 skrll {
238 1.1.4.2 skrll
239 1.1.4.2 skrll panic("%s: unimplemented", __func__);
240 1.1.4.2 skrll }
241 1.1.4.2 skrll
242 1.1.4.3 skrll static void
243 1.1.4.3 skrll dostrategy(struct buf *bp)
244 1.1.4.2 skrll {
245 1.1.4.2 skrll struct rblkdev *rblk = &minors[minor(bp->b_dev)];
246 1.1.4.2 skrll off_t off;
247 1.1.4.2 skrll int async;
248 1.1.4.2 skrll
249 1.1.4.2 skrll off = bp->b_blkno << DEV_BSHIFT;
250 1.1.4.2 skrll DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
251 1.1.4.2 skrll " (0x%" PRIx64 " - 0x%" PRIx64")\n",
252 1.1.4.2 skrll bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
253 1.1.4.2 skrll off, off, (off + bp->b_bcount)));
254 1.1.4.2 skrll
255 1.1.4.2 skrll /*
256 1.1.4.2 skrll * Do I/O. We have different paths for async and sync I/O.
257 1.1.4.2 skrll * Async I/O is done by passing a request to rumpuser where
258 1.1.4.2 skrll * it is executed. The rumpuser routine then calls
259 1.1.4.2 skrll * biodone() to signal any waiters in the kernel. I/O's are
260 1.1.4.2 skrll * executed in series. Technically executing them in parallel
261 1.1.4.2 skrll * would produce better results, but then we'd need either
262 1.1.4.2 skrll * more threads or posix aio. Maybe worth investigating
263 1.1.4.2 skrll * this later.
264 1.1.4.2 skrll *
265 1.1.4.2 skrll * Synchronous I/O is done directly in the context mainly to
266 1.1.4.2 skrll * avoid unnecessary scheduling with the I/O thread.
267 1.1.4.2 skrll */
268 1.1.4.2 skrll async = bp->b_flags & B_ASYNC;
269 1.1.4.2 skrll if (async && rump_threads) {
270 1.1.4.2 skrll struct rumpuser_aio *rua;
271 1.1.4.2 skrll
272 1.1.4.2 skrll rumpuser_mutex_enter(&rumpuser_aio_mtx);
273 1.1.4.2 skrll /*
274 1.1.4.2 skrll * Check if our buffer is full. Doing it this way
275 1.1.4.2 skrll * throttles the I/O a bit if we have a massive
276 1.1.4.2 skrll * async I/O burst.
277 1.1.4.2 skrll */
278 1.1.4.2 skrll if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
279 1.1.4.2 skrll rumpuser_mutex_exit(&rumpuser_aio_mtx);
280 1.1.4.2 skrll goto syncfallback;
281 1.1.4.2 skrll }
282 1.1.4.2 skrll
283 1.1.4.3 skrll rua = &rumpuser_aios[rumpuser_aio_head];
284 1.1.4.3 skrll KASSERT(rua->rua_bp == NULL);
285 1.1.4.3 skrll rua->rua_fd = rblk->rblk_fd;
286 1.1.4.3 skrll rua->rua_data = bp->b_data;
287 1.1.4.3 skrll rua->rua_dlen = bp->b_bcount;
288 1.1.4.3 skrll rua->rua_off = off;
289 1.1.4.3 skrll rua->rua_bp = bp;
290 1.1.4.3 skrll rua->rua_op = BUF_ISREAD(bp);
291 1.1.4.3 skrll
292 1.1.4.2 skrll /* insert into queue & signal */
293 1.1.4.3 skrll rumpuser_aio_head = (rumpuser_aio_head+1) % N_AIOS;
294 1.1.4.2 skrll rumpuser_cv_signal(&rumpuser_aio_cv);
295 1.1.4.2 skrll rumpuser_mutex_exit(&rumpuser_aio_mtx);
296 1.1.4.2 skrll } else {
297 1.1.4.2 skrll syncfallback:
298 1.1.4.2 skrll if (BUF_ISREAD(bp)) {
299 1.1.4.2 skrll rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
300 1.1.4.2 skrll bp->b_bcount, off, rump_biodone, bp);
301 1.1.4.2 skrll } else {
302 1.1.4.2 skrll rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
303 1.1.4.2 skrll bp->b_bcount, off, rump_biodone, bp);
304 1.1.4.2 skrll }
305 1.1.4.2 skrll if (!async) {
306 1.1.4.2 skrll int error;
307 1.1.4.2 skrll
308 1.1.4.2 skrll if (BUF_ISWRITE(bp))
309 1.1.4.2 skrll rumpuser_fsync(rblk->rblk_fd, &error);
310 1.1.4.2 skrll }
311 1.1.4.2 skrll }
312 1.1.4.2 skrll }
313 1.1.4.3 skrll
/*
 * Strategy routine of the regular block device switch: hands the
 * buffer to dostrategy() with no fault injection.
 */
void
rumpblk_strategy(struct buf *bp)
{

	dostrategy(bp);
}
320 1.1.4.3 skrll
321 1.1.4.3 skrll /*
322 1.1.4.3 skrll * Simple random number generator. This is private so that we can
323 1.1.4.3 skrll * very repeatedly control which blocks will fail.
324 1.1.4.3 skrll *
325 1.1.4.3 skrll * <mlelstv> pooka, rand()
326 1.1.4.3 skrll * <mlelstv> [paste]
327 1.1.4.3 skrll */
328 1.1.4.3 skrll static unsigned
329 1.1.4.3 skrll gimmerand(void)
330 1.1.4.3 skrll {
331 1.1.4.3 skrll
332 1.1.4.3 skrll return (randstate = randstate * 1103515245 + 12345) % (0x80000000L);
333 1.1.4.3 skrll }
334 1.1.4.3 skrll
335 1.1.4.3 skrll /*
336 1.1.4.3 skrll * Block device with very simple fault injection. Fails every
337 1.1.4.3 skrll * n out of BLKFAIL_MAX I/O with EIO. n is determined by the env
338 1.1.4.3 skrll * variable RUMP_BLKFAIL.
339 1.1.4.3 skrll */
340 1.1.4.3 skrll void
341 1.1.4.3 skrll rumpblk_strategy_fail(struct buf *bp)
342 1.1.4.3 skrll {
343 1.1.4.3 skrll
344 1.1.4.3 skrll if (gimmerand() % BLKFAIL_MAX >= blkfail) {
345 1.1.4.3 skrll dostrategy(bp);
346 1.1.4.3 skrll } else {
347 1.1.4.3 skrll printf("block fault injection: failing I/O on block %lld\n",
348 1.1.4.3 skrll (long long)bp->b_blkno);
349 1.1.4.3 skrll bp->b_error = EIO;
350 1.1.4.3 skrll biodone(bp);
351 1.1.4.3 skrll }
352 1.1.4.3 skrll }
353