rumpblk.c revision 1.2 1 /* $NetBSD: rumpblk.c,v 1.2 2009/01/27 09:14:01 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by the
7 * Finnish Cultural Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 /*
32 * Block device emulation. Presents a block device interface and
33 * uses rumpuser system calls to satisfy I/O requests.
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.2 2009/01/27 09:14:01 pooka Exp $");
38
#include <sys/param.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/stat.h>
47
48 #include <rump/rumpuser.h>
49
50 #include "rump_private.h"
51 #include "rump_vfs_private.h"
52
53 #define RUMPBLK_SIZE 16
54 static struct rblkdev {
55 char *rblk_path;
56 int rblk_fd;
57
58 struct partition *rblk_curpi;
59 struct partition rblk_pi;
60 struct disklabel rblk_dl;
61 } minors[RUMPBLK_SIZE];
62
/*
 * Prototypes for the device entry points, declared through the
 * dev_type_*() helper macros.
 *
 * NOTE(review): rumpblk_dump and rumpblk_size are declared here, but no
 * definition of either is visible in this file and the block device
 * switch uses nodump/nosize instead -- confirm whether they are needed.
 */
dev_type_open(rumpblk_open);
dev_type_close(rumpblk_close);

dev_type_read(rumpblk_read);
dev_type_write(rumpblk_write);

dev_type_strategy(rumpblk_strategy);
dev_type_ioctl(rumpblk_ioctl);

dev_type_dump(rumpblk_dump);
dev_type_size(rumpblk_size);
71
72 static const struct bdevsw rumpblk_bdevsw = {
73 rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
74 nodump, nosize, D_DISK
75 };
76
77 static const struct cdevsw rumpblk_cdevsw = {
78 rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
79 rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
80 };
81
82 /* XXX: not mpsafe */
83
84 int
85 rumpblk_init()
86 {
87 int rumpblk = RUMPBLK;
88
89 return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
90 &rumpblk_cdevsw, &rumpblk);
91 }
92
93 int
94 rumpblk_register(const char *path)
95 {
96 size_t len;
97 int i;
98
99 for (i = 0; i < RUMPBLK_SIZE; i++)
100 if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
101 return i;
102
103 for (i = 0; i < RUMPBLK_SIZE; i++)
104 if (minors[i].rblk_path == NULL)
105 break;
106 if (i == RUMPBLK_SIZE)
107 return -1;
108
109 len = strlen(path);
110 minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
111 strcpy(minors[i].rblk_path, path);
112 minors[i].rblk_fd = -1;
113 return i;
114 }
115
116 int
117 rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
118 {
119 struct rblkdev *rblk = &minors[minor(dev)];
120 struct stat sb;
121 int error, fd;
122
123 KASSERT(rblk->rblk_fd == -1);
124 fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
125 if (error)
126 return error;
127
128 /*
129 * Setup partition info. First try the usual. */
130 if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
131 /*
132 * If that works, use it. We still need to guess
133 * which partition we are on.
134 */
135 rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
136 } else {
137 /*
138 * If that didn't work, assume were a regular file
139 * and just try to fake the info the best we can.
140 */
141 memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
142
143 if (rumpuser_stat(rblk->rblk_path, &sb, &error) == -1) {
144 int dummy;
145
146 rumpuser_close(fd, &dummy);
147 return error;
148 }
149 rblk->rblk_pi.p_size = sb.st_size >> DEV_BSHIFT;
150 rblk->rblk_dl.d_secsize = DEV_BSIZE;
151 rblk->rblk_curpi = &rblk->rblk_pi;
152 }
153 rblk->rblk_fd = fd;
154
155 return 0;
156 }
157
158 int
159 rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
160 {
161 struct rblkdev *rblk = &minors[minor(dev)];
162 int dummy;
163
164 rumpuser_close(rblk->rblk_fd, &dummy);
165 rblk->rblk_fd = -1;
166
167 return 0;
168 }
169
170 int
171 rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
172 {
173 struct rblkdev *rblk = &minors[minor(dev)];
174 int rv, error;
175
176 if (xfer == DIOCGPART) {
177 struct partinfo *pi = (struct partinfo *)addr;
178
179 pi->part = rblk->rblk_curpi;
180 pi->disklab = &rblk->rblk_dl;
181
182 return 0;
183 }
184
185 rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
186 if (rv == -1)
187 return error;
188
189 return 0;
190 }
191
192 int
193 rumpblk_read(dev_t dev, struct uio *uio, int flags)
194 {
195
196 panic("%s: unimplemented", __func__);
197 }
198
199 int
200 rumpblk_write(dev_t dev, struct uio *uio, int flags)
201 {
202
203 panic("%s: unimplemented", __func__);
204 }
205
206 void
207 rumpblk_strategy(struct buf *bp)
208 {
209 struct rblkdev *rblk = &minors[minor(bp->b_dev)];
210 off_t off;
211 int async;
212
213 off = bp->b_blkno << DEV_BSHIFT;
214 DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
215 " (0x%" PRIx64 " - 0x%" PRIx64")\n",
216 bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
217 off, off, (off + bp->b_bcount)));
218
219 /*
220 * Do I/O. We have different paths for async and sync I/O.
221 * Async I/O is done by passing a request to rumpuser where
222 * it is executed. The rumpuser routine then calls
223 * biodone() to signal any waiters in the kernel. I/O's are
224 * executed in series. Technically executing them in parallel
225 * would produce better results, but then we'd need either
226 * more threads or posix aio. Maybe worth investigating
227 * this later.
228 *
229 * Synchronous I/O is done directly in the context mainly to
230 * avoid unnecessary scheduling with the I/O thread.
231 */
232 async = bp->b_flags & B_ASYNC;
233 if (async && rump_threads) {
234 struct rumpuser_aio *rua;
235
236 rumpuser_mutex_enter(&rumpuser_aio_mtx);
237 /*
238 * Check if our buffer is full. Doing it this way
239 * throttles the I/O a bit if we have a massive
240 * async I/O burst.
241 *
242 * XXX: this actually leads to deadlocks with spl()
243 * (caller maybe be at splbio() legally for async I/O),
244 * so for now set N_AIOS high and FIXXXME some day.
245 */
246 if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
247 rumpuser_mutex_exit(&rumpuser_aio_mtx);
248 goto syncfallback;
249 }
250
251 rua = &rumpuser_aios[rumpuser_aio_head];
252 KASSERT(rua->rua_bp == NULL);
253 rua->rua_fd = rblk->rblk_fd;
254 rua->rua_data = bp->b_data;
255 rua->rua_dlen = bp->b_bcount;
256 rua->rua_off = off;
257 rua->rua_bp = bp;
258 rua->rua_op = BUF_ISREAD(bp);
259
260 /* insert into queue & signal */
261 rumpuser_aio_head = (rumpuser_aio_head+1) % (N_AIOS-1);
262 rumpuser_cv_signal(&rumpuser_aio_cv);
263 rumpuser_mutex_exit(&rumpuser_aio_mtx);
264 } else {
265 syncfallback:
266 if (BUF_ISREAD(bp)) {
267 rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
268 bp->b_bcount, off, rump_biodone, bp);
269 } else {
270 rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
271 bp->b_bcount, off, rump_biodone, bp);
272 }
273 if (!async) {
274 int error;
275
276 if (BUF_ISWRITE(bp))
277 rumpuser_fsync(rblk->rblk_fd, &error);
278 }
279 }
280 }
281