rumpblk.c revision 1.1.2.2 1 1.1.2.2 mjf /* $NetBSD: rumpblk.c,v 1.1.2.2 2009/01/17 13:29:38 mjf Exp $ */
2 1.1.2.2 mjf
3 1.1.2.2 mjf /*
4 1.1.2.2 mjf * Copyright (c) 2009 Antti Kantee. All Rights Reserved.
5 1.1.2.2 mjf *
6 1.1.2.2 mjf * Development of this software was supported by the
7 1.1.2.2 mjf * Finnish Cultural Foundation.
8 1.1.2.2 mjf *
9 1.1.2.2 mjf * Redistribution and use in source and binary forms, with or without
10 1.1.2.2 mjf * modification, are permitted provided that the following conditions
11 1.1.2.2 mjf * are met:
12 1.1.2.2 mjf * 1. Redistributions of source code must retain the above copyright
13 1.1.2.2 mjf * notice, this list of conditions and the following disclaimer.
14 1.1.2.2 mjf * 2. Redistributions in binary form must reproduce the above copyright
15 1.1.2.2 mjf * notice, this list of conditions and the following disclaimer in the
16 1.1.2.2 mjf * documentation and/or other materials provided with the distribution.
17 1.1.2.2 mjf *
18 1.1.2.2 mjf * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
19 1.1.2.2 mjf * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 1.1.2.2 mjf * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 1.1.2.2 mjf * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 1.1.2.2 mjf * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 1.1.2.2 mjf * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 1.1.2.2 mjf * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 1.1.2.2 mjf * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 1.1.2.2 mjf * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 1.1.2.2 mjf * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 1.1.2.2 mjf * SUCH DAMAGE.
29 1.1.2.2 mjf */
30 1.1.2.2 mjf
31 1.1.2.2 mjf /*
32 1.1.2.2 mjf * Block device emulation. Presents a block device interface and
33 1.1.2.2 mjf * uses rumpuser system calls to satisfy I/O requests.
34 1.1.2.2 mjf */
35 1.1.2.2 mjf
36 1.1.2.2 mjf #include <sys/cdefs.h>
37 1.1.2.2 mjf __KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.1.2.2 2009/01/17 13:29:38 mjf Exp $");
38 1.1.2.2 mjf
39 1.1.2.2 mjf #include <sys/param.h>
40 1.1.2.2 mjf #include <sys/buf.h>
41 1.1.2.2 mjf #include <sys/conf.h>
42 1.1.2.2 mjf #include <sys/disklabel.h>
43 1.1.2.2 mjf #include <sys/fcntl.h>
44 1.1.2.2 mjf #include <sys/kmem.h>
45 1.1.2.2 mjf #include <sys/malloc.h>
46 1.1.2.2 mjf #include <sys/stat.h>
47 1.1.2.2 mjf
48 1.1.2.2 mjf #include <rump/rumpuser.h>
49 1.1.2.2 mjf
50 1.1.2.2 mjf #include "rump_private.h"
51 1.1.2.2 mjf #include "rump_vfs_private.h"
52 1.1.2.2 mjf
53 1.1.2.2 mjf #define RUMPBLK_SIZE 16
54 1.1.2.2 mjf static struct rblkdev {
55 1.1.2.2 mjf char *rblk_path;
56 1.1.2.2 mjf int rblk_fd;
57 1.1.2.2 mjf
58 1.1.2.2 mjf struct partition *rblk_curpi;
59 1.1.2.2 mjf struct partition rblk_pi;
60 1.1.2.2 mjf struct disklabel rblk_dl;
61 1.1.2.2 mjf } minors[RUMPBLK_SIZE];
62 1.1.2.2 mjf
63 1.1.2.2 mjf dev_type_open(rumpblk_open);
64 1.1.2.2 mjf dev_type_close(rumpblk_close);
65 1.1.2.2 mjf dev_type_read(rumpblk_read);
66 1.1.2.2 mjf dev_type_write(rumpblk_write);
67 1.1.2.2 mjf dev_type_ioctl(rumpblk_ioctl);
68 1.1.2.2 mjf dev_type_strategy(rumpblk_strategy);
69 1.1.2.2 mjf dev_type_dump(rumpblk_dump);
70 1.1.2.2 mjf dev_type_size(rumpblk_size);
71 1.1.2.2 mjf
72 1.1.2.2 mjf static const struct bdevsw rumpblk_bdevsw = {
73 1.1.2.2 mjf rumpblk_open, rumpblk_close, rumpblk_strategy, rumpblk_ioctl,
74 1.1.2.2 mjf nodump, nosize, D_DISK
75 1.1.2.2 mjf };
76 1.1.2.2 mjf
77 1.1.2.2 mjf static const struct cdevsw rumpblk_cdevsw = {
78 1.1.2.2 mjf rumpblk_open, rumpblk_close, rumpblk_read, rumpblk_write,
79 1.1.2.2 mjf rumpblk_ioctl, nostop, notty, nopoll, nommap, nokqfilter, D_DISK
80 1.1.2.2 mjf };
81 1.1.2.2 mjf
82 1.1.2.2 mjf /* XXX: not mpsafe */
83 1.1.2.2 mjf
84 1.1.2.2 mjf int
85 1.1.2.2 mjf rumpblk_init()
86 1.1.2.2 mjf {
87 1.1.2.2 mjf int rumpblk = RUMPBLK;
88 1.1.2.2 mjf
89 1.1.2.2 mjf return devsw_attach("rumpblk", &rumpblk_bdevsw, &rumpblk,
90 1.1.2.2 mjf &rumpblk_cdevsw, &rumpblk);
91 1.1.2.2 mjf }
92 1.1.2.2 mjf
93 1.1.2.2 mjf int
94 1.1.2.2 mjf rumpblk_register(const char *path)
95 1.1.2.2 mjf {
96 1.1.2.2 mjf size_t len;
97 1.1.2.2 mjf int i;
98 1.1.2.2 mjf
99 1.1.2.2 mjf for (i = 0; i < RUMPBLK_SIZE; i++)
100 1.1.2.2 mjf if (minors[i].rblk_path && strcmp(minors[i].rblk_path, path)==0)
101 1.1.2.2 mjf return i;
102 1.1.2.2 mjf
103 1.1.2.2 mjf for (i = 0; i < RUMPBLK_SIZE; i++)
104 1.1.2.2 mjf if (minors[i].rblk_path == NULL)
105 1.1.2.2 mjf break;
106 1.1.2.2 mjf if (i == RUMPBLK_SIZE)
107 1.1.2.2 mjf return -1;
108 1.1.2.2 mjf
109 1.1.2.2 mjf len = strlen(path);
110 1.1.2.2 mjf minors[i].rblk_path = malloc(len+1, M_TEMP, M_WAITOK);
111 1.1.2.2 mjf strcpy(minors[i].rblk_path, path);
112 1.1.2.2 mjf minors[i].rblk_fd = -1;
113 1.1.2.2 mjf return i;
114 1.1.2.2 mjf }
115 1.1.2.2 mjf
116 1.1.2.2 mjf int
117 1.1.2.2 mjf rumpblk_open(dev_t dev, int flag, int fmt, struct lwp *l)
118 1.1.2.2 mjf {
119 1.1.2.2 mjf struct rblkdev *rblk = &minors[minor(dev)];
120 1.1.2.2 mjf struct stat sb;
121 1.1.2.2 mjf int error, fd;
122 1.1.2.2 mjf
123 1.1.2.2 mjf KASSERT(rblk->rblk_fd == -1);
124 1.1.2.2 mjf fd = rumpuser_open(rblk->rblk_path, OFLAGS(flag), &error);
125 1.1.2.2 mjf if (error)
126 1.1.2.2 mjf return error;
127 1.1.2.2 mjf
128 1.1.2.2 mjf /*
129 1.1.2.2 mjf * Setup partition info. First try the usual. */
130 1.1.2.2 mjf if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
131 1.1.2.2 mjf /*
132 1.1.2.2 mjf * If that works, use it. We still need to guess
133 1.1.2.2 mjf * which partition we are on.
134 1.1.2.2 mjf */
135 1.1.2.2 mjf rblk->rblk_curpi = &rblk->rblk_dl.d_partitions[0];
136 1.1.2.2 mjf } else {
137 1.1.2.2 mjf /*
138 1.1.2.2 mjf * If that didn't work, assume were a regular file
139 1.1.2.2 mjf * and just try to fake the info the best we can.
140 1.1.2.2 mjf */
141 1.1.2.2 mjf memset(&rblk->rblk_dl, 0, sizeof(rblk->rblk_dl));
142 1.1.2.2 mjf
143 1.1.2.2 mjf if (rumpuser_stat(rblk->rblk_path, &sb, &error) == -1) {
144 1.1.2.2 mjf int dummy;
145 1.1.2.2 mjf
146 1.1.2.2 mjf rumpuser_close(fd, &dummy);
147 1.1.2.2 mjf return error;
148 1.1.2.2 mjf }
149 1.1.2.2 mjf rblk->rblk_pi.p_size = sb.st_size >> DEV_BSHIFT;
150 1.1.2.2 mjf rblk->rblk_dl.d_secsize = DEV_BSIZE;
151 1.1.2.2 mjf rblk->rblk_curpi = &rblk->rblk_pi;
152 1.1.2.2 mjf }
153 1.1.2.2 mjf rblk->rblk_fd = fd;
154 1.1.2.2 mjf
155 1.1.2.2 mjf return 0;
156 1.1.2.2 mjf }
157 1.1.2.2 mjf
158 1.1.2.2 mjf int
159 1.1.2.2 mjf rumpblk_close(dev_t dev, int flag, int fmt, struct lwp *l)
160 1.1.2.2 mjf {
161 1.1.2.2 mjf struct rblkdev *rblk = &minors[minor(dev)];
162 1.1.2.2 mjf int dummy;
163 1.1.2.2 mjf
164 1.1.2.2 mjf rumpuser_close(rblk->rblk_fd, &dummy);
165 1.1.2.2 mjf rblk->rblk_fd = -1;
166 1.1.2.2 mjf
167 1.1.2.2 mjf return 0;
168 1.1.2.2 mjf }
169 1.1.2.2 mjf
170 1.1.2.2 mjf int
171 1.1.2.2 mjf rumpblk_ioctl(dev_t dev, u_long xfer, void *addr, int flag, struct lwp *l)
172 1.1.2.2 mjf {
173 1.1.2.2 mjf struct rblkdev *rblk = &minors[minor(dev)];
174 1.1.2.2 mjf int rv, error;
175 1.1.2.2 mjf
176 1.1.2.2 mjf if (xfer == DIOCGPART) {
177 1.1.2.2 mjf struct partinfo *pi = (struct partinfo *)addr;
178 1.1.2.2 mjf
179 1.1.2.2 mjf pi->part = rblk->rblk_curpi;
180 1.1.2.2 mjf pi->disklab = &rblk->rblk_dl;
181 1.1.2.2 mjf
182 1.1.2.2 mjf return 0;
183 1.1.2.2 mjf }
184 1.1.2.2 mjf
185 1.1.2.2 mjf rv = rumpuser_ioctl(rblk->rblk_fd, xfer, addr, &error);
186 1.1.2.2 mjf if (rv == -1)
187 1.1.2.2 mjf return error;
188 1.1.2.2 mjf
189 1.1.2.2 mjf return 0;
190 1.1.2.2 mjf }
191 1.1.2.2 mjf
192 1.1.2.2 mjf int
193 1.1.2.2 mjf rumpblk_read(dev_t dev, struct uio *uio, int flags)
194 1.1.2.2 mjf {
195 1.1.2.2 mjf
196 1.1.2.2 mjf panic("%s: unimplemented", __func__);
197 1.1.2.2 mjf }
198 1.1.2.2 mjf
199 1.1.2.2 mjf int
200 1.1.2.2 mjf rumpblk_write(dev_t dev, struct uio *uio, int flags)
201 1.1.2.2 mjf {
202 1.1.2.2 mjf
203 1.1.2.2 mjf panic("%s: unimplemented", __func__);
204 1.1.2.2 mjf }
205 1.1.2.2 mjf
206 1.1.2.2 mjf void
207 1.1.2.2 mjf rumpblk_strategy(struct buf *bp)
208 1.1.2.2 mjf {
209 1.1.2.2 mjf struct rblkdev *rblk = &minors[minor(bp->b_dev)];
210 1.1.2.2 mjf off_t off;
211 1.1.2.2 mjf int async;
212 1.1.2.2 mjf
213 1.1.2.2 mjf off = bp->b_blkno << DEV_BSHIFT;
214 1.1.2.2 mjf DPRINTF(("rumpblk_strategy: 0x%x bytes %s off 0x%" PRIx64
215 1.1.2.2 mjf " (0x%" PRIx64 " - 0x%" PRIx64")\n",
216 1.1.2.2 mjf bp->b_bcount, BUF_ISREAD(bp) "READ" : "WRITE",
217 1.1.2.2 mjf off, off, (off + bp->b_bcount)));
218 1.1.2.2 mjf
219 1.1.2.2 mjf /*
220 1.1.2.2 mjf * Do I/O. We have different paths for async and sync I/O.
221 1.1.2.2 mjf * Async I/O is done by passing a request to rumpuser where
222 1.1.2.2 mjf * it is executed. The rumpuser routine then calls
223 1.1.2.2 mjf * biodone() to signal any waiters in the kernel. I/O's are
224 1.1.2.2 mjf * executed in series. Technically executing them in parallel
225 1.1.2.2 mjf * would produce better results, but then we'd need either
226 1.1.2.2 mjf * more threads or posix aio. Maybe worth investigating
227 1.1.2.2 mjf * this later.
228 1.1.2.2 mjf *
229 1.1.2.2 mjf * Synchronous I/O is done directly in the context mainly to
230 1.1.2.2 mjf * avoid unnecessary scheduling with the I/O thread.
231 1.1.2.2 mjf */
232 1.1.2.2 mjf async = bp->b_flags & B_ASYNC;
233 1.1.2.2 mjf if (async && rump_threads) {
234 1.1.2.2 mjf struct rumpuser_aio *rua;
235 1.1.2.2 mjf
236 1.1.2.2 mjf rua = kmem_alloc(sizeof(struct rumpuser_aio), KM_SLEEP);
237 1.1.2.2 mjf rua->rua_fd = rblk->rblk_fd;
238 1.1.2.2 mjf rua->rua_data = bp->b_data;
239 1.1.2.2 mjf rua->rua_dlen = bp->b_bcount;
240 1.1.2.2 mjf rua->rua_off = off;
241 1.1.2.2 mjf rua->rua_bp = bp;
242 1.1.2.2 mjf rua->rua_op = BUF_ISREAD(bp);
243 1.1.2.2 mjf
244 1.1.2.2 mjf rumpuser_mutex_enter(&rumpuser_aio_mtx);
245 1.1.2.2 mjf
246 1.1.2.2 mjf /*
247 1.1.2.2 mjf * Check if our buffer is full. Doing it this way
248 1.1.2.2 mjf * throttles the I/O a bit if we have a massive
249 1.1.2.2 mjf * async I/O burst.
250 1.1.2.2 mjf *
251 1.1.2.2 mjf * XXX: this actually leads to deadlocks with spl()
252 1.1.2.2 mjf * (caller maybe be at splbio() legally for async I/O),
253 1.1.2.2 mjf * so for now set N_AIOS high and FIXXXME some day.
254 1.1.2.2 mjf */
255 1.1.2.2 mjf if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
256 1.1.2.2 mjf kmem_free(rua, sizeof(*rua));
257 1.1.2.2 mjf rumpuser_mutex_exit(&rumpuser_aio_mtx);
258 1.1.2.2 mjf goto syncfallback;
259 1.1.2.2 mjf }
260 1.1.2.2 mjf
261 1.1.2.2 mjf /* insert into queue & signal */
262 1.1.2.2 mjf rumpuser_aios[rumpuser_aio_head] = rua;
263 1.1.2.2 mjf rumpuser_aio_head = (rumpuser_aio_head+1) % (N_AIOS-1);
264 1.1.2.2 mjf rumpuser_cv_signal(&rumpuser_aio_cv);
265 1.1.2.2 mjf rumpuser_mutex_exit(&rumpuser_aio_mtx);
266 1.1.2.2 mjf } else {
267 1.1.2.2 mjf syncfallback:
268 1.1.2.2 mjf if (BUF_ISREAD(bp)) {
269 1.1.2.2 mjf rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
270 1.1.2.2 mjf bp->b_bcount, off, rump_biodone, bp);
271 1.1.2.2 mjf } else {
272 1.1.2.2 mjf rumpuser_write_bio(rblk->rblk_fd, bp->b_data,
273 1.1.2.2 mjf bp->b_bcount, off, rump_biodone, bp);
274 1.1.2.2 mjf }
275 1.1.2.2 mjf if (!async) {
276 1.1.2.2 mjf int error;
277 1.1.2.2 mjf
278 1.1.2.2 mjf if (BUF_ISWRITE(bp))
279 1.1.2.2 mjf rumpuser_fsync(rblk->rblk_fd, &error);
280 1.1.2.2 mjf }
281 1.1.2.2 mjf }
282 1.1.2.2 mjf }
283