md.c revision 1.64 1 /* $NetBSD: md.c,v 1.64 2010/11/22 21:10:10 pooka Exp $ */
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.64 2010/11/22 21:10:10 pooka Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
/*
 * Extract the md unit number from a dev_t.  Every partition of a unit
 * maps to the same softc; configuration ioctls are only honoured on
 * the raw partition of that unit (see mdioctl()).
 */
#define	MD_UNIT(dev)	DISKUNIT(dev)
80
81 /* autoconfig stuff... */
82
83 struct md_softc {
84 device_t sc_dev; /* Self. */
85 struct disk sc_dkdev; /* hook for generic disk handling */
86 struct md_conf sc_md;
87 struct bufq_state *sc_buflist;
88 };
89 /* shorthand for fields in sc_md: */
90 #define sc_addr sc_md.md_addr
91 #define sc_size sc_md.md_size
92 #define sc_type sc_md.md_type
93
94 void mdattach(int);
95
96 static void md_attach(device_t, device_t, void *);
97 static int md_detach(device_t, int);
98
99 static dev_type_open(mdopen);
100 static dev_type_close(mdclose);
101 static dev_type_read(mdread);
102 static dev_type_write(mdwrite);
103 static dev_type_ioctl(mdioctl);
104 static dev_type_strategy(mdstrategy);
105 static dev_type_size(mdsize);
106
107 const struct bdevsw md_bdevsw = {
108 mdopen, mdclose, mdstrategy, mdioctl, nodump, mdsize, D_DISK
109 };
110
111 const struct cdevsw md_cdevsw = {
112 mdopen, mdclose, mdread, mdwrite, mdioctl,
113 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
114 };
115
116 static struct dkdriver mddkdriver = { mdstrategy, NULL };
117
118 extern struct cfdriver md_cd;
119 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
120 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
121
122 static kmutex_t md_device_lock; /* Protect unit creation / deletion. */
123 extern size_t md_root_size;
124
125 static void md_set_disklabel(struct md_softc *);
126
127 /*
128 * This is called if we are configured as a pseudo-device
129 */
130 void
131 mdattach(int n)
132 {
133
134 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
135 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
136 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
137 return;
138 }
139 }
140
141 static void
142 md_attach(device_t parent, device_t self, void *aux)
143 {
144 struct md_softc *sc = device_private(self);
145
146 sc->sc_dev = self;
147 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
148
149 /* XXX - Could accept aux info here to set the config. */
150 #ifdef MEMORY_DISK_HOOKS
151 /*
152 * This external function might setup a pre-loaded disk.
153 * All it would need to do is setup the md_conf struct.
154 * See sys/dev/md_root.c for an example.
155 */
156 md_attach_hook(device_unit(self), &sc->sc_md);
157 #endif
158
159 /*
160 * Initialize and attach the disk structure.
161 */
162 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
163 disk_attach(&sc->sc_dkdev);
164
165 if (sc->sc_type != MD_UNCONFIGURED)
166 md_set_disklabel(sc);
167
168 if (!pmf_device_register(self, NULL, NULL))
169 aprint_error_dev(self, "couldn't establish power handler\n");
170 }
171
172 static int
173 md_detach(device_t self, int flags)
174 {
175 struct md_softc *sc = device_private(self);
176 int rc;
177
178 rc = 0;
179 mutex_enter(&sc->sc_dkdev.dk_openlock);
180 if (sc->sc_dkdev.dk_openmask == 0)
181 ; /* nothing to do */
182 else if ((flags & DETACH_FORCE) == 0)
183 rc = EBUSY;
184 mutex_exit(&sc->sc_dkdev.dk_openlock);
185
186 if (rc != 0)
187 return rc;
188
189 pmf_device_deregister(self);
190 disk_detach(&sc->sc_dkdev);
191 disk_destroy(&sc->sc_dkdev);
192 bufq_free(sc->sc_buflist);
193 return 0;
194 }
195
196 /*
197 * operational routines:
198 * open, close, read, write, strategy,
199 * ioctl, dump, size
200 */
201
/* Helpers behind ioctl MD_SETCONF; the server pair is optional. */
#if MEMORY_DISK_SERVER
static int	md_server_loop(struct md_softc *sc);
static int	md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
#endif /* MEMORY_DISK_SERVER */
static int	md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
209
210 static int
211 mdsize(dev_t dev)
212 {
213 struct md_softc *sc;
214
215 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
216 if (sc == NULL)
217 return 0;
218
219 if (sc->sc_type == MD_UNCONFIGURED)
220 return 0;
221
222 return (sc->sc_size >> DEV_BSHIFT);
223 }
224
225 static int
226 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
227 {
228 int unit;
229 int part = DISKPART(dev);
230 int pmask = 1 << part;
231 cfdata_t cf;
232 struct md_softc *sc;
233 struct disk *dk;
234
235 mutex_enter(&md_device_lock);
236 unit = MD_UNIT(dev);
237 sc = device_lookup_private(&md_cd, unit);
238 if (sc == NULL) {
239 if (part != RAW_PART) {
240 mutex_exit(&md_device_lock);
241 return ENXIO;
242 }
243 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
244 cf->cf_name = md_cd.cd_name;
245 cf->cf_atname = md_cd.cd_name;
246 cf->cf_unit = unit;
247 cf->cf_fstate = FSTATE_STAR;
248 sc = device_private(config_attach_pseudo(cf));
249 if (sc == NULL) {
250 mutex_exit(&md_device_lock);
251 return ENOMEM;
252 }
253 }
254
255 dk = &sc->sc_dkdev;
256
257 /*
258 * The raw partition is used for ioctl to configure.
259 */
260 if (part == RAW_PART)
261 goto ok;
262
263 #ifdef MEMORY_DISK_HOOKS
264 /* Call the open hook to allow loading the device. */
265 md_open_hook(unit, &sc->sc_md);
266 #endif
267
268 /*
269 * This is a normal, "slave" device, so
270 * enforce initialized.
271 */
272 if (sc->sc_type == MD_UNCONFIGURED) {
273 mutex_exit(&md_device_lock);
274 return ENXIO;
275 }
276
277 ok:
278 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
279 mutex_enter(&dk->dk_openlock);
280 /* Mark our unit as open. */
281 switch (fmt) {
282 case S_IFCHR:
283 dk->dk_copenmask |= pmask;
284 break;
285 case S_IFBLK:
286 dk->dk_bopenmask |= pmask;
287 break;
288 }
289
290 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
291
292 mutex_exit(&dk->dk_openlock);
293 mutex_exit(&md_device_lock);
294 return 0;
295 }
296
297 static int
298 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
299 {
300 int part = DISKPART(dev);
301 int pmask = 1 << part;
302 int error;
303 cfdata_t cf;
304 struct md_softc *sc;
305 struct disk *dk;
306
307 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
308 if (sc == NULL)
309 return ENXIO;
310
311 dk = &sc->sc_dkdev;
312
313 mutex_enter(&dk->dk_openlock);
314
315 switch (fmt) {
316 case S_IFCHR:
317 dk->dk_copenmask &= ~pmask;
318 break;
319 case S_IFBLK:
320 dk->dk_bopenmask &= ~pmask;
321 break;
322 }
323 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
324
325 mutex_exit(&dk->dk_openlock);
326
327 mutex_enter(&md_device_lock);
328 cf = device_cfdata(sc->sc_dev);
329 error = config_detach(sc->sc_dev, DETACH_QUIET);
330 if (! error)
331 free(cf, M_DEVBUF);
332 mutex_exit(&md_device_lock);
333 return error;
334 }
335
336 static int
337 mdread(dev_t dev, struct uio *uio, int flags)
338 {
339 struct md_softc *sc;
340
341 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
342
343 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
344 return ENXIO;
345
346 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
347 }
348
349 static int
350 mdwrite(dev_t dev, struct uio *uio, int flags)
351 {
352 struct md_softc *sc;
353
354 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
355
356 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
357 return ENXIO;
358
359 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
360 }
361
362 /*
363 * Handle I/O requests, either directly, or
364 * by passing them to the server process.
365 */
366 static void
367 mdstrategy(struct buf *bp)
368 {
369 struct md_softc *sc;
370 void * addr;
371 size_t off, xfer;
372 bool is_read;
373
374 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
375
376 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
377 bp->b_error = ENXIO;
378 goto done;
379 }
380
381 switch (sc->sc_type) {
382 #if MEMORY_DISK_SERVER
383 case MD_UMEM_SERVER:
384 /* Just add this job to the server's queue. */
385 bufq_put(sc->sc_buflist, bp);
386 wakeup((void *)sc);
387 /* see md_server_loop() */
388 /* no biodone in this case */
389 return;
390 #endif /* MEMORY_DISK_SERVER */
391
392 case MD_KMEM_FIXED:
393 case MD_KMEM_ALLOCATED:
394 /* These are in kernel space. Access directly. */
395 is_read = ((bp->b_flags & B_READ) == B_READ);
396 bp->b_resid = bp->b_bcount;
397 off = (bp->b_blkno << DEV_BSHIFT);
398 if (off >= sc->sc_size) {
399 if (is_read)
400 break; /* EOF */
401 goto set_eio;
402 }
403 xfer = bp->b_resid;
404 if (xfer > (sc->sc_size - off))
405 xfer = (sc->sc_size - off);
406 addr = (char *)sc->sc_addr + off;
407 disk_busy(&sc->sc_dkdev);
408 if (is_read)
409 memcpy(bp->b_data, addr, xfer);
410 else
411 memcpy(addr, bp->b_data, xfer);
412 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
413 bp->b_resid -= xfer;
414 break;
415
416 default:
417 bp->b_resid = bp->b_bcount;
418 set_eio:
419 bp->b_error = EIO;
420 break;
421 }
422 done:
423 biodone(bp);
424 }
425
426 static int
427 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
428 {
429 struct md_softc *sc;
430 struct md_conf *umd;
431 struct disklabel *lp;
432 struct partinfo *pp;
433
434 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
435 return ENXIO;
436
437 if (sc->sc_type != MD_UNCONFIGURED) {
438 switch (cmd) {
439 case DIOCGDINFO:
440 lp = (struct disklabel *)data;
441 *lp = *sc->sc_dkdev.dk_label;
442 return 0;
443
444 case DIOCGPART:
445 pp = (struct partinfo *)data;
446 pp->disklab = sc->sc_dkdev.dk_label;
447 pp->part =
448 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
449 return 0;
450 }
451 }
452
453 /* If this is not the raw partition, punt! */
454 if (DISKPART(dev) != RAW_PART)
455 return ENOTTY;
456
457 umd = (struct md_conf *)data;
458 switch (cmd) {
459 case MD_GETCONF:
460 *umd = sc->sc_md;
461 return 0;
462
463 case MD_SETCONF:
464 /* Can only set it once. */
465 if (sc->sc_type != MD_UNCONFIGURED)
466 break;
467 switch (umd->md_type) {
468 case MD_KMEM_ALLOCATED:
469 return md_ioctl_kalloc(sc, umd, l);
470 #if MEMORY_DISK_SERVER
471 case MD_UMEM_SERVER:
472 return md_ioctl_server(sc, umd, l);
473 #endif /* MEMORY_DISK_SERVER */
474 default:
475 break;
476 }
477 break;
478 }
479 return EINVAL;
480 }
481
482 static void
483 md_set_disklabel(struct md_softc *sc)
484 {
485 struct disklabel *lp = sc->sc_dkdev.dk_label;
486 struct partition *pp;
487
488 memset(lp, 0, sizeof(*lp));
489
490 lp->d_secsize = DEV_BSIZE;
491 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
492 if (lp->d_secperunit >= (32*64)) {
493 lp->d_nsectors = 32;
494 lp->d_ntracks = 64;
495 lp->d_ncylinders = lp->d_secperunit / (32*64);
496 } else {
497 lp->d_nsectors = 1;
498 lp->d_ntracks = 1;
499 lp->d_ncylinders = lp->d_secperunit;
500 }
501 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
502
503 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
504 lp->d_type = DTYPE_UNKNOWN;
505 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
506 lp->d_rpm = 3600;
507 lp->d_interleave = 1;
508 lp->d_flags = 0;
509
510 pp = &lp->d_partitions[0];
511 pp->p_offset = 0;
512 pp->p_size = lp->d_secperunit;
513 pp->p_fstype = FS_BSDFFS;
514
515 pp = &lp->d_partitions[RAW_PART];
516 pp->p_offset = 0;
517 pp->p_size = lp->d_secperunit;
518 pp->p_fstype = FS_UNUSED;
519
520 lp->d_npartitions = RAW_PART+1;
521 lp->d_magic = DISKMAGIC;
522 lp->d_magic2 = DISKMAGIC;
523 lp->d_checksum = dkcksum(lp);
524 }
525
526 /*
527 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
528 * Just allocate some kernel memory and return.
529 */
530 static int
531 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
532 struct lwp *l)
533 {
534 vaddr_t addr;
535 vsize_t size;
536
537 /* Sanity check the size. */
538 size = umd->md_size;
539 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
540 if (!addr)
541 return ENOMEM;
542
543 /* This unit is now configured. */
544 sc->sc_addr = (void *)addr; /* kernel space */
545 sc->sc_size = (size_t)size;
546 sc->sc_type = MD_KMEM_ALLOCATED;
547 md_set_disklabel(sc);
548 return 0;
549 }
550
551 #if MEMORY_DISK_SERVER
552
553 /*
554 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
555 * Set config, then become the I/O server for this unit.
556 */
557 static int
558 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
559 struct lwp *l)
560 {
561 vaddr_t end;
562 int error;
563
564 /* Sanity check addr, size. */
565 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
566
567 if ((end >= VM_MAXUSER_ADDRESS) ||
568 (end < ((vaddr_t) umd->md_addr)) )
569 return EINVAL;
570
571 /* This unit is now configured. */
572 sc->sc_addr = umd->md_addr; /* user space */
573 sc->sc_size = umd->md_size;
574 sc->sc_type = MD_UMEM_SERVER;
575 md_set_disklabel(sc);
576
577 /* Become the server daemon */
578 error = md_server_loop(sc);
579
580 /* This server is now going away! */
581 sc->sc_type = MD_UNCONFIGURED;
582 sc->sc_addr = 0;
583 sc->sc_size = 0;
584
585 return (error);
586 }
587
588 static int md_sleep_pri = PWAIT | PCATCH;
589
590 static int
591 md_server_loop(struct md_softc *sc)
592 {
593 struct buf *bp;
594 void *addr; /* user space address */
595 size_t off; /* offset into "device" */
596 size_t xfer; /* amount to transfer */
597 int error;
598 bool is_read;
599
600 for (;;) {
601 /* Wait for some work to arrive. */
602 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
603 error = tsleep((void *)sc, md_sleep_pri, "md_idle", 0);
604 if (error)
605 return error;
606 }
607
608 /* Do the transfer to/from user space. */
609 error = 0;
610 is_read = ((bp->b_flags & B_READ) == B_READ);
611 bp->b_resid = bp->b_bcount;
612 off = (bp->b_blkno << DEV_BSHIFT);
613 if (off >= sc->sc_size) {
614 if (is_read)
615 goto done; /* EOF (not an error) */
616 error = EIO;
617 goto done;
618 }
619 xfer = bp->b_resid;
620 if (xfer > (sc->sc_size - off))
621 xfer = (sc->sc_size - off);
622 addr = (char *)sc->sc_addr + off;
623 disk_busy(&sc->sc_dkdev);
624 if (is_read)
625 error = copyin(addr, bp->b_data, xfer);
626 else
627 error = copyout(bp->b_data, addr, xfer);
628 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
629 if (!error)
630 bp->b_resid -= xfer;
631
632 done:
633 if (error) {
634 bp->b_error = error;
635 }
636 biodone(bp);
637 }
638 }
639 #endif /* MEMORY_DISK_SERVER */
640