/*	$NetBSD: md.c,v 1.70 2014/07/25 08:02:19 dholland Exp $	*/
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.70 2014/07/25 08:02:19 dholland Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
76 /*
77 * We should use the raw partition for ioctl.
78 */
79 #define MD_UNIT(unit) DISKUNIT(unit)
80
81 /* autoconfig stuff... */
82
83 struct md_softc {
84 device_t sc_dev; /* Self. */
85 struct disk sc_dkdev; /* hook for generic disk handling */
86 struct md_conf sc_md;
87 kmutex_t sc_lock; /* Protect self. */
88 kcondvar_t sc_cv; /* Wait here for work. */
89 struct bufq_state *sc_buflist;
90 };
91 /* shorthand for fields in sc_md: */
92 #define sc_addr sc_md.md_addr
93 #define sc_size sc_md.md_size
94 #define sc_type sc_md.md_type
95
96 void mdattach(int);
97
98 static void md_attach(device_t, device_t, void *);
99 static int md_detach(device_t, int);
100
101 static dev_type_open(mdopen);
102 static dev_type_close(mdclose);
103 static dev_type_read(mdread);
104 static dev_type_write(mdwrite);
105 static dev_type_ioctl(mdioctl);
106 static dev_type_strategy(mdstrategy);
107 static dev_type_size(mdsize);
108
109 const struct bdevsw md_bdevsw = {
110 .d_open = mdopen,
111 .d_close = mdclose,
112 .d_strategy = mdstrategy,
113 .d_ioctl = mdioctl,
114 .d_dump = nodump,
115 .d_psize = mdsize,
116 .d_discard = nodiscard,
117 .d_flag = D_DISK | D_MPSAFE
118 };
119
120 const struct cdevsw md_cdevsw = {
121 .d_open = mdopen,
122 .d_close = mdclose,
123 .d_read = mdread,
124 .d_write = mdwrite,
125 .d_ioctl = mdioctl,
126 .d_stop = nostop,
127 .d_tty = notty,
128 .d_poll = nopoll,
129 .d_mmap = nommap,
130 .d_kqfilter = nokqfilter,
131 .d_flag = D_DISK
132 };
133
134 static struct dkdriver mddkdriver = { mdstrategy, NULL };
135
136 extern struct cfdriver md_cd;
137 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
138 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
139
140 static kmutex_t md_device_lock; /* Protect unit creation / deletion. */
141 extern size_t md_root_size;
142
143 static void md_set_disklabel(struct md_softc *);
144
145 /*
146 * This is called if we are configured as a pseudo-device
147 */
148 void
149 mdattach(int n)
150 {
151
152 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
153 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
154 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
155 return;
156 }
157 }
158
159 static void
160 md_attach(device_t parent, device_t self, void *aux)
161 {
162 struct md_softc *sc = device_private(self);
163
164 sc->sc_dev = self;
165 sc->sc_type = MD_UNCONFIGURED;
166 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
167 cv_init(&sc->sc_cv, "mdidle");
168 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
169
170 /* XXX - Could accept aux info here to set the config. */
171 #ifdef MEMORY_DISK_HOOKS
172 /*
173 * This external function might setup a pre-loaded disk.
174 * All it would need to do is setup the md_conf struct.
175 * See sys/dev/md_root.c for an example.
176 */
177 md_attach_hook(device_unit(self), &sc->sc_md);
178 #endif
179
180 /*
181 * Initialize and attach the disk structure.
182 */
183 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
184 disk_attach(&sc->sc_dkdev);
185
186 if (sc->sc_type != MD_UNCONFIGURED)
187 md_set_disklabel(sc);
188
189 if (!pmf_device_register(self, NULL, NULL))
190 aprint_error_dev(self, "couldn't establish power handler\n");
191 }
192
193 static int
194 md_detach(device_t self, int flags)
195 {
196 struct md_softc *sc = device_private(self);
197 int rc;
198
199 rc = 0;
200 mutex_enter(&sc->sc_dkdev.dk_openlock);
201 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
202 ; /* nothing to do */
203 else if ((flags & DETACH_FORCE) == 0)
204 rc = EBUSY;
205 mutex_exit(&sc->sc_dkdev.dk_openlock);
206
207 if (rc != 0)
208 return rc;
209
210 pmf_device_deregister(self);
211 disk_detach(&sc->sc_dkdev);
212 disk_destroy(&sc->sc_dkdev);
213 bufq_free(sc->sc_buflist);
214 mutex_destroy(&sc->sc_lock);
215 cv_destroy(&sc->sc_cv);
216 return 0;
217 }
218
219 /*
220 * operational routines:
221 * open, close, read, write, strategy,
222 * ioctl, dump, size
223 */
224
/* MD_SETCONF back ends; the user-space server is optional. */
static int	md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
#if MEMORY_DISK_SERVER
static int	md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
static int	md_server_loop(struct md_softc *sc);
#endif /* MEMORY_DISK_SERVER */
232
233 static int
234 mdsize(dev_t dev)
235 {
236 struct md_softc *sc;
237 int res;
238
239 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
240 if (sc == NULL)
241 return 0;
242
243 mutex_enter(&sc->sc_lock);
244 if (sc->sc_type == MD_UNCONFIGURED)
245 res = 0;
246 else
247 res = sc->sc_size >> DEV_BSHIFT;
248 mutex_exit(&sc->sc_lock);
249
250 return res;
251 }
252
253 static int
254 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
255 {
256 int unit;
257 int part = DISKPART(dev);
258 int pmask = 1 << part;
259 cfdata_t cf;
260 struct md_softc *sc;
261 struct disk *dk;
262 #ifdef MEMORY_DISK_HOOKS
263 bool configured;
264 #endif
265
266 mutex_enter(&md_device_lock);
267 unit = MD_UNIT(dev);
268 sc = device_lookup_private(&md_cd, unit);
269 if (sc == NULL) {
270 if (part != RAW_PART) {
271 mutex_exit(&md_device_lock);
272 return ENXIO;
273 }
274 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
275 cf->cf_name = md_cd.cd_name;
276 cf->cf_atname = md_cd.cd_name;
277 cf->cf_unit = unit;
278 cf->cf_fstate = FSTATE_STAR;
279 sc = device_private(config_attach_pseudo(cf));
280 if (sc == NULL) {
281 mutex_exit(&md_device_lock);
282 return ENOMEM;
283 }
284 }
285
286 dk = &sc->sc_dkdev;
287
288 /*
289 * The raw partition is used for ioctl to configure.
290 */
291 if (part == RAW_PART)
292 goto ok;
293
294 #ifdef MEMORY_DISK_HOOKS
295 /* Call the open hook to allow loading the device. */
296 configured = (sc->sc_type != MD_UNCONFIGURED);
297 md_open_hook(unit, &sc->sc_md);
298 /* initialize disklabel if the device is configured in open hook */
299 if (!configured && sc->sc_type != MD_UNCONFIGURED)
300 md_set_disklabel(sc);
301 #endif
302
303 /*
304 * This is a normal, "slave" device, so
305 * enforce initialized.
306 */
307 if (sc->sc_type == MD_UNCONFIGURED) {
308 mutex_exit(&md_device_lock);
309 return ENXIO;
310 }
311
312 ok:
313 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
314 mutex_enter(&dk->dk_openlock);
315 /* Mark our unit as open. */
316 switch (fmt) {
317 case S_IFCHR:
318 dk->dk_copenmask |= pmask;
319 break;
320 case S_IFBLK:
321 dk->dk_bopenmask |= pmask;
322 break;
323 }
324
325 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
326
327 mutex_exit(&dk->dk_openlock);
328 mutex_exit(&md_device_lock);
329 return 0;
330 }
331
332 static int
333 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
334 {
335 int part = DISKPART(dev);
336 int pmask = 1 << part;
337 int error;
338 cfdata_t cf;
339 struct md_softc *sc;
340 struct disk *dk;
341
342 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
343 if (sc == NULL)
344 return ENXIO;
345
346 dk = &sc->sc_dkdev;
347
348 mutex_enter(&dk->dk_openlock);
349
350 switch (fmt) {
351 case S_IFCHR:
352 dk->dk_copenmask &= ~pmask;
353 break;
354 case S_IFBLK:
355 dk->dk_bopenmask &= ~pmask;
356 break;
357 }
358 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
359 if (dk->dk_openmask != 0) {
360 mutex_exit(&dk->dk_openlock);
361 return 0;
362 }
363
364 mutex_exit(&dk->dk_openlock);
365
366 mutex_enter(&md_device_lock);
367 cf = device_cfdata(sc->sc_dev);
368 error = config_detach(sc->sc_dev, DETACH_QUIET);
369 if (! error)
370 free(cf, M_DEVBUF);
371 mutex_exit(&md_device_lock);
372 return error;
373 }
374
375 static int
376 mdread(dev_t dev, struct uio *uio, int flags)
377 {
378 struct md_softc *sc;
379
380 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
381
382 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
383 return ENXIO;
384
385 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
386 }
387
388 static int
389 mdwrite(dev_t dev, struct uio *uio, int flags)
390 {
391 struct md_softc *sc;
392
393 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
394
395 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
396 return ENXIO;
397
398 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
399 }
400
401 /*
402 * Handle I/O requests, either directly, or
403 * by passing them to the server process.
404 */
405 static void
406 mdstrategy(struct buf *bp)
407 {
408 struct md_softc *sc;
409 void * addr;
410 size_t off, xfer;
411 bool is_read;
412
413 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
414
415 mutex_enter(&sc->sc_lock);
416
417 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
418 bp->b_error = ENXIO;
419 goto done;
420 }
421
422 switch (sc->sc_type) {
423 #if MEMORY_DISK_SERVER
424 case MD_UMEM_SERVER:
425 /* Just add this job to the server's queue. */
426 bufq_put(sc->sc_buflist, bp);
427 cv_signal(&sc->sc_cv);
428 mutex_exit(&sc->sc_lock);
429 /* see md_server_loop() */
430 /* no biodone in this case */
431 return;
432 #endif /* MEMORY_DISK_SERVER */
433
434 case MD_KMEM_FIXED:
435 case MD_KMEM_ALLOCATED:
436 /* These are in kernel space. Access directly. */
437 is_read = ((bp->b_flags & B_READ) == B_READ);
438 bp->b_resid = bp->b_bcount;
439 off = (bp->b_blkno << DEV_BSHIFT);
440 if (off >= sc->sc_size) {
441 if (is_read)
442 break; /* EOF */
443 goto set_eio;
444 }
445 xfer = bp->b_resid;
446 if (xfer > (sc->sc_size - off))
447 xfer = (sc->sc_size - off);
448 addr = (char *)sc->sc_addr + off;
449 disk_busy(&sc->sc_dkdev);
450 if (is_read)
451 memcpy(bp->b_data, addr, xfer);
452 else
453 memcpy(addr, bp->b_data, xfer);
454 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
455 bp->b_resid -= xfer;
456 break;
457
458 default:
459 bp->b_resid = bp->b_bcount;
460 set_eio:
461 bp->b_error = EIO;
462 break;
463 }
464
465 done:
466 mutex_exit(&sc->sc_lock);
467
468 biodone(bp);
469 }
470
471 static int
472 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
473 {
474 struct md_softc *sc;
475 struct md_conf *umd;
476 struct disklabel *lp;
477 struct partinfo *pp;
478 int error;
479
480 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
481 return ENXIO;
482
483 mutex_enter(&sc->sc_lock);
484 if (sc->sc_type != MD_UNCONFIGURED) {
485 switch (cmd) {
486 case DIOCGDINFO:
487 lp = (struct disklabel *)data;
488 *lp = *sc->sc_dkdev.dk_label;
489 mutex_exit(&sc->sc_lock);
490 return 0;
491
492 case DIOCGPART:
493 pp = (struct partinfo *)data;
494 pp->disklab = sc->sc_dkdev.dk_label;
495 pp->part =
496 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
497 mutex_exit(&sc->sc_lock);
498 return 0;
499 }
500 }
501
502 /* If this is not the raw partition, punt! */
503 if (DISKPART(dev) != RAW_PART) {
504 mutex_exit(&sc->sc_lock);
505 return ENOTTY;
506 }
507
508 umd = (struct md_conf *)data;
509 error = EINVAL;
510 switch (cmd) {
511 case MD_GETCONF:
512 *umd = sc->sc_md;
513 error = 0;
514 break;
515
516 case MD_SETCONF:
517 /* Can only set it once. */
518 if (sc->sc_type != MD_UNCONFIGURED)
519 break;
520 switch (umd->md_type) {
521 case MD_KMEM_ALLOCATED:
522 error = md_ioctl_kalloc(sc, umd, l);
523 break;
524 #if MEMORY_DISK_SERVER
525 case MD_UMEM_SERVER:
526 error = md_ioctl_server(sc, umd, l);
527 break;
528 #endif /* MEMORY_DISK_SERVER */
529 default:
530 break;
531 }
532 break;
533 }
534 mutex_exit(&sc->sc_lock);
535 return error;
536 }
537
538 static void
539 md_set_disklabel(struct md_softc *sc)
540 {
541 struct disklabel *lp = sc->sc_dkdev.dk_label;
542 struct partition *pp;
543
544 memset(lp, 0, sizeof(*lp));
545
546 lp->d_secsize = DEV_BSIZE;
547 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
548 if (lp->d_secperunit >= (32*64)) {
549 lp->d_nsectors = 32;
550 lp->d_ntracks = 64;
551 lp->d_ncylinders = lp->d_secperunit / (32*64);
552 } else {
553 lp->d_nsectors = 1;
554 lp->d_ntracks = 1;
555 lp->d_ncylinders = lp->d_secperunit;
556 }
557 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
558
559 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
560 lp->d_type = DTYPE_UNKNOWN;
561 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
562 lp->d_rpm = 3600;
563 lp->d_interleave = 1;
564 lp->d_flags = 0;
565
566 pp = &lp->d_partitions[0];
567 pp->p_offset = 0;
568 pp->p_size = lp->d_secperunit;
569 pp->p_fstype = FS_BSDFFS;
570
571 pp = &lp->d_partitions[RAW_PART];
572 pp->p_offset = 0;
573 pp->p_size = lp->d_secperunit;
574 pp->p_fstype = FS_UNUSED;
575
576 lp->d_npartitions = RAW_PART+1;
577 lp->d_magic = DISKMAGIC;
578 lp->d_magic2 = DISKMAGIC;
579 lp->d_checksum = dkcksum(lp);
580 }
581
582 /*
583 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
584 * Just allocate some kernel memory and return.
585 */
586 static int
587 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
588 struct lwp *l)
589 {
590 vaddr_t addr;
591 vsize_t size;
592
593 mutex_exit(&sc->sc_lock);
594
595 /* Sanity check the size. */
596 size = umd->md_size;
597 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
598
599 mutex_enter(&sc->sc_lock);
600
601 if (!addr)
602 return ENOMEM;
603
604 /* If another thread beat us to configure this unit: fail. */
605 if (sc->sc_type != MD_UNCONFIGURED) {
606 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
607 return EINVAL;
608 }
609
610 /* This unit is now configured. */
611 sc->sc_addr = (void *)addr; /* kernel space */
612 sc->sc_size = (size_t)size;
613 sc->sc_type = MD_KMEM_ALLOCATED;
614 md_set_disklabel(sc);
615 return 0;
616 }
617
618 #if MEMORY_DISK_SERVER
619
620 /*
621 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
622 * Set config, then become the I/O server for this unit.
623 */
624 static int
625 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
626 struct lwp *l)
627 {
628 vaddr_t end;
629 int error;
630
631 KASSERT(mutex_owned(&sc->sc_lock));
632
633 /* Sanity check addr, size. */
634 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
635
636 if ((end >= VM_MAXUSER_ADDRESS) ||
637 (end < ((vaddr_t) umd->md_addr)) )
638 return EINVAL;
639
640 /* This unit is now configured. */
641 sc->sc_addr = umd->md_addr; /* user space */
642 sc->sc_size = umd->md_size;
643 sc->sc_type = MD_UMEM_SERVER;
644 md_set_disklabel(sc);
645
646 /* Become the server daemon */
647 error = md_server_loop(sc);
648
649 /* This server is now going away! */
650 sc->sc_type = MD_UNCONFIGURED;
651 sc->sc_addr = 0;
652 sc->sc_size = 0;
653
654 return (error);
655 }
656
657 static int
658 md_server_loop(struct md_softc *sc)
659 {
660 struct buf *bp;
661 void *addr; /* user space address */
662 size_t off; /* offset into "device" */
663 size_t xfer; /* amount to transfer */
664 int error;
665 bool is_read;
666
667 KASSERT(mutex_owned(&sc->sc_lock));
668
669 for (;;) {
670 /* Wait for some work to arrive. */
671 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
672 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
673 if (error)
674 return error;
675 }
676
677 /* Do the transfer to/from user space. */
678 mutex_exit(&sc->sc_lock);
679 error = 0;
680 is_read = ((bp->b_flags & B_READ) == B_READ);
681 bp->b_resid = bp->b_bcount;
682 off = (bp->b_blkno << DEV_BSHIFT);
683 if (off >= sc->sc_size) {
684 if (is_read)
685 goto done; /* EOF (not an error) */
686 error = EIO;
687 goto done;
688 }
689 xfer = bp->b_resid;
690 if (xfer > (sc->sc_size - off))
691 xfer = (sc->sc_size - off);
692 addr = (char *)sc->sc_addr + off;
693 disk_busy(&sc->sc_dkdev);
694 if (is_read)
695 error = copyin(addr, bp->b_data, xfer);
696 else
697 error = copyout(bp->b_data, addr, xfer);
698 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
699 if (!error)
700 bp->b_resid -= xfer;
701
702 done:
703 if (error) {
704 bp->b_error = error;
705 }
706 biodone(bp);
707 mutex_enter(&sc->sc_lock);
708 }
709 }
710 #endif /* MEMORY_DISK_SERVER */
711