md.c revision 1.69 1 /* $NetBSD: md.c,v 1.69 2014/03/16 05:20:26 dholland Exp $ */
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.69 2014/03/16 05:20:26 dholland Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
/*
 * We should use the raw partition for ioctl.
 *
 * MD_UNIT() maps a dev_t to the index used to look the unit up in md_cd;
 * it is a thin wrapper around DISKUNIT().
 */
#define MD_UNIT(unit)	DISKUNIT(unit)
80
81 /* autoconfig stuff... */
82
/*
 * Per-unit driver state.  sc_lock is taken around accesses to the
 * configuration (sc_md) and the buffer queue by the I/O and ioctl paths.
 */
struct md_softc {
	device_t sc_dev;		/* Self. */
	struct disk sc_dkdev;		/* hook for generic disk handling */
	struct md_conf sc_md;		/* current configuration (type, address, size) */
	kmutex_t sc_lock;		/* Protect self. */
	kcondvar_t sc_cv;		/* Wait here for work. */
	struct bufq_state *sc_buflist;	/* requests queued for the user-space server */
};
/* shorthand for fields in sc_md: */
#define sc_addr	sc_md.md_addr
#define sc_size	sc_md.md_size
#define sc_type	sc_md.md_type
95
/* Pseudo-device attach entry point (the only non-static function here). */
void	mdattach(int);

/* Autoconfiguration attach/detach routines, wired up via CFATTACH below. */
static void	md_attach(device_t, device_t, void *);
static int	md_detach(device_t, int);

/* Block/character device switch entry points. */
static dev_type_open(mdopen);
static dev_type_close(mdclose);
static dev_type_read(mdread);
static dev_type_write(mdwrite);
static dev_type_ioctl(mdioctl);
static dev_type_strategy(mdstrategy);
static dev_type_size(mdsize);
108
/*
 * Block device switch.  Note that this table is flagged D_MPSAFE while
 * the character table below is not.
 */
const struct bdevsw md_bdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_strategy = mdstrategy,
	.d_ioctl = mdioctl,
	.d_dump = nodump,
	.d_psize = mdsize,
	.d_flag = D_DISK | D_MPSAFE
};

/*
 * Character (raw) device switch.  Reads and writes are funnelled through
 * mdstrategy() by mdread()/mdwrite().
 */
const struct cdevsw md_cdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_read = mdread,
	.d_write = mdwrite,
	.d_ioctl = mdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_DISK
};
132
/* Driver hooks handed to disk_init() in md_attach(). */
static struct dkdriver mddkdriver = { mdstrategy, NULL };

/* Autoconfiguration glue: softc size plus the attach/detach routines. */
extern struct cfdriver md_cd;
CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
    0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

static kmutex_t md_device_lock;	/* Protect unit creation / deletion. */
extern size_t md_root_size;

/* Fills in the in-core disklabel from the unit's configured size. */
static void	md_set_disklabel(struct md_softc *);
143
144 /*
145 * This is called if we are configured as a pseudo-device
146 */
147 void
148 mdattach(int n)
149 {
150
151 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
152 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
153 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
154 return;
155 }
156 }
157
158 static void
159 md_attach(device_t parent, device_t self, void *aux)
160 {
161 struct md_softc *sc = device_private(self);
162
163 sc->sc_dev = self;
164 sc->sc_type = MD_UNCONFIGURED;
165 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
166 cv_init(&sc->sc_cv, "mdidle");
167 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
168
169 /* XXX - Could accept aux info here to set the config. */
170 #ifdef MEMORY_DISK_HOOKS
171 /*
172 * This external function might setup a pre-loaded disk.
173 * All it would need to do is setup the md_conf struct.
174 * See sys/dev/md_root.c for an example.
175 */
176 md_attach_hook(device_unit(self), &sc->sc_md);
177 #endif
178
179 /*
180 * Initialize and attach the disk structure.
181 */
182 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
183 disk_attach(&sc->sc_dkdev);
184
185 if (sc->sc_type != MD_UNCONFIGURED)
186 md_set_disklabel(sc);
187
188 if (!pmf_device_register(self, NULL, NULL))
189 aprint_error_dev(self, "couldn't establish power handler\n");
190 }
191
192 static int
193 md_detach(device_t self, int flags)
194 {
195 struct md_softc *sc = device_private(self);
196 int rc;
197
198 rc = 0;
199 mutex_enter(&sc->sc_dkdev.dk_openlock);
200 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
201 ; /* nothing to do */
202 else if ((flags & DETACH_FORCE) == 0)
203 rc = EBUSY;
204 mutex_exit(&sc->sc_dkdev.dk_openlock);
205
206 if (rc != 0)
207 return rc;
208
209 pmf_device_deregister(self);
210 disk_detach(&sc->sc_dkdev);
211 disk_destroy(&sc->sc_dkdev);
212 bufq_free(sc->sc_buflist);
213 mutex_destroy(&sc->sc_lock);
214 cv_destroy(&sc->sc_cv);
215 return 0;
216 }
217
218 /*
219 * operational routines:
220 * open, close, read, write, strategy,
221 * ioctl, dump, size
222 */
223
/* Helpers for MD_SETCONF; the server variants exist only when enabled. */
#if MEMORY_DISK_SERVER
static int	md_server_loop(struct md_softc *sc);
static int	md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
#endif	/* MEMORY_DISK_SERVER */
static int	md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
231
232 static int
233 mdsize(dev_t dev)
234 {
235 struct md_softc *sc;
236 int res;
237
238 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
239 if (sc == NULL)
240 return 0;
241
242 mutex_enter(&sc->sc_lock);
243 if (sc->sc_type == MD_UNCONFIGURED)
244 res = 0;
245 else
246 res = sc->sc_size >> DEV_BSHIFT;
247 mutex_exit(&sc->sc_lock);
248
249 return res;
250 }
251
252 static int
253 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
254 {
255 int unit;
256 int part = DISKPART(dev);
257 int pmask = 1 << part;
258 cfdata_t cf;
259 struct md_softc *sc;
260 struct disk *dk;
261 #ifdef MEMORY_DISK_HOOKS
262 bool configured;
263 #endif
264
265 mutex_enter(&md_device_lock);
266 unit = MD_UNIT(dev);
267 sc = device_lookup_private(&md_cd, unit);
268 if (sc == NULL) {
269 if (part != RAW_PART) {
270 mutex_exit(&md_device_lock);
271 return ENXIO;
272 }
273 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
274 cf->cf_name = md_cd.cd_name;
275 cf->cf_atname = md_cd.cd_name;
276 cf->cf_unit = unit;
277 cf->cf_fstate = FSTATE_STAR;
278 sc = device_private(config_attach_pseudo(cf));
279 if (sc == NULL) {
280 mutex_exit(&md_device_lock);
281 return ENOMEM;
282 }
283 }
284
285 dk = &sc->sc_dkdev;
286
287 /*
288 * The raw partition is used for ioctl to configure.
289 */
290 if (part == RAW_PART)
291 goto ok;
292
293 #ifdef MEMORY_DISK_HOOKS
294 /* Call the open hook to allow loading the device. */
295 configured = (sc->sc_type != MD_UNCONFIGURED);
296 md_open_hook(unit, &sc->sc_md);
297 /* initialize disklabel if the device is configured in open hook */
298 if (!configured && sc->sc_type != MD_UNCONFIGURED)
299 md_set_disklabel(sc);
300 #endif
301
302 /*
303 * This is a normal, "slave" device, so
304 * enforce initialized.
305 */
306 if (sc->sc_type == MD_UNCONFIGURED) {
307 mutex_exit(&md_device_lock);
308 return ENXIO;
309 }
310
311 ok:
312 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
313 mutex_enter(&dk->dk_openlock);
314 /* Mark our unit as open. */
315 switch (fmt) {
316 case S_IFCHR:
317 dk->dk_copenmask |= pmask;
318 break;
319 case S_IFBLK:
320 dk->dk_bopenmask |= pmask;
321 break;
322 }
323
324 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
325
326 mutex_exit(&dk->dk_openlock);
327 mutex_exit(&md_device_lock);
328 return 0;
329 }
330
331 static int
332 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
333 {
334 int part = DISKPART(dev);
335 int pmask = 1 << part;
336 int error;
337 cfdata_t cf;
338 struct md_softc *sc;
339 struct disk *dk;
340
341 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
342 if (sc == NULL)
343 return ENXIO;
344
345 dk = &sc->sc_dkdev;
346
347 mutex_enter(&dk->dk_openlock);
348
349 switch (fmt) {
350 case S_IFCHR:
351 dk->dk_copenmask &= ~pmask;
352 break;
353 case S_IFBLK:
354 dk->dk_bopenmask &= ~pmask;
355 break;
356 }
357 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
358 if (dk->dk_openmask != 0) {
359 mutex_exit(&dk->dk_openlock);
360 return 0;
361 }
362
363 mutex_exit(&dk->dk_openlock);
364
365 mutex_enter(&md_device_lock);
366 cf = device_cfdata(sc->sc_dev);
367 error = config_detach(sc->sc_dev, DETACH_QUIET);
368 if (! error)
369 free(cf, M_DEVBUF);
370 mutex_exit(&md_device_lock);
371 return error;
372 }
373
374 static int
375 mdread(dev_t dev, struct uio *uio, int flags)
376 {
377 struct md_softc *sc;
378
379 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
380
381 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
382 return ENXIO;
383
384 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
385 }
386
387 static int
388 mdwrite(dev_t dev, struct uio *uio, int flags)
389 {
390 struct md_softc *sc;
391
392 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
393
394 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
395 return ENXIO;
396
397 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
398 }
399
400 /*
401 * Handle I/O requests, either directly, or
402 * by passing them to the server process.
403 */
404 static void
405 mdstrategy(struct buf *bp)
406 {
407 struct md_softc *sc;
408 void * addr;
409 size_t off, xfer;
410 bool is_read;
411
412 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
413
414 mutex_enter(&sc->sc_lock);
415
416 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
417 bp->b_error = ENXIO;
418 goto done;
419 }
420
421 switch (sc->sc_type) {
422 #if MEMORY_DISK_SERVER
423 case MD_UMEM_SERVER:
424 /* Just add this job to the server's queue. */
425 bufq_put(sc->sc_buflist, bp);
426 cv_signal(&sc->sc_cv);
427 mutex_exit(&sc->sc_lock);
428 /* see md_server_loop() */
429 /* no biodone in this case */
430 return;
431 #endif /* MEMORY_DISK_SERVER */
432
433 case MD_KMEM_FIXED:
434 case MD_KMEM_ALLOCATED:
435 /* These are in kernel space. Access directly. */
436 is_read = ((bp->b_flags & B_READ) == B_READ);
437 bp->b_resid = bp->b_bcount;
438 off = (bp->b_blkno << DEV_BSHIFT);
439 if (off >= sc->sc_size) {
440 if (is_read)
441 break; /* EOF */
442 goto set_eio;
443 }
444 xfer = bp->b_resid;
445 if (xfer > (sc->sc_size - off))
446 xfer = (sc->sc_size - off);
447 addr = (char *)sc->sc_addr + off;
448 disk_busy(&sc->sc_dkdev);
449 if (is_read)
450 memcpy(bp->b_data, addr, xfer);
451 else
452 memcpy(addr, bp->b_data, xfer);
453 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
454 bp->b_resid -= xfer;
455 break;
456
457 default:
458 bp->b_resid = bp->b_bcount;
459 set_eio:
460 bp->b_error = EIO;
461 break;
462 }
463
464 done:
465 mutex_exit(&sc->sc_lock);
466
467 biodone(bp);
468 }
469
470 static int
471 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
472 {
473 struct md_softc *sc;
474 struct md_conf *umd;
475 struct disklabel *lp;
476 struct partinfo *pp;
477 int error;
478
479 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
480 return ENXIO;
481
482 mutex_enter(&sc->sc_lock);
483 if (sc->sc_type != MD_UNCONFIGURED) {
484 switch (cmd) {
485 case DIOCGDINFO:
486 lp = (struct disklabel *)data;
487 *lp = *sc->sc_dkdev.dk_label;
488 mutex_exit(&sc->sc_lock);
489 return 0;
490
491 case DIOCGPART:
492 pp = (struct partinfo *)data;
493 pp->disklab = sc->sc_dkdev.dk_label;
494 pp->part =
495 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
496 mutex_exit(&sc->sc_lock);
497 return 0;
498 }
499 }
500
501 /* If this is not the raw partition, punt! */
502 if (DISKPART(dev) != RAW_PART) {
503 mutex_exit(&sc->sc_lock);
504 return ENOTTY;
505 }
506
507 umd = (struct md_conf *)data;
508 error = EINVAL;
509 switch (cmd) {
510 case MD_GETCONF:
511 *umd = sc->sc_md;
512 error = 0;
513 break;
514
515 case MD_SETCONF:
516 /* Can only set it once. */
517 if (sc->sc_type != MD_UNCONFIGURED)
518 break;
519 switch (umd->md_type) {
520 case MD_KMEM_ALLOCATED:
521 error = md_ioctl_kalloc(sc, umd, l);
522 break;
523 #if MEMORY_DISK_SERVER
524 case MD_UMEM_SERVER:
525 error = md_ioctl_server(sc, umd, l);
526 break;
527 #endif /* MEMORY_DISK_SERVER */
528 default:
529 break;
530 }
531 break;
532 }
533 mutex_exit(&sc->sc_lock);
534 return error;
535 }
536
537 static void
538 md_set_disklabel(struct md_softc *sc)
539 {
540 struct disklabel *lp = sc->sc_dkdev.dk_label;
541 struct partition *pp;
542
543 memset(lp, 0, sizeof(*lp));
544
545 lp->d_secsize = DEV_BSIZE;
546 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
547 if (lp->d_secperunit >= (32*64)) {
548 lp->d_nsectors = 32;
549 lp->d_ntracks = 64;
550 lp->d_ncylinders = lp->d_secperunit / (32*64);
551 } else {
552 lp->d_nsectors = 1;
553 lp->d_ntracks = 1;
554 lp->d_ncylinders = lp->d_secperunit;
555 }
556 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
557
558 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
559 lp->d_type = DTYPE_UNKNOWN;
560 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
561 lp->d_rpm = 3600;
562 lp->d_interleave = 1;
563 lp->d_flags = 0;
564
565 pp = &lp->d_partitions[0];
566 pp->p_offset = 0;
567 pp->p_size = lp->d_secperunit;
568 pp->p_fstype = FS_BSDFFS;
569
570 pp = &lp->d_partitions[RAW_PART];
571 pp->p_offset = 0;
572 pp->p_size = lp->d_secperunit;
573 pp->p_fstype = FS_UNUSED;
574
575 lp->d_npartitions = RAW_PART+1;
576 lp->d_magic = DISKMAGIC;
577 lp->d_magic2 = DISKMAGIC;
578 lp->d_checksum = dkcksum(lp);
579 }
580
581 /*
582 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
583 * Just allocate some kernel memory and return.
584 */
585 static int
586 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
587 struct lwp *l)
588 {
589 vaddr_t addr;
590 vsize_t size;
591
592 mutex_exit(&sc->sc_lock);
593
594 /* Sanity check the size. */
595 size = umd->md_size;
596 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
597
598 mutex_enter(&sc->sc_lock);
599
600 if (!addr)
601 return ENOMEM;
602
603 /* If another thread beat us to configure this unit: fail. */
604 if (sc->sc_type != MD_UNCONFIGURED) {
605 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
606 return EINVAL;
607 }
608
609 /* This unit is now configured. */
610 sc->sc_addr = (void *)addr; /* kernel space */
611 sc->sc_size = (size_t)size;
612 sc->sc_type = MD_KMEM_ALLOCATED;
613 md_set_disklabel(sc);
614 return 0;
615 }
616
617 #if MEMORY_DISK_SERVER
618
619 /*
620 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
621 * Set config, then become the I/O server for this unit.
622 */
623 static int
624 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
625 struct lwp *l)
626 {
627 vaddr_t end;
628 int error;
629
630 KASSERT(mutex_owned(&sc->sc_lock));
631
632 /* Sanity check addr, size. */
633 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
634
635 if ((end >= VM_MAXUSER_ADDRESS) ||
636 (end < ((vaddr_t) umd->md_addr)) )
637 return EINVAL;
638
639 /* This unit is now configured. */
640 sc->sc_addr = umd->md_addr; /* user space */
641 sc->sc_size = umd->md_size;
642 sc->sc_type = MD_UMEM_SERVER;
643 md_set_disklabel(sc);
644
645 /* Become the server daemon */
646 error = md_server_loop(sc);
647
648 /* This server is now going away! */
649 sc->sc_type = MD_UNCONFIGURED;
650 sc->sc_addr = 0;
651 sc->sc_size = 0;
652
653 return (error);
654 }
655
656 static int
657 md_server_loop(struct md_softc *sc)
658 {
659 struct buf *bp;
660 void *addr; /* user space address */
661 size_t off; /* offset into "device" */
662 size_t xfer; /* amount to transfer */
663 int error;
664 bool is_read;
665
666 KASSERT(mutex_owned(&sc->sc_lock));
667
668 for (;;) {
669 /* Wait for some work to arrive. */
670 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
671 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
672 if (error)
673 return error;
674 }
675
676 /* Do the transfer to/from user space. */
677 mutex_exit(&sc->sc_lock);
678 error = 0;
679 is_read = ((bp->b_flags & B_READ) == B_READ);
680 bp->b_resid = bp->b_bcount;
681 off = (bp->b_blkno << DEV_BSHIFT);
682 if (off >= sc->sc_size) {
683 if (is_read)
684 goto done; /* EOF (not an error) */
685 error = EIO;
686 goto done;
687 }
688 xfer = bp->b_resid;
689 if (xfer > (sc->sc_size - off))
690 xfer = (sc->sc_size - off);
691 addr = (char *)sc->sc_addr + off;
692 disk_busy(&sc->sc_dkdev);
693 if (is_read)
694 error = copyin(addr, bp->b_data, xfer);
695 else
696 error = copyout(bp->b_data, addr, xfer);
697 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
698 if (!error)
699 bp->b_resid -= xfer;
700
701 done:
702 if (error) {
703 bp->b_error = error;
704 }
705 biodone(bp);
706 mutex_enter(&sc->sc_lock);
707 }
708 }
709 #endif /* MEMORY_DISK_SERVER */
710