/*	$NetBSD: md.c,v 1.71 2014/07/25 08:10:35 dholland Exp $	*/
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.71 2014/07/25 08:10:35 dholland Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
76 /*
77 * We should use the raw partition for ioctl.
78 */
79 #define MD_UNIT(unit) DISKUNIT(unit)
80
81 /* autoconfig stuff... */
82
83 struct md_softc {
84 device_t sc_dev; /* Self. */
85 struct disk sc_dkdev; /* hook for generic disk handling */
86 struct md_conf sc_md;
87 kmutex_t sc_lock; /* Protect self. */
88 kcondvar_t sc_cv; /* Wait here for work. */
89 struct bufq_state *sc_buflist;
90 };
91 /* shorthand for fields in sc_md: */
92 #define sc_addr sc_md.md_addr
93 #define sc_size sc_md.md_size
94 #define sc_type sc_md.md_type
95
96 void mdattach(int);
97
98 static void md_attach(device_t, device_t, void *);
99 static int md_detach(device_t, int);
100
101 static dev_type_open(mdopen);
102 static dev_type_close(mdclose);
103 static dev_type_read(mdread);
104 static dev_type_write(mdwrite);
105 static dev_type_ioctl(mdioctl);
106 static dev_type_strategy(mdstrategy);
107 static dev_type_size(mdsize);
108
109 const struct bdevsw md_bdevsw = {
110 .d_open = mdopen,
111 .d_close = mdclose,
112 .d_strategy = mdstrategy,
113 .d_ioctl = mdioctl,
114 .d_dump = nodump,
115 .d_psize = mdsize,
116 .d_discard = nodiscard,
117 .d_flag = D_DISK | D_MPSAFE
118 };
119
120 const struct cdevsw md_cdevsw = {
121 .d_open = mdopen,
122 .d_close = mdclose,
123 .d_read = mdread,
124 .d_write = mdwrite,
125 .d_ioctl = mdioctl,
126 .d_stop = nostop,
127 .d_tty = notty,
128 .d_poll = nopoll,
129 .d_mmap = nommap,
130 .d_kqfilter = nokqfilter,
131 .d_discard = nodiscard,
132 .d_flag = D_DISK
133 };
134
135 static struct dkdriver mddkdriver = { mdstrategy, NULL };
136
137 extern struct cfdriver md_cd;
138 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
139 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
140
141 static kmutex_t md_device_lock; /* Protect unit creation / deletion. */
142 extern size_t md_root_size;
143
144 static void md_set_disklabel(struct md_softc *);
145
146 /*
147 * This is called if we are configured as a pseudo-device
148 */
149 void
150 mdattach(int n)
151 {
152
153 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
154 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
155 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
156 return;
157 }
158 }
159
160 static void
161 md_attach(device_t parent, device_t self, void *aux)
162 {
163 struct md_softc *sc = device_private(self);
164
165 sc->sc_dev = self;
166 sc->sc_type = MD_UNCONFIGURED;
167 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
168 cv_init(&sc->sc_cv, "mdidle");
169 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
170
171 /* XXX - Could accept aux info here to set the config. */
172 #ifdef MEMORY_DISK_HOOKS
173 /*
174 * This external function might setup a pre-loaded disk.
175 * All it would need to do is setup the md_conf struct.
176 * See sys/dev/md_root.c for an example.
177 */
178 md_attach_hook(device_unit(self), &sc->sc_md);
179 #endif
180
181 /*
182 * Initialize and attach the disk structure.
183 */
184 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
185 disk_attach(&sc->sc_dkdev);
186
187 if (sc->sc_type != MD_UNCONFIGURED)
188 md_set_disklabel(sc);
189
190 if (!pmf_device_register(self, NULL, NULL))
191 aprint_error_dev(self, "couldn't establish power handler\n");
192 }
193
194 static int
195 md_detach(device_t self, int flags)
196 {
197 struct md_softc *sc = device_private(self);
198 int rc;
199
200 rc = 0;
201 mutex_enter(&sc->sc_dkdev.dk_openlock);
202 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
203 ; /* nothing to do */
204 else if ((flags & DETACH_FORCE) == 0)
205 rc = EBUSY;
206 mutex_exit(&sc->sc_dkdev.dk_openlock);
207
208 if (rc != 0)
209 return rc;
210
211 pmf_device_deregister(self);
212 disk_detach(&sc->sc_dkdev);
213 disk_destroy(&sc->sc_dkdev);
214 bufq_free(sc->sc_buflist);
215 mutex_destroy(&sc->sc_lock);
216 cv_destroy(&sc->sc_cv);
217 return 0;
218 }
219
220 /*
221 * operational routines:
222 * open, close, read, write, strategy,
223 * ioctl, dump, size
224 */
225
226 #if MEMORY_DISK_SERVER
227 static int md_server_loop(struct md_softc *sc);
228 static int md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
229 struct lwp *l);
230 #endif /* MEMORY_DISK_SERVER */
231 static int md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
232 struct lwp *l);
233
234 static int
235 mdsize(dev_t dev)
236 {
237 struct md_softc *sc;
238 int res;
239
240 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
241 if (sc == NULL)
242 return 0;
243
244 mutex_enter(&sc->sc_lock);
245 if (sc->sc_type == MD_UNCONFIGURED)
246 res = 0;
247 else
248 res = sc->sc_size >> DEV_BSHIFT;
249 mutex_exit(&sc->sc_lock);
250
251 return res;
252 }
253
254 static int
255 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
256 {
257 int unit;
258 int part = DISKPART(dev);
259 int pmask = 1 << part;
260 cfdata_t cf;
261 struct md_softc *sc;
262 struct disk *dk;
263 #ifdef MEMORY_DISK_HOOKS
264 bool configured;
265 #endif
266
267 mutex_enter(&md_device_lock);
268 unit = MD_UNIT(dev);
269 sc = device_lookup_private(&md_cd, unit);
270 if (sc == NULL) {
271 if (part != RAW_PART) {
272 mutex_exit(&md_device_lock);
273 return ENXIO;
274 }
275 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
276 cf->cf_name = md_cd.cd_name;
277 cf->cf_atname = md_cd.cd_name;
278 cf->cf_unit = unit;
279 cf->cf_fstate = FSTATE_STAR;
280 sc = device_private(config_attach_pseudo(cf));
281 if (sc == NULL) {
282 mutex_exit(&md_device_lock);
283 return ENOMEM;
284 }
285 }
286
287 dk = &sc->sc_dkdev;
288
289 /*
290 * The raw partition is used for ioctl to configure.
291 */
292 if (part == RAW_PART)
293 goto ok;
294
295 #ifdef MEMORY_DISK_HOOKS
296 /* Call the open hook to allow loading the device. */
297 configured = (sc->sc_type != MD_UNCONFIGURED);
298 md_open_hook(unit, &sc->sc_md);
299 /* initialize disklabel if the device is configured in open hook */
300 if (!configured && sc->sc_type != MD_UNCONFIGURED)
301 md_set_disklabel(sc);
302 #endif
303
304 /*
305 * This is a normal, "slave" device, so
306 * enforce initialized.
307 */
308 if (sc->sc_type == MD_UNCONFIGURED) {
309 mutex_exit(&md_device_lock);
310 return ENXIO;
311 }
312
313 ok:
314 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
315 mutex_enter(&dk->dk_openlock);
316 /* Mark our unit as open. */
317 switch (fmt) {
318 case S_IFCHR:
319 dk->dk_copenmask |= pmask;
320 break;
321 case S_IFBLK:
322 dk->dk_bopenmask |= pmask;
323 break;
324 }
325
326 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
327
328 mutex_exit(&dk->dk_openlock);
329 mutex_exit(&md_device_lock);
330 return 0;
331 }
332
333 static int
334 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
335 {
336 int part = DISKPART(dev);
337 int pmask = 1 << part;
338 int error;
339 cfdata_t cf;
340 struct md_softc *sc;
341 struct disk *dk;
342
343 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
344 if (sc == NULL)
345 return ENXIO;
346
347 dk = &sc->sc_dkdev;
348
349 mutex_enter(&dk->dk_openlock);
350
351 switch (fmt) {
352 case S_IFCHR:
353 dk->dk_copenmask &= ~pmask;
354 break;
355 case S_IFBLK:
356 dk->dk_bopenmask &= ~pmask;
357 break;
358 }
359 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
360 if (dk->dk_openmask != 0) {
361 mutex_exit(&dk->dk_openlock);
362 return 0;
363 }
364
365 mutex_exit(&dk->dk_openlock);
366
367 mutex_enter(&md_device_lock);
368 cf = device_cfdata(sc->sc_dev);
369 error = config_detach(sc->sc_dev, DETACH_QUIET);
370 if (! error)
371 free(cf, M_DEVBUF);
372 mutex_exit(&md_device_lock);
373 return error;
374 }
375
376 static int
377 mdread(dev_t dev, struct uio *uio, int flags)
378 {
379 struct md_softc *sc;
380
381 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
382
383 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
384 return ENXIO;
385
386 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
387 }
388
389 static int
390 mdwrite(dev_t dev, struct uio *uio, int flags)
391 {
392 struct md_softc *sc;
393
394 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
395
396 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
397 return ENXIO;
398
399 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
400 }
401
402 /*
403 * Handle I/O requests, either directly, or
404 * by passing them to the server process.
405 */
406 static void
407 mdstrategy(struct buf *bp)
408 {
409 struct md_softc *sc;
410 void * addr;
411 size_t off, xfer;
412 bool is_read;
413
414 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
415
416 mutex_enter(&sc->sc_lock);
417
418 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
419 bp->b_error = ENXIO;
420 goto done;
421 }
422
423 switch (sc->sc_type) {
424 #if MEMORY_DISK_SERVER
425 case MD_UMEM_SERVER:
426 /* Just add this job to the server's queue. */
427 bufq_put(sc->sc_buflist, bp);
428 cv_signal(&sc->sc_cv);
429 mutex_exit(&sc->sc_lock);
430 /* see md_server_loop() */
431 /* no biodone in this case */
432 return;
433 #endif /* MEMORY_DISK_SERVER */
434
435 case MD_KMEM_FIXED:
436 case MD_KMEM_ALLOCATED:
437 /* These are in kernel space. Access directly. */
438 is_read = ((bp->b_flags & B_READ) == B_READ);
439 bp->b_resid = bp->b_bcount;
440 off = (bp->b_blkno << DEV_BSHIFT);
441 if (off >= sc->sc_size) {
442 if (is_read)
443 break; /* EOF */
444 goto set_eio;
445 }
446 xfer = bp->b_resid;
447 if (xfer > (sc->sc_size - off))
448 xfer = (sc->sc_size - off);
449 addr = (char *)sc->sc_addr + off;
450 disk_busy(&sc->sc_dkdev);
451 if (is_read)
452 memcpy(bp->b_data, addr, xfer);
453 else
454 memcpy(addr, bp->b_data, xfer);
455 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
456 bp->b_resid -= xfer;
457 break;
458
459 default:
460 bp->b_resid = bp->b_bcount;
461 set_eio:
462 bp->b_error = EIO;
463 break;
464 }
465
466 done:
467 mutex_exit(&sc->sc_lock);
468
469 biodone(bp);
470 }
471
472 static int
473 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
474 {
475 struct md_softc *sc;
476 struct md_conf *umd;
477 struct disklabel *lp;
478 struct partinfo *pp;
479 int error;
480
481 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
482 return ENXIO;
483
484 mutex_enter(&sc->sc_lock);
485 if (sc->sc_type != MD_UNCONFIGURED) {
486 switch (cmd) {
487 case DIOCGDINFO:
488 lp = (struct disklabel *)data;
489 *lp = *sc->sc_dkdev.dk_label;
490 mutex_exit(&sc->sc_lock);
491 return 0;
492
493 case DIOCGPART:
494 pp = (struct partinfo *)data;
495 pp->disklab = sc->sc_dkdev.dk_label;
496 pp->part =
497 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
498 mutex_exit(&sc->sc_lock);
499 return 0;
500 }
501 }
502
503 /* If this is not the raw partition, punt! */
504 if (DISKPART(dev) != RAW_PART) {
505 mutex_exit(&sc->sc_lock);
506 return ENOTTY;
507 }
508
509 umd = (struct md_conf *)data;
510 error = EINVAL;
511 switch (cmd) {
512 case MD_GETCONF:
513 *umd = sc->sc_md;
514 error = 0;
515 break;
516
517 case MD_SETCONF:
518 /* Can only set it once. */
519 if (sc->sc_type != MD_UNCONFIGURED)
520 break;
521 switch (umd->md_type) {
522 case MD_KMEM_ALLOCATED:
523 error = md_ioctl_kalloc(sc, umd, l);
524 break;
525 #if MEMORY_DISK_SERVER
526 case MD_UMEM_SERVER:
527 error = md_ioctl_server(sc, umd, l);
528 break;
529 #endif /* MEMORY_DISK_SERVER */
530 default:
531 break;
532 }
533 break;
534 }
535 mutex_exit(&sc->sc_lock);
536 return error;
537 }
538
539 static void
540 md_set_disklabel(struct md_softc *sc)
541 {
542 struct disklabel *lp = sc->sc_dkdev.dk_label;
543 struct partition *pp;
544
545 memset(lp, 0, sizeof(*lp));
546
547 lp->d_secsize = DEV_BSIZE;
548 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
549 if (lp->d_secperunit >= (32*64)) {
550 lp->d_nsectors = 32;
551 lp->d_ntracks = 64;
552 lp->d_ncylinders = lp->d_secperunit / (32*64);
553 } else {
554 lp->d_nsectors = 1;
555 lp->d_ntracks = 1;
556 lp->d_ncylinders = lp->d_secperunit;
557 }
558 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
559
560 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
561 lp->d_type = DTYPE_UNKNOWN;
562 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
563 lp->d_rpm = 3600;
564 lp->d_interleave = 1;
565 lp->d_flags = 0;
566
567 pp = &lp->d_partitions[0];
568 pp->p_offset = 0;
569 pp->p_size = lp->d_secperunit;
570 pp->p_fstype = FS_BSDFFS;
571
572 pp = &lp->d_partitions[RAW_PART];
573 pp->p_offset = 0;
574 pp->p_size = lp->d_secperunit;
575 pp->p_fstype = FS_UNUSED;
576
577 lp->d_npartitions = RAW_PART+1;
578 lp->d_magic = DISKMAGIC;
579 lp->d_magic2 = DISKMAGIC;
580 lp->d_checksum = dkcksum(lp);
581 }
582
583 /*
584 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
585 * Just allocate some kernel memory and return.
586 */
587 static int
588 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
589 struct lwp *l)
590 {
591 vaddr_t addr;
592 vsize_t size;
593
594 mutex_exit(&sc->sc_lock);
595
596 /* Sanity check the size. */
597 size = umd->md_size;
598 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
599
600 mutex_enter(&sc->sc_lock);
601
602 if (!addr)
603 return ENOMEM;
604
605 /* If another thread beat us to configure this unit: fail. */
606 if (sc->sc_type != MD_UNCONFIGURED) {
607 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
608 return EINVAL;
609 }
610
611 /* This unit is now configured. */
612 sc->sc_addr = (void *)addr; /* kernel space */
613 sc->sc_size = (size_t)size;
614 sc->sc_type = MD_KMEM_ALLOCATED;
615 md_set_disklabel(sc);
616 return 0;
617 }
618
619 #if MEMORY_DISK_SERVER
620
621 /*
622 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
623 * Set config, then become the I/O server for this unit.
624 */
625 static int
626 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
627 struct lwp *l)
628 {
629 vaddr_t end;
630 int error;
631
632 KASSERT(mutex_owned(&sc->sc_lock));
633
634 /* Sanity check addr, size. */
635 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
636
637 if ((end >= VM_MAXUSER_ADDRESS) ||
638 (end < ((vaddr_t) umd->md_addr)) )
639 return EINVAL;
640
641 /* This unit is now configured. */
642 sc->sc_addr = umd->md_addr; /* user space */
643 sc->sc_size = umd->md_size;
644 sc->sc_type = MD_UMEM_SERVER;
645 md_set_disklabel(sc);
646
647 /* Become the server daemon */
648 error = md_server_loop(sc);
649
650 /* This server is now going away! */
651 sc->sc_type = MD_UNCONFIGURED;
652 sc->sc_addr = 0;
653 sc->sc_size = 0;
654
655 return (error);
656 }
657
658 static int
659 md_server_loop(struct md_softc *sc)
660 {
661 struct buf *bp;
662 void *addr; /* user space address */
663 size_t off; /* offset into "device" */
664 size_t xfer; /* amount to transfer */
665 int error;
666 bool is_read;
667
668 KASSERT(mutex_owned(&sc->sc_lock));
669
670 for (;;) {
671 /* Wait for some work to arrive. */
672 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
673 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
674 if (error)
675 return error;
676 }
677
678 /* Do the transfer to/from user space. */
679 mutex_exit(&sc->sc_lock);
680 error = 0;
681 is_read = ((bp->b_flags & B_READ) == B_READ);
682 bp->b_resid = bp->b_bcount;
683 off = (bp->b_blkno << DEV_BSHIFT);
684 if (off >= sc->sc_size) {
685 if (is_read)
686 goto done; /* EOF (not an error) */
687 error = EIO;
688 goto done;
689 }
690 xfer = bp->b_resid;
691 if (xfer > (sc->sc_size - off))
692 xfer = (sc->sc_size - off);
693 addr = (char *)sc->sc_addr + off;
694 disk_busy(&sc->sc_dkdev);
695 if (is_read)
696 error = copyin(addr, bp->b_data, xfer);
697 else
698 error = copyout(bp->b_data, addr, xfer);
699 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
700 if (!error)
701 bp->b_resid -= xfer;
702
703 done:
704 if (error) {
705 bp->b_error = error;
706 }
707 biodone(bp);
708 mutex_enter(&sc->sc_lock);
709 }
710 }
711 #endif /* MEMORY_DISK_SERVER */
712