md.c revision 1.68 1 /* $NetBSD: md.c,v 1.68 2014/03/13 10:22:35 hannken Exp $ */
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.68 2014/03/13 10:22:35 hannken Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
/*
 * Map a device number to its md unit via DISKUNIT().
 * (Configuration ioctls are expected on the raw partition.)
 */
#define MD_UNIT(dev)	DISKUNIT(dev)
80
81 /* autoconfig stuff... */
82
83 struct md_softc {
84 device_t sc_dev; /* Self. */
85 struct disk sc_dkdev; /* hook for generic disk handling */
86 struct md_conf sc_md;
87 kmutex_t sc_lock; /* Protect self. */
88 kcondvar_t sc_cv; /* Wait here for work. */
89 struct bufq_state *sc_buflist;
90 };
91 /* shorthand for fields in sc_md: */
92 #define sc_addr sc_md.md_addr
93 #define sc_size sc_md.md_size
94 #define sc_type sc_md.md_type
95
96 void mdattach(int);
97
98 static void md_attach(device_t, device_t, void *);
99 static int md_detach(device_t, int);
100
101 static dev_type_open(mdopen);
102 static dev_type_close(mdclose);
103 static dev_type_read(mdread);
104 static dev_type_write(mdwrite);
105 static dev_type_ioctl(mdioctl);
106 static dev_type_strategy(mdstrategy);
107 static dev_type_size(mdsize);
108
109 const struct bdevsw md_bdevsw = {
110 mdopen, mdclose, mdstrategy, mdioctl, nodump, mdsize, D_DISK | D_MPSAFE
111 };
112
113 const struct cdevsw md_cdevsw = {
114 mdopen, mdclose, mdread, mdwrite, mdioctl,
115 nostop, notty, nopoll, nommap, nokqfilter, D_DISK
116 };
117
118 static struct dkdriver mddkdriver = { mdstrategy, NULL };
119
120 extern struct cfdriver md_cd;
121 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
122 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
123
124 static kmutex_t md_device_lock; /* Protect unit creation / deletion. */
125 extern size_t md_root_size;
126
127 static void md_set_disklabel(struct md_softc *);
128
129 /*
130 * This is called if we are configured as a pseudo-device
131 */
132 void
133 mdattach(int n)
134 {
135
136 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
137 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
138 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
139 return;
140 }
141 }
142
143 static void
144 md_attach(device_t parent, device_t self, void *aux)
145 {
146 struct md_softc *sc = device_private(self);
147
148 sc->sc_dev = self;
149 sc->sc_type = MD_UNCONFIGURED;
150 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
151 cv_init(&sc->sc_cv, "mdidle");
152 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
153
154 /* XXX - Could accept aux info here to set the config. */
155 #ifdef MEMORY_DISK_HOOKS
156 /*
157 * This external function might setup a pre-loaded disk.
158 * All it would need to do is setup the md_conf struct.
159 * See sys/dev/md_root.c for an example.
160 */
161 md_attach_hook(device_unit(self), &sc->sc_md);
162 #endif
163
164 /*
165 * Initialize and attach the disk structure.
166 */
167 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
168 disk_attach(&sc->sc_dkdev);
169
170 if (sc->sc_type != MD_UNCONFIGURED)
171 md_set_disklabel(sc);
172
173 if (!pmf_device_register(self, NULL, NULL))
174 aprint_error_dev(self, "couldn't establish power handler\n");
175 }
176
177 static int
178 md_detach(device_t self, int flags)
179 {
180 struct md_softc *sc = device_private(self);
181 int rc;
182
183 rc = 0;
184 mutex_enter(&sc->sc_dkdev.dk_openlock);
185 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
186 ; /* nothing to do */
187 else if ((flags & DETACH_FORCE) == 0)
188 rc = EBUSY;
189 mutex_exit(&sc->sc_dkdev.dk_openlock);
190
191 if (rc != 0)
192 return rc;
193
194 pmf_device_deregister(self);
195 disk_detach(&sc->sc_dkdev);
196 disk_destroy(&sc->sc_dkdev);
197 bufq_free(sc->sc_buflist);
198 mutex_destroy(&sc->sc_lock);
199 cv_destroy(&sc->sc_cv);
200 return 0;
201 }
202
203 /*
204 * operational routines:
205 * open, close, read, write, strategy,
206 * ioctl, dump, size
207 */
208
/* Helpers behind the MD_SETCONF ioctl. */
#if MEMORY_DISK_SERVER
static int	md_server_loop(struct md_softc *sc);
static int	md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
#endif /* MEMORY_DISK_SERVER */
static int	md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
		    struct lwp *l);
216
217 static int
218 mdsize(dev_t dev)
219 {
220 struct md_softc *sc;
221 int res;
222
223 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
224 if (sc == NULL)
225 return 0;
226
227 mutex_enter(&sc->sc_lock);
228 if (sc->sc_type == MD_UNCONFIGURED)
229 res = 0;
230 else
231 res = sc->sc_size >> DEV_BSHIFT;
232 mutex_exit(&sc->sc_lock);
233
234 return res;
235 }
236
237 static int
238 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
239 {
240 int unit;
241 int part = DISKPART(dev);
242 int pmask = 1 << part;
243 cfdata_t cf;
244 struct md_softc *sc;
245 struct disk *dk;
246 #ifdef MEMORY_DISK_HOOKS
247 bool configured;
248 #endif
249
250 mutex_enter(&md_device_lock);
251 unit = MD_UNIT(dev);
252 sc = device_lookup_private(&md_cd, unit);
253 if (sc == NULL) {
254 if (part != RAW_PART) {
255 mutex_exit(&md_device_lock);
256 return ENXIO;
257 }
258 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
259 cf->cf_name = md_cd.cd_name;
260 cf->cf_atname = md_cd.cd_name;
261 cf->cf_unit = unit;
262 cf->cf_fstate = FSTATE_STAR;
263 sc = device_private(config_attach_pseudo(cf));
264 if (sc == NULL) {
265 mutex_exit(&md_device_lock);
266 return ENOMEM;
267 }
268 }
269
270 dk = &sc->sc_dkdev;
271
272 /*
273 * The raw partition is used for ioctl to configure.
274 */
275 if (part == RAW_PART)
276 goto ok;
277
278 #ifdef MEMORY_DISK_HOOKS
279 /* Call the open hook to allow loading the device. */
280 configured = (sc->sc_type != MD_UNCONFIGURED);
281 md_open_hook(unit, &sc->sc_md);
282 /* initialize disklabel if the device is configured in open hook */
283 if (!configured && sc->sc_type != MD_UNCONFIGURED)
284 md_set_disklabel(sc);
285 #endif
286
287 /*
288 * This is a normal, "slave" device, so
289 * enforce initialized.
290 */
291 if (sc->sc_type == MD_UNCONFIGURED) {
292 mutex_exit(&md_device_lock);
293 return ENXIO;
294 }
295
296 ok:
297 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
298 mutex_enter(&dk->dk_openlock);
299 /* Mark our unit as open. */
300 switch (fmt) {
301 case S_IFCHR:
302 dk->dk_copenmask |= pmask;
303 break;
304 case S_IFBLK:
305 dk->dk_bopenmask |= pmask;
306 break;
307 }
308
309 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
310
311 mutex_exit(&dk->dk_openlock);
312 mutex_exit(&md_device_lock);
313 return 0;
314 }
315
316 static int
317 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
318 {
319 int part = DISKPART(dev);
320 int pmask = 1 << part;
321 int error;
322 cfdata_t cf;
323 struct md_softc *sc;
324 struct disk *dk;
325
326 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
327 if (sc == NULL)
328 return ENXIO;
329
330 dk = &sc->sc_dkdev;
331
332 mutex_enter(&dk->dk_openlock);
333
334 switch (fmt) {
335 case S_IFCHR:
336 dk->dk_copenmask &= ~pmask;
337 break;
338 case S_IFBLK:
339 dk->dk_bopenmask &= ~pmask;
340 break;
341 }
342 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
343 if (dk->dk_openmask != 0) {
344 mutex_exit(&dk->dk_openlock);
345 return 0;
346 }
347
348 mutex_exit(&dk->dk_openlock);
349
350 mutex_enter(&md_device_lock);
351 cf = device_cfdata(sc->sc_dev);
352 error = config_detach(sc->sc_dev, DETACH_QUIET);
353 if (! error)
354 free(cf, M_DEVBUF);
355 mutex_exit(&md_device_lock);
356 return error;
357 }
358
359 static int
360 mdread(dev_t dev, struct uio *uio, int flags)
361 {
362 struct md_softc *sc;
363
364 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
365
366 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
367 return ENXIO;
368
369 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
370 }
371
372 static int
373 mdwrite(dev_t dev, struct uio *uio, int flags)
374 {
375 struct md_softc *sc;
376
377 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
378
379 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
380 return ENXIO;
381
382 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
383 }
384
385 /*
386 * Handle I/O requests, either directly, or
387 * by passing them to the server process.
388 */
389 static void
390 mdstrategy(struct buf *bp)
391 {
392 struct md_softc *sc;
393 void * addr;
394 size_t off, xfer;
395 bool is_read;
396
397 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
398
399 mutex_enter(&sc->sc_lock);
400
401 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
402 bp->b_error = ENXIO;
403 goto done;
404 }
405
406 switch (sc->sc_type) {
407 #if MEMORY_DISK_SERVER
408 case MD_UMEM_SERVER:
409 /* Just add this job to the server's queue. */
410 bufq_put(sc->sc_buflist, bp);
411 cv_signal(&sc->sc_cv);
412 mutex_exit(&sc->sc_lock);
413 /* see md_server_loop() */
414 /* no biodone in this case */
415 return;
416 #endif /* MEMORY_DISK_SERVER */
417
418 case MD_KMEM_FIXED:
419 case MD_KMEM_ALLOCATED:
420 /* These are in kernel space. Access directly. */
421 is_read = ((bp->b_flags & B_READ) == B_READ);
422 bp->b_resid = bp->b_bcount;
423 off = (bp->b_blkno << DEV_BSHIFT);
424 if (off >= sc->sc_size) {
425 if (is_read)
426 break; /* EOF */
427 goto set_eio;
428 }
429 xfer = bp->b_resid;
430 if (xfer > (sc->sc_size - off))
431 xfer = (sc->sc_size - off);
432 addr = (char *)sc->sc_addr + off;
433 disk_busy(&sc->sc_dkdev);
434 if (is_read)
435 memcpy(bp->b_data, addr, xfer);
436 else
437 memcpy(addr, bp->b_data, xfer);
438 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
439 bp->b_resid -= xfer;
440 break;
441
442 default:
443 bp->b_resid = bp->b_bcount;
444 set_eio:
445 bp->b_error = EIO;
446 break;
447 }
448
449 done:
450 mutex_exit(&sc->sc_lock);
451
452 biodone(bp);
453 }
454
455 static int
456 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
457 {
458 struct md_softc *sc;
459 struct md_conf *umd;
460 struct disklabel *lp;
461 struct partinfo *pp;
462 int error;
463
464 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
465 return ENXIO;
466
467 mutex_enter(&sc->sc_lock);
468 if (sc->sc_type != MD_UNCONFIGURED) {
469 switch (cmd) {
470 case DIOCGDINFO:
471 lp = (struct disklabel *)data;
472 *lp = *sc->sc_dkdev.dk_label;
473 mutex_exit(&sc->sc_lock);
474 return 0;
475
476 case DIOCGPART:
477 pp = (struct partinfo *)data;
478 pp->disklab = sc->sc_dkdev.dk_label;
479 pp->part =
480 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
481 mutex_exit(&sc->sc_lock);
482 return 0;
483 }
484 }
485
486 /* If this is not the raw partition, punt! */
487 if (DISKPART(dev) != RAW_PART) {
488 mutex_exit(&sc->sc_lock);
489 return ENOTTY;
490 }
491
492 umd = (struct md_conf *)data;
493 error = EINVAL;
494 switch (cmd) {
495 case MD_GETCONF:
496 *umd = sc->sc_md;
497 error = 0;
498 break;
499
500 case MD_SETCONF:
501 /* Can only set it once. */
502 if (sc->sc_type != MD_UNCONFIGURED)
503 break;
504 switch (umd->md_type) {
505 case MD_KMEM_ALLOCATED:
506 error = md_ioctl_kalloc(sc, umd, l);
507 break;
508 #if MEMORY_DISK_SERVER
509 case MD_UMEM_SERVER:
510 error = md_ioctl_server(sc, umd, l);
511 break;
512 #endif /* MEMORY_DISK_SERVER */
513 default:
514 break;
515 }
516 break;
517 }
518 mutex_exit(&sc->sc_lock);
519 return error;
520 }
521
522 static void
523 md_set_disklabel(struct md_softc *sc)
524 {
525 struct disklabel *lp = sc->sc_dkdev.dk_label;
526 struct partition *pp;
527
528 memset(lp, 0, sizeof(*lp));
529
530 lp->d_secsize = DEV_BSIZE;
531 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
532 if (lp->d_secperunit >= (32*64)) {
533 lp->d_nsectors = 32;
534 lp->d_ntracks = 64;
535 lp->d_ncylinders = lp->d_secperunit / (32*64);
536 } else {
537 lp->d_nsectors = 1;
538 lp->d_ntracks = 1;
539 lp->d_ncylinders = lp->d_secperunit;
540 }
541 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
542
543 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
544 lp->d_type = DTYPE_UNKNOWN;
545 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
546 lp->d_rpm = 3600;
547 lp->d_interleave = 1;
548 lp->d_flags = 0;
549
550 pp = &lp->d_partitions[0];
551 pp->p_offset = 0;
552 pp->p_size = lp->d_secperunit;
553 pp->p_fstype = FS_BSDFFS;
554
555 pp = &lp->d_partitions[RAW_PART];
556 pp->p_offset = 0;
557 pp->p_size = lp->d_secperunit;
558 pp->p_fstype = FS_UNUSED;
559
560 lp->d_npartitions = RAW_PART+1;
561 lp->d_magic = DISKMAGIC;
562 lp->d_magic2 = DISKMAGIC;
563 lp->d_checksum = dkcksum(lp);
564 }
565
566 /*
567 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
568 * Just allocate some kernel memory and return.
569 */
570 static int
571 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
572 struct lwp *l)
573 {
574 vaddr_t addr;
575 vsize_t size;
576
577 mutex_exit(&sc->sc_lock);
578
579 /* Sanity check the size. */
580 size = umd->md_size;
581 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
582
583 mutex_enter(&sc->sc_lock);
584
585 if (!addr)
586 return ENOMEM;
587
588 /* If another thread beat us to configure this unit: fail. */
589 if (sc->sc_type != MD_UNCONFIGURED) {
590 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
591 return EINVAL;
592 }
593
594 /* This unit is now configured. */
595 sc->sc_addr = (void *)addr; /* kernel space */
596 sc->sc_size = (size_t)size;
597 sc->sc_type = MD_KMEM_ALLOCATED;
598 md_set_disklabel(sc);
599 return 0;
600 }
601
602 #if MEMORY_DISK_SERVER
603
604 /*
605 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
606 * Set config, then become the I/O server for this unit.
607 */
608 static int
609 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
610 struct lwp *l)
611 {
612 vaddr_t end;
613 int error;
614
615 KASSERT(mutex_owned(&sc->sc_lock));
616
617 /* Sanity check addr, size. */
618 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
619
620 if ((end >= VM_MAXUSER_ADDRESS) ||
621 (end < ((vaddr_t) umd->md_addr)) )
622 return EINVAL;
623
624 /* This unit is now configured. */
625 sc->sc_addr = umd->md_addr; /* user space */
626 sc->sc_size = umd->md_size;
627 sc->sc_type = MD_UMEM_SERVER;
628 md_set_disklabel(sc);
629
630 /* Become the server daemon */
631 error = md_server_loop(sc);
632
633 /* This server is now going away! */
634 sc->sc_type = MD_UNCONFIGURED;
635 sc->sc_addr = 0;
636 sc->sc_size = 0;
637
638 return (error);
639 }
640
641 static int
642 md_server_loop(struct md_softc *sc)
643 {
644 struct buf *bp;
645 void *addr; /* user space address */
646 size_t off; /* offset into "device" */
647 size_t xfer; /* amount to transfer */
648 int error;
649 bool is_read;
650
651 KASSERT(mutex_owned(&sc->sc_lock));
652
653 for (;;) {
654 /* Wait for some work to arrive. */
655 while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
656 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
657 if (error)
658 return error;
659 }
660
661 /* Do the transfer to/from user space. */
662 mutex_exit(&sc->sc_lock);
663 error = 0;
664 is_read = ((bp->b_flags & B_READ) == B_READ);
665 bp->b_resid = bp->b_bcount;
666 off = (bp->b_blkno << DEV_BSHIFT);
667 if (off >= sc->sc_size) {
668 if (is_read)
669 goto done; /* EOF (not an error) */
670 error = EIO;
671 goto done;
672 }
673 xfer = bp->b_resid;
674 if (xfer > (sc->sc_size - off))
675 xfer = (sc->sc_size - off);
676 addr = (char *)sc->sc_addr + off;
677 disk_busy(&sc->sc_dkdev);
678 if (is_read)
679 error = copyin(addr, bp->b_data, xfer);
680 else
681 error = copyout(bp->b_data, addr, xfer);
682 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
683 if (!error)
684 bp->b_resid -= xfer;
685
686 done:
687 if (error) {
688 bp->b_error = error;
689 }
690 biodone(bp);
691 mutex_enter(&sc->sc_lock);
692 }
693 }
694 #endif /* MEMORY_DISK_SERVER */
695