md.c revision 1.71.4.2 1 /* $NetBSD: md.c,v 1.71.4.2 2015/06/06 14:40:06 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This implements a general-purpose memory-disk.
30 * See md.h for notes on the config types.
31 *
32 * Note that this driver provides the same functionality
33 * as the MFS filesystem hack, but this is better because
34 * you can use this for any filesystem type you'd like!
35 *
36 * Credit for most of the kmem ramdisk code goes to:
37 * Leo Weppelman (atari) and Phil Nelson (pc532)
38 * Credit for the ideas behind the "user space memory" code goes
39 * to the authors of the MFS implementation.
40 */
41
42 #include <sys/cdefs.h>
43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.71.4.2 2015/06/06 14:40:06 skrll Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_md.h"
47 #else
48 #define MEMORY_DISK_SERVER 1
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/malloc.h>
54 #include <sys/systm.h>
55 #include <sys/buf.h>
56 #include <sys/bufq.h>
57 #include <sys/device.h>
58 #include <sys/disk.h>
59 #include <sys/stat.h>
60 #include <sys/proc.h>
61 #include <sys/conf.h>
62 #include <sys/disklabel.h>
63
64 #include <uvm/uvm_extern.h>
65
66 #include <dev/md.h>
67
68 /*
69 * The user-space functionality is included by default.
70 * Use `options MEMORY_DISK_SERVER=0' to turn it off.
71 */
72 #ifndef MEMORY_DISK_SERVER
73 #error MEMORY_DISK_SERVER should be defined by opt_md.h
74 #endif /* MEMORY_DISK_SERVER */
75
76 /*
77 * We should use the raw partition for ioctl.
78 */
79 #define MD_UNIT(unit) DISKUNIT(unit)
80
81 /* autoconfig stuff... */
82
/*
 * Per-unit driver state.  One instance is allocated by autoconfiguration
 * for every md unit (see the CFATTACH declaration below, which passes
 * sizeof(struct md_softc)).
 */
struct md_softc {
	device_t sc_dev;		/* Self. */
	struct disk sc_dkdev;		/* hook for generic disk handling */
	struct md_conf sc_md;		/* type/address/size of the backing store */
	kmutex_t sc_lock;		/* Protect self. */
	kcondvar_t sc_cv;		/* Wait here for work. */
	struct bufq_state *sc_buflist;	/* requests queued for the server */
};
/* shorthand for fields in sc_md: */
#define sc_addr sc_md.md_addr
#define sc_size sc_md.md_size
#define sc_type sc_md.md_type
95
/* Pseudo-device attach entry point (non-static: called from outside). */
void mdattach(int);

/* Autoconfiguration attach/detach hooks, referenced by the cfattach below. */
static void md_attach(device_t, device_t, void *);
static int md_detach(device_t, int);

/* Device switch entry points, wired into md_bdevsw / md_cdevsw. */
static dev_type_open(mdopen);
static dev_type_close(mdclose);
static dev_type_read(mdread);
static dev_type_write(mdwrite);
static dev_type_ioctl(mdioctl);
static dev_type_strategy(mdstrategy);
static dev_type_size(mdsize);
108
/*
 * Block device switch for md.  Dumping and discard are not supported
 * (nodump / nodiscard).
 */
const struct bdevsw md_bdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_strategy = mdstrategy,
	.d_ioctl = mdioctl,
	.d_dump = nodump,
	.d_psize = mdsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};
119
/*
 * Character (raw) device switch for md.  Read and write go through
 * mdread/mdwrite; the remaining optional entry points are stubbed out.
 * NOTE(review): unlike md_bdevsw, d_flag here does not include D_MPSAFE --
 * confirm whether that difference is intentional.
 */
const struct cdevsw md_cdevsw = {
	.d_open = mdopen,
	.d_close = mdclose,
	.d_read = mdread,
	.d_write = mdwrite,
	.d_ioctl = mdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
134
/* Disk driver hooks handed to disk_init() in md_attach(). */
static struct dkdriver mddkdriver = {
	.d_strategy = mdstrategy
};
138
/* Autoconfiguration glue: driver record and attachment for "md". */
extern struct cfdriver md_cd;
CFATTACH_DECL3_NEW(md, sizeof(struct md_softc),
    0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);

/* Held by mdopen()/mdclose() around attaching and detaching units. */
static kmutex_t md_device_lock;		/* Protect unit creation / deletion. */
extern size_t md_root_size;

/* Builds the in-core disklabel from the unit's configured size. */
static void md_set_disklabel(struct md_softc *);
147
148 /*
149 * This is called if we are configured as a pseudo-device
150 */
151 void
152 mdattach(int n)
153 {
154
155 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE);
156 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) {
157 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name);
158 return;
159 }
160 }
161
162 static void
163 md_attach(device_t parent, device_t self, void *aux)
164 {
165 struct md_softc *sc = device_private(self);
166
167 sc->sc_dev = self;
168 sc->sc_type = MD_UNCONFIGURED;
169 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);
170 cv_init(&sc->sc_cv, "mdidle");
171 bufq_alloc(&sc->sc_buflist, "fcfs", 0);
172
173 /* XXX - Could accept aux info here to set the config. */
174 #ifdef MEMORY_DISK_HOOKS
175 /*
176 * This external function might setup a pre-loaded disk.
177 * All it would need to do is setup the md_conf struct.
178 * See sys/dev/md_root.c for an example.
179 */
180 md_attach_hook(device_unit(self), &sc->sc_md);
181 #endif
182
183 /*
184 * Initialize and attach the disk structure.
185 */
186 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver);
187 disk_attach(&sc->sc_dkdev);
188
189 if (sc->sc_type != MD_UNCONFIGURED)
190 md_set_disklabel(sc);
191
192 if (!pmf_device_register(self, NULL, NULL))
193 aprint_error_dev(self, "couldn't establish power handler\n");
194 }
195
196 static int
197 md_detach(device_t self, int flags)
198 {
199 struct md_softc *sc = device_private(self);
200 int rc;
201
202 rc = 0;
203 mutex_enter(&sc->sc_dkdev.dk_openlock);
204 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED)
205 ; /* nothing to do */
206 else if ((flags & DETACH_FORCE) == 0)
207 rc = EBUSY;
208 mutex_exit(&sc->sc_dkdev.dk_openlock);
209
210 if (rc != 0)
211 return rc;
212
213 pmf_device_deregister(self);
214 disk_detach(&sc->sc_dkdev);
215 disk_destroy(&sc->sc_dkdev);
216 bufq_free(sc->sc_buflist);
217 mutex_destroy(&sc->sc_lock);
218 cv_destroy(&sc->sc_cv);
219 return 0;
220 }
221
222 /*
223 * operational routines:
224 * open, close, read, write, strategy,
225 * ioctl, dump, size
226 */
227
/* Forward declarations for the MD_SETCONF helpers used by mdioctl(). */
#if MEMORY_DISK_SERVER
/* Only built when user-space backed disks are enabled. */
static int md_server_loop(struct md_softc *sc);
static int md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
    struct lwp *l);
#endif /* MEMORY_DISK_SERVER */
static int md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
    struct lwp *l);
235
236 static int
237 mdsize(dev_t dev)
238 {
239 struct md_softc *sc;
240 int res;
241
242 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
243 if (sc == NULL)
244 return 0;
245
246 mutex_enter(&sc->sc_lock);
247 if (sc->sc_type == MD_UNCONFIGURED)
248 res = 0;
249 else
250 res = sc->sc_size >> DEV_BSHIFT;
251 mutex_exit(&sc->sc_lock);
252
253 return res;
254 }
255
256 static int
257 mdopen(dev_t dev, int flag, int fmt, struct lwp *l)
258 {
259 int unit;
260 int part = DISKPART(dev);
261 int pmask = 1 << part;
262 cfdata_t cf;
263 struct md_softc *sc;
264 struct disk *dk;
265 #ifdef MEMORY_DISK_HOOKS
266 bool configured;
267 #endif
268
269 mutex_enter(&md_device_lock);
270 unit = MD_UNIT(dev);
271 sc = device_lookup_private(&md_cd, unit);
272 if (sc == NULL) {
273 if (part != RAW_PART) {
274 mutex_exit(&md_device_lock);
275 return ENXIO;
276 }
277 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
278 cf->cf_name = md_cd.cd_name;
279 cf->cf_atname = md_cd.cd_name;
280 cf->cf_unit = unit;
281 cf->cf_fstate = FSTATE_STAR;
282 sc = device_private(config_attach_pseudo(cf));
283 if (sc == NULL) {
284 mutex_exit(&md_device_lock);
285 return ENOMEM;
286 }
287 }
288
289 dk = &sc->sc_dkdev;
290
291 /*
292 * The raw partition is used for ioctl to configure.
293 */
294 if (part == RAW_PART)
295 goto ok;
296
297 #ifdef MEMORY_DISK_HOOKS
298 /* Call the open hook to allow loading the device. */
299 configured = (sc->sc_type != MD_UNCONFIGURED);
300 md_open_hook(unit, &sc->sc_md);
301 /* initialize disklabel if the device is configured in open hook */
302 if (!configured && sc->sc_type != MD_UNCONFIGURED)
303 md_set_disklabel(sc);
304 #endif
305
306 /*
307 * This is a normal, "slave" device, so
308 * enforce initialized.
309 */
310 if (sc->sc_type == MD_UNCONFIGURED) {
311 mutex_exit(&md_device_lock);
312 return ENXIO;
313 }
314
315 ok:
316 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */
317 mutex_enter(&dk->dk_openlock);
318 /* Mark our unit as open. */
319 switch (fmt) {
320 case S_IFCHR:
321 dk->dk_copenmask |= pmask;
322 break;
323 case S_IFBLK:
324 dk->dk_bopenmask |= pmask;
325 break;
326 }
327
328 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
329
330 mutex_exit(&dk->dk_openlock);
331 mutex_exit(&md_device_lock);
332 return 0;
333 }
334
335 static int
336 mdclose(dev_t dev, int flag, int fmt, struct lwp *l)
337 {
338 int part = DISKPART(dev);
339 int pmask = 1 << part;
340 int error;
341 cfdata_t cf;
342 struct md_softc *sc;
343 struct disk *dk;
344
345 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
346 if (sc == NULL)
347 return ENXIO;
348
349 dk = &sc->sc_dkdev;
350
351 mutex_enter(&dk->dk_openlock);
352
353 switch (fmt) {
354 case S_IFCHR:
355 dk->dk_copenmask &= ~pmask;
356 break;
357 case S_IFBLK:
358 dk->dk_bopenmask &= ~pmask;
359 break;
360 }
361 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask;
362 if (dk->dk_openmask != 0) {
363 mutex_exit(&dk->dk_openlock);
364 return 0;
365 }
366
367 mutex_exit(&dk->dk_openlock);
368
369 mutex_enter(&md_device_lock);
370 cf = device_cfdata(sc->sc_dev);
371 error = config_detach(sc->sc_dev, DETACH_QUIET);
372 if (! error)
373 free(cf, M_DEVBUF);
374 mutex_exit(&md_device_lock);
375 return error;
376 }
377
378 static int
379 mdread(dev_t dev, struct uio *uio, int flags)
380 {
381 struct md_softc *sc;
382
383 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
384
385 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
386 return ENXIO;
387
388 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
389 }
390
391 static int
392 mdwrite(dev_t dev, struct uio *uio, int flags)
393 {
394 struct md_softc *sc;
395
396 sc = device_lookup_private(&md_cd, MD_UNIT(dev));
397
398 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED)
399 return ENXIO;
400
401 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
402 }
403
404 /*
405 * Handle I/O requests, either directly, or
406 * by passing them to the server process.
407 */
408 static void
409 mdstrategy(struct buf *bp)
410 {
411 struct md_softc *sc;
412 void * addr;
413 size_t off, xfer;
414 bool is_read;
415
416 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev));
417
418 mutex_enter(&sc->sc_lock);
419
420 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) {
421 bp->b_error = ENXIO;
422 goto done;
423 }
424
425 switch (sc->sc_type) {
426 #if MEMORY_DISK_SERVER
427 case MD_UMEM_SERVER:
428 /* Just add this job to the server's queue. */
429 bufq_put(sc->sc_buflist, bp);
430 cv_signal(&sc->sc_cv);
431 mutex_exit(&sc->sc_lock);
432 /* see md_server_loop() */
433 /* no biodone in this case */
434 return;
435 #endif /* MEMORY_DISK_SERVER */
436
437 case MD_KMEM_FIXED:
438 case MD_KMEM_ALLOCATED:
439 /* These are in kernel space. Access directly. */
440 is_read = ((bp->b_flags & B_READ) == B_READ);
441 bp->b_resid = bp->b_bcount;
442 off = (bp->b_blkno << DEV_BSHIFT);
443 if (off >= sc->sc_size) {
444 if (is_read)
445 break; /* EOF */
446 goto set_eio;
447 }
448 xfer = bp->b_resid;
449 if (xfer > (sc->sc_size - off))
450 xfer = (sc->sc_size - off);
451 addr = (char *)sc->sc_addr + off;
452 disk_busy(&sc->sc_dkdev);
453 if (is_read)
454 memcpy(bp->b_data, addr, xfer);
455 else
456 memcpy(addr, bp->b_data, xfer);
457 disk_unbusy(&sc->sc_dkdev, xfer, is_read);
458 bp->b_resid -= xfer;
459 break;
460
461 default:
462 bp->b_resid = bp->b_bcount;
463 set_eio:
464 bp->b_error = EIO;
465 break;
466 }
467
468 done:
469 mutex_exit(&sc->sc_lock);
470
471 biodone(bp);
472 }
473
474 static int
475 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
476 {
477 struct md_softc *sc;
478 struct md_conf *umd;
479 int error;
480
481 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL)
482 return ENXIO;
483
484 mutex_enter(&sc->sc_lock);
485 if (sc->sc_type != MD_UNCONFIGURED) {
486 error = disk_ioctl(&sc->sc_dkdev, dev, cmd, data, flag, l);
487 if (error != EPASSTHROUGH) {
488 mutex_exit(&sc->sc_lock);
489 return 0;
490 }
491 }
492
493 /* If this is not the raw partition, punt! */
494 if (DISKPART(dev) != RAW_PART) {
495 mutex_exit(&sc->sc_lock);
496 return ENOTTY;
497 }
498
499 umd = (struct md_conf *)data;
500 error = EINVAL;
501 switch (cmd) {
502 case MD_GETCONF:
503 *umd = sc->sc_md;
504 error = 0;
505 break;
506
507 case MD_SETCONF:
508 /* Can only set it once. */
509 if (sc->sc_type != MD_UNCONFIGURED)
510 break;
511 switch (umd->md_type) {
512 case MD_KMEM_ALLOCATED:
513 error = md_ioctl_kalloc(sc, umd, l);
514 break;
515 #if MEMORY_DISK_SERVER
516 case MD_UMEM_SERVER:
517 error = md_ioctl_server(sc, umd, l);
518 break;
519 #endif /* MEMORY_DISK_SERVER */
520 default:
521 break;
522 }
523 break;
524 }
525 mutex_exit(&sc->sc_lock);
526 return error;
527 }
528
529 static void
530 md_set_disklabel(struct md_softc *sc)
531 {
532 struct disklabel *lp = sc->sc_dkdev.dk_label;
533 struct partition *pp;
534
535 memset(lp, 0, sizeof(*lp));
536
537 lp->d_secsize = DEV_BSIZE;
538 lp->d_secperunit = sc->sc_size / DEV_BSIZE;
539 if (lp->d_secperunit >= (32*64)) {
540 lp->d_nsectors = 32;
541 lp->d_ntracks = 64;
542 lp->d_ncylinders = lp->d_secperunit / (32*64);
543 } else {
544 lp->d_nsectors = 1;
545 lp->d_ntracks = 1;
546 lp->d_ncylinders = lp->d_secperunit;
547 }
548 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors;
549
550 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename));
551 lp->d_type = DKTYPE_MD;
552 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
553 lp->d_rpm = 3600;
554 lp->d_interleave = 1;
555 lp->d_flags = 0;
556
557 pp = &lp->d_partitions[0];
558 pp->p_offset = 0;
559 pp->p_size = lp->d_secperunit;
560 pp->p_fstype = FS_BSDFFS;
561
562 pp = &lp->d_partitions[RAW_PART];
563 pp->p_offset = 0;
564 pp->p_size = lp->d_secperunit;
565 pp->p_fstype = FS_UNUSED;
566
567 lp->d_npartitions = RAW_PART+1;
568 lp->d_magic = DISKMAGIC;
569 lp->d_magic2 = DISKMAGIC;
570 lp->d_checksum = dkcksum(lp);
571 }
572
573 /*
574 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED)
575 * Just allocate some kernel memory and return.
576 */
577 static int
578 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd,
579 struct lwp *l)
580 {
581 vaddr_t addr;
582 vsize_t size;
583
584 mutex_exit(&sc->sc_lock);
585
586 /* Sanity check the size. */
587 size = umd->md_size;
588 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
589
590 mutex_enter(&sc->sc_lock);
591
592 if (!addr)
593 return ENOMEM;
594
595 /* If another thread beat us to configure this unit: fail. */
596 if (sc->sc_type != MD_UNCONFIGURED) {
597 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED);
598 return EINVAL;
599 }
600
601 /* This unit is now configured. */
602 sc->sc_addr = (void *)addr; /* kernel space */
603 sc->sc_size = (size_t)size;
604 sc->sc_type = MD_KMEM_ALLOCATED;
605 md_set_disklabel(sc);
606 return 0;
607 }
608
609 #if MEMORY_DISK_SERVER
610
611 /*
612 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER)
613 * Set config, then become the I/O server for this unit.
614 */
615 static int
616 md_ioctl_server(struct md_softc *sc, struct md_conf *umd,
617 struct lwp *l)
618 {
619 vaddr_t end;
620 int error;
621
622 KASSERT(mutex_owned(&sc->sc_lock));
623
624 /* Sanity check addr, size. */
625 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size);
626
627 if ((end >= VM_MAXUSER_ADDRESS) ||
628 (end < ((vaddr_t) umd->md_addr)) )
629 return EINVAL;
630
631 /* This unit is now configured. */
632 sc->sc_addr = umd->md_addr; /* user space */
633 sc->sc_size = umd->md_size;
634 sc->sc_type = MD_UMEM_SERVER;
635 md_set_disklabel(sc);
636
637 /* Become the server daemon */
638 error = md_server_loop(sc);
639
640 /* This server is now going away! */
641 sc->sc_type = MD_UNCONFIGURED;
642 sc->sc_addr = 0;
643 sc->sc_size = 0;
644
645 return (error);
646 }
647
/*
 * I/O server loop for a user-memory backed unit.
 *
 * Runs in the context of the thread that issued MD_SETCONF (it is
 * called from md_ioctl_server()).  Repeatedly takes a buffer queued by
 * mdstrategy(), copies the data between the buffer and the user-space
 * backing store, and completes the buffer with biodone().
 *
 * Must be called with sc_lock held.  The lock is released around each
 * transfer and re-taken before the queue is looked at again.  The loop
 * only ends when cv_wait_sig() returns an error, which is then returned
 * to the caller.
 */
static int
md_server_loop(struct md_softc *sc)
{
	struct buf *bp;
	void *addr;	/* user space address */
	size_t off;	/* offset into "device" */
	size_t xfer;	/* amount to transfer */
	int error;
	bool is_read;

	KASSERT(mutex_owned(&sc->sc_lock));

	for (;;) {
		/* Wait for some work to arrive. */
		while ((bp = bufq_get(sc->sc_buflist)) == NULL) {
			error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
			if (error)
				return error;
		}

		/* Do the transfer to/from user space. */
		mutex_exit(&sc->sc_lock);
		error = 0;
		is_read = ((bp->b_flags & B_READ) == B_READ);
		/* Nothing transferred yet. */
		bp->b_resid = bp->b_bcount;
		off = (bp->b_blkno << DEV_BSHIFT);
		if (off >= sc->sc_size) {
			if (is_read)
				goto done;	/* EOF (not an error) */
			error = EIO;
			goto done;
		}
		/* Clip the transfer to what is left of the disk. */
		xfer = bp->b_resid;
		if (xfer > (sc->sc_size - off))
			xfer = (sc->sc_size - off);
		addr = (char *)sc->sc_addr + off;
		disk_busy(&sc->sc_dkdev);
		/*
		 * The backing store is user memory: a disk read copies
		 * from user space into the buffer, a disk write copies
		 * the buffer out to user space.
		 */
		if (is_read)
			error = copyin(addr, bp->b_data, xfer);
		else
			error = copyout(bp->b_data, addr, xfer);
		/* A failed copy is accounted as zero bytes moved. */
		disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read);
		if (!error)
			bp->b_resid -= xfer;

	done:
		if (error) {
			bp->b_error = error;
		}
		biodone(bp);
		/* Re-take the lock before looking at the queue again. */
		mutex_enter(&sc->sc_lock);
	}
}
701 #endif /* MEMORY_DISK_SERVER */
702