dk.c revision 1.113 1 /* $NetBSD: dk.c,v 1.113 2022/08/22 00:19:12 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.113 2022/08/22 00:19:12 riastradh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/pool.h>
44 #include <sys/ioctl.h>
45 #include <sys/disklabel.h>
46 #include <sys/disk.h>
47 #include <sys/fcntl.h>
48 #include <sys/buf.h>
49 #include <sys/bufq.h>
50 #include <sys/vnode.h>
51 #include <sys/stat.h>
52 #include <sys/conf.h>
53 #include <sys/callout.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/device.h>
57 #include <sys/kauth.h>
58
59 #include <miscfs/specfs/specdev.h>
60
61 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
62
/*
 * Life-cycle states of a wedge.  A wedge is created LARVAL, moves to
 * RUNNING once fully attached, and passes through DYING to DEAD while
 * being detached (see dkwedge_add()/dkwedge_detach()).
 */
typedef enum {
	DKW_STATE_LARVAL = 0,		/* being created; not yet usable */
	DKW_STATE_RUNNING = 1,		/* attached; I/O and opens allowed */
	DKW_STATE_DYING = 2,		/* detach in progress */
	DKW_STATE_DEAD = 666		/* detached; softc about to be freed */
} dkwedge_state_t;
69
/*
 * Software state for one wedge (a virtual partition of a parent disk).
 * NOTE(review): per-field locking is inferred from usage in this file
 * and should be confirmed: sc_iopend is covered by sc_iolock; the
 * parent's dk_openlock covers sc_plink and the wedge open masks; the
 * parent's dk_rawlock covers sc_mode and the parent raw open state.
 */
struct dkwedge_softc {
	device_t sc_dev;	/* pointer to our pseudo-device */
	struct cfdata sc_cfdata;	/* our cfdata structure */
	uint8_t sc_wname[128];	/* wedge name (Unicode, UTF-8) */

	dkwedge_state_t sc_state;	/* state this wedge is in */

	struct disk *sc_parent;	/* parent disk */
	daddr_t sc_offset;	/* LBA offset of wedge in parent */
	uint64_t sc_size;	/* size of wedge in blocks */
	char sc_ptype[32];	/* partition type */
	dev_t sc_pdev;		/* cached parent's dev_t */
	/* link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk sc_dk;	/* our own disk structure */
	struct bufq_state *sc_bufq;	/* buffer queue */
	struct callout sc_restart_ch;	/* callout to restart I/O */

	kmutex_t sc_iolock;	/* serializes I/O bookkeeping below */
	kcondvar_t sc_dkdrn;	/* signalled when sc_iopend drains to 0 */
	u_int sc_iopend;	/* I/Os pending */
	int sc_mode;		/* parent open mode */
};
94
95 static void dkstart(struct dkwedge_softc *);
96 static void dkiodone(struct buf *);
97 static void dkrestart(void *);
98 static void dkminphys(struct buf *);
99
100 static int dklastclose(struct dkwedge_softc *);
101 static int dkwedge_cleanup_parent(struct dkwedge_softc *, int);
102 static int dkwedge_detach(device_t, int);
103 static void dkwedge_delall1(struct disk *, bool);
104 static int dkwedge_del1(struct dkwedge_info *, int);
105 static int dk_open_parent(dev_t, int, struct vnode **);
106 static int dk_close_parent(struct vnode *, int);
107
108 static dev_type_open(dkopen);
109 static dev_type_close(dkclose);
110 static dev_type_read(dkread);
111 static dev_type_write(dkwrite);
112 static dev_type_ioctl(dkioctl);
113 static dev_type_strategy(dkstrategy);
114 static dev_type_dump(dkdump);
115 static dev_type_size(dksize);
116 static dev_type_discard(dkdiscard);
117
/*
 * Block device switch for wedges.  D_MPSAFE: the entry points below
 * perform their own locking.
 */
const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_flag = D_DISK | D_MPSAFE
};
128
/*
 * Character device switch for wedges.  Shares open/close/ioctl with
 * the block switch; tty/poll/mmap entry points are stubbed out.
 */
const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_flag = D_DISK | D_MPSAFE
};
143
/*
 * Global table of wedges, indexed by unit number.  Slots may be NULL.
 * ndkwedges is the table's current capacity (grown in units of 16 by
 * dkwedge_array_expand()), not the number of live wedges.  Protected
 * by dkwedges_lock.
 */
static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;
static krwlock_t dkwedges_lock;

/* Partition-map discovery methods, kept sorted by ascending priority. */
static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;
150
151 /*
152 * dkwedge_match:
153 *
154 * Autoconfiguration match function for pseudo-device glue.
155 */
156 static int
157 dkwedge_match(device_t parent, cfdata_t match,
158 void *aux)
159 {
160
161 /* Pseudo-device; always present. */
162 return (1);
163 }
164
165 /*
166 * dkwedge_attach:
167 *
168 * Autoconfiguration attach function for pseudo-device glue.
169 */
170 static void
171 dkwedge_attach(device_t parent, device_t self,
172 void *aux)
173 {
174
175 if (!pmf_device_register(self, NULL, NULL))
176 aprint_error_dev(self, "couldn't establish power handler\n");
177 }
178
179 CFDRIVER_DECL(dk, DV_DISK, NULL);
180 CFATTACH_DECL3_NEW(dk, 0,
181 dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
182 DVF_DETACH_SHUTDOWN);
183
184 /*
185 * dkwedge_wait_drain:
186 *
187 * Wait for I/O on the wedge to drain.
188 */
static void
dkwedge_wait_drain(struct dkwedge_softc *sc)
{

	/*
	 * Block until every pending I/O on the wedge has completed,
	 * i.e. sc_iopend has dropped to zero; the completion side
	 * signals sc_dkdrn.
	 */
	mutex_enter(&sc->sc_iolock);
	while (sc->sc_iopend != 0)
		cv_wait(&sc->sc_dkdrn, &sc->sc_iolock);
	mutex_exit(&sc->sc_iolock);
}
198
199 /*
200 * dkwedge_compute_pdev:
201 *
202 * Compute the parent disk's dev_t.
203 */
204 static int
205 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
206 {
207 const char *name, *cp;
208 devmajor_t pmaj;
209 int punit;
210 char devname[16];
211
212 name = pname;
213 switch (type) {
214 case VBLK:
215 pmaj = devsw_name2blk(name, devname, sizeof(devname));
216 break;
217 case VCHR:
218 pmaj = devsw_name2chr(name, devname, sizeof(devname));
219 break;
220 default:
221 pmaj = NODEVMAJOR;
222 break;
223 }
224 if (pmaj == NODEVMAJOR)
225 return (ENODEV);
226
227 name += strlen(devname);
228 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
229 punit = (punit * 10) + (*cp - '0');
230 if (cp == name) {
231 /* Invalid parent disk name. */
232 return (ENODEV);
233 }
234
235 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
236
237 return (0);
238 }
239
240 /*
241 * dkwedge_array_expand:
242 *
243 * Expand the dkwedges array.
244 */
245 static void
246 dkwedge_array_expand(void)
247 {
248 int newcnt = ndkwedges + 16;
249 struct dkwedge_softc **newarray, **oldarray;
250
251 newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
252 M_WAITOK|M_ZERO);
253 if ((oldarray = dkwedges) != NULL)
254 memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
255 dkwedges = newarray;
256 ndkwedges = newcnt;
257 if (oldarray != NULL)
258 free(oldarray, M_DKWEDGE);
259 }
260
261 static void
262 dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
263 {
264 struct disk *dk = &sc->sc_dk;
265 struct disk_geom *dg = &dk->dk_geom;
266
267 memset(dg, 0, sizeof(*dg));
268
269 dg->dg_secperunit = sc->sc_size;
270 dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;
271
272 /* fake numbers, 1 cylinder is 1 MB with default sector size */
273 dg->dg_nsectors = 32;
274 dg->dg_ntracks = 64;
275 dg->dg_ncylinders = dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);
276
277 disk_set_info(sc->sc_dev, dk, NULL);
278 }
279
280 /*
281 * dkwedge_add: [exported function]
282 *
283 * Add a disk wedge based on the provided information.
284 *
285 * The incoming dkw_devname[] is ignored, instead being
286 * filled in and returned to the caller.
287 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;

	/* Parent name is caller-supplied; force NUL termination. */
	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return (ENODEV);

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return (error);

	if (dkw->dkw_offset < 0)
		return (EINVAL);

	/*
	 * Check for an existing wedge at the same disk offset. Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (lsc->sc_size > dkw->dkw_size)
			break;

		/* Same wedge, grown in place: update size only. */
		sc = lsc;
		sc->sc_size = dkw->dkw_size;
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	if (sc != NULL)
		goto announce;

	/* No existing wedge matched: build a new larval softc. */
	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	sc->sc_size = dkw->dkw_size;

	/* Copy name and type, forcing NUL termination of both. */
	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);
	cv_init(&sc->sc_dkdrn, "dkdrn");

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			daddr_t lastblk = sc->sc_offset + sc->sc_size - 1;
			daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1;

			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			/* Exact duplicate is EEXIST; other overlap EINVAL. */
			if (sc->sc_offset == lsc->sc_offset &&
			    sc->sc_size == lsc->sc_size &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		/* Tear the larval softc back down. */
		cv_destroy(&sc->sc_dkdrn);
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		free(sc, M_DKWEDGE);
		return (error);
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_STAR;

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					/* Remember the first free slot. */
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
				    sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL)
			/* Table full: grow it and rescan from the top. */
			dkwedge_array_expand();
		else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		/* Undo the insertion on the parent's wedge list. */
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		cv_destroy(&sc->sc_dkdrn);
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		free(sc, M_DKWEDGE);
		return (error);
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance. This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		/* Back out the table slot ... */
		rw_enter(&dkwedges_lock, RW_WRITER);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		/* ... and the parent linkage. */
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		cv_destroy(&sc->sc_dkdrn);
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		free(sc, M_DKWEDGE);
		return (ENOMEM);
	}

	/*
	 * XXX Really ought to make the disk_attach() and the changing
	 * of state to RUNNING atomic.
	 */

	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	dk_set_geometry(sc, pdk);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	sc->sc_state = DKW_STATE_RUNNING;

 announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    sc->sc_size, sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller. */
	strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
	    sizeof(dkw->dkw_devname));

	return (0);
}
513
514 /*
515 * dkwedge_find:
516 *
517 * Lookup a disk wedge based on the provided information.
518 * NOTE: We look up the wedge based on the wedge devname,
519 * not wname.
520 *
521 * Return NULL if the wedge is not found, otherwise return
522 * the wedge's softc. Assign the wedge's unit number to unitp
523 * if unitp is not NULL.
524 */
525 static struct dkwedge_softc *
526 dkwedge_find(struct dkwedge_info *dkw, u_int *unitp)
527 {
528 struct dkwedge_softc *sc = NULL;
529 u_int unit;
530
531 /* Find our softc. */
532 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
533 rw_enter(&dkwedges_lock, RW_READER);
534 for (unit = 0; unit < ndkwedges; unit++) {
535 if ((sc = dkwedges[unit]) != NULL &&
536 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
537 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
538 break;
539 }
540 }
541 rw_exit(&dkwedges_lock);
542 if (unit == ndkwedges)
543 return NULL;
544
545 if (unitp != NULL)
546 *unitp = unit;
547
548 return sc;
549 }
550
551 /*
552 * dkwedge_del: [exported function]
553 *
554 * Delete a disk wedge based on the provided information.
555 * NOTE: We look up the wedge based on the wedge devname,
556 * not wname.
557 */
int
dkwedge_del(struct dkwedge_info *dkw)
{
	/* Convenience wrapper: delete with no detach flags. */
	return dkwedge_del1(dkw, 0);
}
563
564 int
565 dkwedge_del1(struct dkwedge_info *dkw, int flags)
566 {
567 struct dkwedge_softc *sc = NULL;
568
569 /* Find our softc. */
570 if ((sc = dkwedge_find(dkw, NULL)) == NULL)
571 return (ESRCH);
572
573 return config_detach(sc->sc_dev, flags);
574 }
575
/*
 * Close the wedge's reference on its parent if the wedge is open.
 * Returns 0 if there was nothing to do or the close succeeded, EBUSY
 * if the wedge is open and DETACH_FORCE was not given.
 *
 * Locking is asymmetric on purpose: on the force path dklastclose()
 * releases both dk_openlock and the parent's dk_rawlock itself.
 */
static int
dkwedge_cleanup_parent(struct dkwedge_softc *sc, int flags)
{
	struct disk *dk = &sc->sc_dk;
	int rc;

	rc = 0;
	mutex_enter(&dk->dk_openlock);
	if (dk->dk_openmask == 0)
		/* nothing to do */
		mutex_exit(&dk->dk_openlock);
	else if ((flags & DETACH_FORCE) == 0) {
		rc = EBUSY;
		mutex_exit(&dk->dk_openlock);
	} else {
		mutex_enter(&sc->sc_parent->dk_rawlock);
		rc = dklastclose(sc); /* releases locks */
	}

	return rc;
}
597
598 /*
599 * dkwedge_detach:
600 *
601 * Autoconfiguration detach function for pseudo-device glue.
602 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *sc = NULL;
	u_int unit;
	int bmaj, cmaj, rc;

	/* Find our slot in the global wedge table. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (unit = 0; unit < ndkwedges; unit++) {
		if ((sc = dkwedges[unit]) != NULL && sc->sc_dev == self)
			break;
	}
	if (unit == ndkwedges)
		rc = ENXIO;
	else if ((rc = dkwedge_cleanup_parent(sc, flags)) == 0) {
		/* Mark the wedge as dying. */
		sc->sc_state = DKW_STATE_DYING;
	}
	rw_exit(&dkwedges_lock);

	if (rc != 0)
		return rc;

	pmf_device_deregister(self);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Kill any pending restart. */
	callout_stop(&sc->sc_restart_ch);

	/*
	 * dkstart() will kill any queued buffers now that the
	 * state of the wedge is not RUNNING. Once we've done
	 * that, wait for any other pending I/O to complete.
	 */
	dkstart(sc);
	dkwedge_wait_drain(sc);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * Clean up the parent.  vdevgone() above may have closed the
	 * wedge again, so force the parent close this time.
	 */
	dkwedge_cleanup_parent(sc, flags | DETACH_FORCE);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	/* Unhook from the parent's wedge list. */
	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	cv_destroy(&sc->sc_dkdrn);

	free(sc, M_DKWEDGE);

	return 0;
}
680
681 /*
682 * dkwedge_delall: [exported function]
683 *
684 * Delete all of the wedges on the specified disk. Used when
685 * a disk is being detached.
686 */
687 void
688 dkwedge_delall(struct disk *pdk)
689 {
690 dkwedge_delall1(pdk, false);
691 }
692
/*
 * Delete wedges on the parent disk, one at a time.  When idleonly is
 * true only wedges that are not open are removed (and DETACH_FORCE is
 * withheld); otherwise every wedge is forcibly detached.
 */
static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_info dkw;
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly) flags |= DETACH_FORCE;

	for (;;) {
		/* Pick any candidate wedge; the list shrinks as we go. */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			if (!idleonly || sc->sc_dk.dk_openmask == 0)
				break;
		}
		if (sc == NULL) {
			/* In the forced case the list must now be empty. */
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			return;
		}
		/*
		 * Snapshot the names under the lock; the detach below
		 * must run unlocked (it takes dk_openlock itself).
		 */
		strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		mutex_exit(&pdk->dk_openlock);
		(void) dkwedge_del1(&dkw, flags);
	}
}
721
722 /*
723 * dkwedge_list: [exported function]
724 *
725 * List all of the wedges on a particular disk.
726 */
int
dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
{
	struct uio uio;
	struct iovec iov;
	struct dkwedge_softc *sc;
	struct dkwedge_info dkw;
	int error = 0;

	/* Build a uio describing the caller's output buffer. */
	iov.iov_base = dkwl->dkwl_buf;
	iov.iov_len = dkwl->dkwl_bufsize;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = dkwl->dkwl_bufsize;
	uio.uio_rw = UIO_READ;
	KASSERT(l == curlwp);
	uio.uio_vmspace = l->l_proc->p_vmspace;

	dkwl->dkwl_ncopied = 0;

	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
		/* Stop once the buffer cannot hold another record. */
		if (uio.uio_resid < sizeof(dkw))
			break;

		/* Skip wedges still being created or torn down. */
		if (sc->sc_state != DKW_STATE_RUNNING)
			continue;

		/* Marshal one dkwedge_info record. */
		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
		strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw.dkw_parent));
		dkw.dkw_offset = sc->sc_offset;
		dkw.dkw_size = sc->sc_size;
		strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));

		error = uiomove(&dkw, sizeof(dkw), &uio);
		if (error)
			break;
		dkwl->dkwl_ncopied++;
	}
	/* Report the total wedge count even if fewer records fit. */
	dkwl->dkwl_nwedges = pdk->dk_nwedges;
	mutex_exit(&pdk->dk_openlock);

	return (error);
}
777
778 device_t
779 dkwedge_find_by_wname(const char *wname)
780 {
781 device_t dv = NULL;
782 struct dkwedge_softc *sc;
783 int i;
784
785 rw_enter(&dkwedges_lock, RW_WRITER);
786 for (i = 0; i < ndkwedges; i++) {
787 if ((sc = dkwedges[i]) == NULL)
788 continue;
789 if (strcmp(sc->sc_wname, wname) == 0) {
790 if (dv != NULL) {
791 printf(
792 "WARNING: double match for wedge name %s "
793 "(%s, %s)\n", wname, device_xname(dv),
794 device_xname(sc->sc_dev));
795 continue;
796 }
797 dv = sc->sc_dev;
798 }
799 }
800 rw_exit(&dkwedges_lock);
801 return dv;
802 }
803
804 device_t
805 dkwedge_find_by_parent(const char *name, size_t *i)
806 {
807 rw_enter(&dkwedges_lock, RW_WRITER);
808 for (; *i < (size_t)ndkwedges; (*i)++) {
809 struct dkwedge_softc *sc;
810 if ((sc = dkwedges[*i]) == NULL)
811 continue;
812 if (strcmp(sc->sc_parent->dk_name, name) != 0)
813 continue;
814 rw_exit(&dkwedges_lock);
815 return sc->sc_dev;
816 }
817 rw_exit(&dkwedges_lock);
818 return NULL;
819 }
820
821 void
822 dkwedge_print_wnames(void)
823 {
824 struct dkwedge_softc *sc;
825 int i;
826
827 rw_enter(&dkwedges_lock, RW_WRITER);
828 for (i = 0; i < ndkwedges; i++) {
829 if ((sc = dkwedges[i]) == NULL)
830 continue;
831 printf(" wedge:%s", sc->sc_wname);
832 }
833 rw_exit(&dkwedges_lock);
834 }
835
836 /*
837 * We need a dummy object to stuff into the dkwedge discovery method link
838 * set to ensure that there is always at least one object in the set.
839 */
840 static struct dkwedge_discovery_method dummy_discovery_method;
841 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
842
843 /*
844 * dkwedge_init:
845 *
846 * Initialize the disk wedge subsystem.
847 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	/* Hook the dk pseudo-device into autoconfiguration. */
	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	/*
	 * Collect the discovery methods registered via the link set
	 * into a list sorted by ascending priority.  Duplicate
	 * priorities are rejected (the later method is dropped).
	 */
	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		/* Skip the placeholder that keeps the link set non-empty. */
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}
901
902 #ifdef DKWEDGE_AUTODISCOVER
903 int dkwedge_autodiscover = 1;
904 #else
905 int dkwedge_autodiscover = 0;
906 #endif
907
908 /*
909 * dkwedge_discover: [exported function]
910 *
911 * Discover the wedges on a newly attached disk.
912 * Remove all unused wedges on the disk first.
913 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning, the block device
	 * is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	/* Get a vnode for the parent's character device ... */
	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	/* ... lock it ... */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	/* ... and open it read-only (FSILENT suppresses driver noise). */
	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENODEV)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges
	 */
	dkwedge_delall1(pdk, true);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists. If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	/* vn_close() releases our open and vnode reference. */
	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

 out:
	rw_exit(&dkwedge_discovery_methods_lock);
}
994
995 /*
996 * dkwedge_read:
997 *
998 * Read some data from the specified disk, used for
999 * partition discovery.
1000 */
int
dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
    void *tbuf, size_t len)
{
	buf_t *bp;
	int error;
	bool isopen;
	dev_t bdev;
	struct vnode *bdvp;

	/*
	 * The kernel cannot read from a character device vnode
	 * as physio() only handles user memory.
	 *
	 * If the block device has already been opened by a wedge
	 * use that vnode and temporarily bump the open counter.
	 *
	 * Otherwise try to open the block device.
	 */

	bdev = devsw_chr2blk(vp->v_rdev);

	mutex_enter(&pdk->dk_rawlock);
	if (pdk->dk_rawopens != 0) {
		KASSERT(pdk->dk_rawvp != NULL);
		isopen = true;
		++pdk->dk_rawopens;
		bdvp = pdk->dk_rawvp;
		error = 0;
	} else {
		isopen = false;
		error = dk_open_parent(bdev, FREAD, &bdvp);
	}
	mutex_exit(&pdk->dk_rawlock);

	if (error)
		return error;

	/* Issue a synchronous read through the block device vnode. */
	bp = getiobuf(bdvp, true);
	bp->b_flags = B_READ;
	bp->b_cflags = BC_BUSY;
	bp->b_dev = bdev;
	bp->b_data = tbuf;
	bp->b_bufsize = bp->b_bcount = len;
	bp->b_blkno = blkno;
	bp->b_cylinder = 0;
	bp->b_error = 0;

	VOP_STRATEGY(bdvp, bp);
	error = biowait(bp);
	putiobuf(bp);

	/* Undo whichever reference we took above. */
	mutex_enter(&pdk->dk_rawlock);
	if (isopen) {
		--pdk->dk_rawopens;
	} else {
		dk_close_parent(bdvp, FREAD);
	}
	mutex_exit(&pdk->dk_rawlock);

	return error;
}
1063
1064 /*
1065 * dkwedge_lookup:
1066 *
1067 * Look up a dkwedge_softc based on the provided dev_t.
1068 */
static struct dkwedge_softc *
dkwedge_lookup(dev_t dev)
{
	int unit = minor(dev);

	/*
	 * NOTE(review): dkwedges/ndkwedges are read here without
	 * taking dkwedges_lock; this appears to rely on callers
	 * racing benignly with table growth — confirm.
	 */
	if (unit >= ndkwedges)
		return (NULL);

	KASSERT(dkwedges != NULL);

	return (dkwedges[unit]);
}
1081
/*
 * Open the parent's block device identified by dev with the given
 * mode.  On success *vpp holds a referenced, unlocked vnode that the
 * caller must eventually release via dk_close_parent().
 */
static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}
1116
1117 static int
1118 dk_close_parent(struct vnode *vp, int mode)
1119 {
1120 int error;
1121
1122 error = vn_close(vp, mode, NOCRED);
1123 return error;
1124 }
1125
1126 /*
1127 * dkopen: [devsw entry point]
1128 *
1129 * Open a wedge.
1130 */
1131 static int
1132 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1133 {
1134 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1135 struct dkwedge_softc *nsc;
1136 struct vnode *vp;
1137 int error = 0;
1138 int mode;
1139
1140 if (sc == NULL)
1141 return (ENODEV);
1142 if (sc->sc_state != DKW_STATE_RUNNING)
1143 return (ENXIO);
1144
1145 /*
1146 * We go through a complicated little dance to only open the parent
1147 * vnode once per wedge, no matter how many times the wedge is
1148 * opened. The reason? We see one dkopen() per open call, but
1149 * only dkclose() on the last close.
1150 */
1151 mutex_enter(&sc->sc_dk.dk_openlock);
1152 mutex_enter(&sc->sc_parent->dk_rawlock);
1153 if (sc->sc_dk.dk_openmask == 0) {
1154 if (sc->sc_parent->dk_rawopens == 0) {
1155 KASSERT(sc->sc_parent->dk_rawvp == NULL);
1156 /*
1157 * Try open read-write. If this fails for EROFS
1158 * and wedge is read-only, retry to open read-only.
1159 */
1160 mode = FREAD | FWRITE;
1161 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1162 if (error == EROFS && (flags & FWRITE) == 0) {
1163 mode &= ~FWRITE;
1164 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1165 }
1166 if (error)
1167 goto popen_fail;
1168 sc->sc_parent->dk_rawvp = vp;
1169 } else {
1170 /*
1171 * Retrieve mode from an already opened wedge.
1172 */
1173 mode = 0;
1174 LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
1175 if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
1176 continue;
1177 mode = nsc->sc_mode;
1178 break;
1179 }
1180 }
1181 sc->sc_mode = mode;
1182 sc->sc_parent->dk_rawopens++;
1183 }
1184 KASSERT(sc->sc_mode != 0);
1185 if (flags & ~sc->sc_mode & FWRITE) {
1186 error = EROFS;
1187 goto popen_fail;
1188 }
1189 if (fmt == S_IFCHR)
1190 sc->sc_dk.dk_copenmask |= 1;
1191 else
1192 sc->sc_dk.dk_bopenmask |= 1;
1193 sc->sc_dk.dk_openmask =
1194 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1195
1196 popen_fail:
1197 mutex_exit(&sc->sc_parent->dk_rawlock);
1198 mutex_exit(&sc->sc_dk.dk_openlock);
1199 return (error);
1200 }
1201
1202 /*
1203 * Caller must hold sc->sc_dk.dk_openlock and sc->sc_parent->dk_rawlock.
1204 */
static int
dklastclose(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	int error = 0, mode;

	/* Remember the mode the parent was opened with for vn_close(). */
	mode = sc->sc_mode;

	/*
	 * Drop our reference on the parent; when the last wedge
	 * reference goes away, take the vnode out of the softc so it
	 * can be closed below (after the locks are released).
	 */
	vp = NULL;
	if (sc->sc_parent->dk_rawopens > 0) {
		if (--sc->sc_parent->dk_rawopens == 0) {
			KASSERT(sc->sc_parent->dk_rawvp != NULL);
			vp = sc->sc_parent->dk_rawvp;
			sc->sc_parent->dk_rawvp = NULL;
			sc->sc_mode = 0;
		}
	}

	if (vp) {
		dk_close_parent(vp, mode);
	}

	/* Per the contract above: both locks are released here. */
	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);

	return error;
}
1232
1233 /*
1234 * dkclose: [devsw entry point]
1235 *
1236 * Close a wedge.
1237 */
1238 static int
1239 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1240 {
1241 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1242 int error = 0;
1243
1244 if (sc == NULL)
1245 return (ENODEV);
1246 if (sc->sc_state != DKW_STATE_RUNNING)
1247 return (ENXIO);
1248
1249 KASSERT(sc->sc_dk.dk_openmask != 0);
1250
1251 mutex_enter(&sc->sc_dk.dk_openlock);
1252 mutex_enter(&sc->sc_parent->dk_rawlock);
1253
1254 if (fmt == S_IFCHR)
1255 sc->sc_dk.dk_copenmask &= ~1;
1256 else
1257 sc->sc_dk.dk_bopenmask &= ~1;
1258 sc->sc_dk.dk_openmask =
1259 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1260
1261 if (sc->sc_dk.dk_openmask == 0) {
1262 error = dklastclose(sc); /* releases locks */
1263 } else {
1264 mutex_exit(&sc->sc_parent->dk_rawlock);
1265 mutex_exit(&sc->sc_dk.dk_openlock);
1266 }
1267
1268 return (error);
1269 }
1270
/*
 * dkstrategy: [devsw entry point]
 *
 *	Perform I/O based on the wedge I/O strategy.  Validates the
 *	request, translates the block number into the parent device's
 *	address space, and enqueues it for dkstart().
 */
static void
dkstrategy(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	uint64_t p_size, p_offset;

	if (sc == NULL) {
		bp->b_error = ENODEV;
		goto done;
	}

	if (sc->sc_state != DKW_STATE_RUNNING ||
	    sc->sc_parent->dk_rawvp == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}

	/* If it's an empty transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	/* Convert the wedge's offset/size to DEV_BSIZE units. */
	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = sc->sc_size << sc->sc_parent->dk_blkshift;

	/* Make sure it's in-range. */
	if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
		goto done;

	/* Translate it to the parent's raw LBA. */
	bp->b_rawblkno = bp->b_blkno + p_offset;

	/* Place it in the queue and start I/O on the unit. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iopend++;
	disk_wait(&sc->sc_dk);
	bufq_put(sc->sc_bufq, bp);
	mutex_exit(&sc->sc_iolock);

	dkstart(sc);
	return;

 done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
1321
/*
 * dkstart:
 *
 *	Start I/O that has been enqueued on the wedge.  Drains the
 *	buffer queue, cloning each request onto an iobuf aimed at the
 *	parent's raw vnode.  sc_iolock is dropped and reacquired around
 *	every call that may sleep or recurse (getiobuf, putiobuf,
 *	biodone, VOP_STRATEGY).
 */
static void
dkstart(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	struct buf *bp, *nbp;

	mutex_enter(&sc->sc_iolock);

	/* Do as much work as has been enqueued. */
	while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
		if (sc->sc_state != DKW_STATE_RUNNING) {
			/* Wedge is going away; fail the request. */
			(void) bufq_get(sc->sc_bufq);
			if (--sc->sc_iopend == 0)
				cv_broadcast(&sc->sc_dkdrn);
			mutex_exit(&sc->sc_iolock);
			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* fetch an I/O buf with sc_iolock dropped */
		mutex_exit(&sc->sc_iolock);
		nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
		mutex_enter(&sc->sc_iolock);
		if (nbp == NULL) {
			/*
			 * No resources to run this request; leave the
			 * buffer queued up, and schedule a timer to
			 * restart the queue in 1/2 a second.
			 */
			callout_schedule(&sc->sc_restart_ch, hz / 2);
			break;
		}

		/*
		 * fetch buf, this can fail if another thread
		 * has already processed the queue, it can also
		 * return a completely different buf.
		 */
		bp = bufq_get(sc->sc_bufq);
		if (bp == NULL) {
			/* Queue was drained by someone else; recycle nbp. */
			mutex_exit(&sc->sc_iolock);
			putiobuf(nbp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* Instrumentation. */
		disk_busy(&sc->sc_dk);

		/* release lock for VOP_STRATEGY */
		mutex_exit(&sc->sc_iolock);

		/* Clone the request onto nbp, aimed at the parent device. */
		nbp->b_data = bp->b_data;
		nbp->b_flags = bp->b_flags;
		nbp->b_oflags = bp->b_oflags;
		nbp->b_cflags = bp->b_cflags;
		nbp->b_iodone = dkiodone;
		nbp->b_proc = bp->b_proc;
		nbp->b_blkno = bp->b_rawblkno;	/* already parent-relative */
		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
		nbp->b_bcount = bp->b_bcount;
		nbp->b_private = bp;	/* lets dkiodone() find the original */
		BIO_COPYPRIO(nbp, bp);

		vp = nbp->b_vp;
		if ((nbp->b_flags & B_READ) == 0) {
			/* Writes must be counted in the vnode's output. */
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		VOP_STRATEGY(vp, nbp);

		mutex_enter(&sc->sc_iolock);
	}

	mutex_exit(&sc->sc_iolock);
}
1407
/*
 * dkiodone:
 *
 *	I/O to a wedge has completed; alert the top half.  Runs as the
 *	b_iodone callback of the cloned iobuf created in dkstart().
 */
static void
dkiodone(struct buf *bp)
{
	struct buf *obp = bp->b_private;	/* original wedge request */
	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);

	/* Propagate the result to the original buffer. */
	if (bp->b_error != 0)
		obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	putiobuf(bp);

	mutex_enter(&sc->sc_iolock);
	/* Broadcast when no I/O remains pending (drain waiters). */
	if (--sc->sc_iopend == 0)
		cv_broadcast(&sc->sc_dkdrn);

	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
	    obp->b_flags & B_READ);
	mutex_exit(&sc->sc_iolock);

	biodone(obp);

	/* Kick the queue in case there is more work we can do. */
	dkstart(sc);
}
1437
/*
 * dkrestart:
 *
 *	Restart the work queue after it was stalled due to
 *	a resource shortage.  Invoked via a callout.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *const sc = v;

	dkstart(sc);
}
1451
/*
 * dkminphys:
 *
 *	Call parent's minphys function.  Temporarily substitutes the
 *	parent's dev_t into the buffer so the parent driver sees the
 *	device it expects, then restores the wedge's dev_t.
 *
 *	NOTE(review): sc is dereferenced without a NULL check; callers
 *	(dkread/dkwrite via physio) look the wedge up first, so
 *	presumably it cannot vanish in between — confirm.
 */
static void
dkminphys(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	dev_t dev;

	dev = bp->b_dev;
	bp->b_dev = sc->sc_pdev;
	if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
		(*sc->sc_parent->dk_driver->d_minphys)(bp);
	else
		minphys(bp);
	bp->b_dev = dev;
}
1471
1472 /*
1473 * dkread: [devsw entry point]
1474 *
1475 * Read from a wedge.
1476 */
1477 static int
1478 dkread(dev_t dev, struct uio *uio, int flags)
1479 {
1480 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1481
1482 if (sc == NULL)
1483 return (ENODEV);
1484 if (sc->sc_state != DKW_STATE_RUNNING)
1485 return (ENXIO);
1486
1487 return (physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio));
1488 }
1489
1490 /*
1491 * dkwrite: [devsw entry point]
1492 *
1493 * Write to a wedge.
1494 */
1495 static int
1496 dkwrite(dev_t dev, struct uio *uio, int flags)
1497 {
1498 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1499
1500 if (sc == NULL)
1501 return (ENODEV);
1502 if (sc->sc_state != DKW_STATE_RUNNING)
1503 return (ENXIO);
1504
1505 return (physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio));
1506 }
1507
/*
 * dkioctl: [devsw entry point]
 *
 *	Perform an ioctl request on a wedge.  Generic disk ioctls are
 *	handled by disk_ioctl(); a few are forwarded to the parent
 *	device; the wedge-specific queries are answered here.
 */
static int
dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	if (sc == NULL)
		return (ENODEV);
	if (sc->sc_state != DKW_STATE_RUNNING)
		return (ENXIO);
	if (sc->sc_parent->dk_rawvp == NULL)
		return (ENXIO);

	/*
	 * We pass NODEV instead of our device to indicate we don't
	 * want to handle disklabel ioctls
	 */
	error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return (error);

	error = 0;

	switch (cmd) {
	case DIOCGSTRATEGY:
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		/* Forward these straight to the parent device. */
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
		    l != NULL ? l->l_cred : NOCRED);
		break;
	case DIOCGWEDGEINFO:
	    {
		struct dkwedge_info *dkw = (void *) data;

		/* Report this wedge's identity and geometry. */
		strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw->dkw_devname));
		memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
		strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw->dkw_parent));
		dkw->dkw_offset = sc->sc_offset;
		dkw->dkw_size = sc->sc_size;
		strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));

		break;
	    }
	case DIOCGSECTORALIGN:
	    {
		struct disk_sectoralign *dsa = data;
		uint32_t r;

		/* Ask the parent for its alignment parameters. */
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
		    l != NULL ? l->l_cred : NOCRED);
		if (error)
			break;

		/*
		 * Rebase dsa_firstaligned from parent-relative to
		 * wedge-relative sectors; r is the phase of our start
		 * offset within the alignment unit.
		 */
		r = sc->sc_offset % dsa->dsa_alignment;
		if (r < dsa->dsa_firstaligned)
			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
		else
			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
			    dsa->dsa_alignment) - r;
		break;
	    }
	default:
		error = ENOTTY;
	}

	return (error);
}
1583
/*
 * dkdiscard: [devsw entry point]
 *
 *	Perform a discard-range request on a wedge.  The byte range is
 *	bounds-checked against the wedge, translated to a
 *	parent-relative position, and forwarded to the parent vnode.
 */
static int
dkdiscard(dev_t dev, off_t pos, off_t len)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	unsigned shift;
	off_t offset, maxlen;
	int error;

	if (sc == NULL)
		return (ENODEV);
	if (sc->sc_state != DKW_STATE_RUNNING)
		return (ENXIO);
	if (sc->sc_parent->dk_rawvp == NULL)
		return (ENXIO);

	/* Convert the wedge's extent from sectors to bytes. */
	shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
	/* The shifts below must not overflow off_t. */
	KASSERT(__type_fit(off_t, sc->sc_size));
	KASSERT(__type_fit(off_t, sc->sc_offset));
	KASSERT(0 <= sc->sc_offset);
	KASSERT(sc->sc_size <= (__type_max(off_t) >> shift));
	KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - sc->sc_size));
	offset = ((off_t)sc->sc_offset << shift);
	maxlen = ((off_t)sc->sc_size << shift);

	/* Reject ranges that extend beyond the wedge. */
	if (len > maxlen)
		return (EINVAL);
	if (pos > (maxlen - len))
		return (EINVAL);

	/* Translate to a parent-relative byte position. */
	pos += offset;

	vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
	VOP_UNLOCK(sc->sc_parent->dk_rawvp);

	return error;
}
1626
1627 /*
1628 * dksize: [devsw entry point]
1629 *
1630 * Query the size of a wedge for the purpose of performing a dump
1631 * or for swapping to.
1632 */
1633 static int
1634 dksize(dev_t dev)
1635 {
1636 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1637 uint64_t p_size;
1638 int rv = -1;
1639
1640 if (sc == NULL)
1641 return (-1);
1642 if (sc->sc_state != DKW_STATE_RUNNING)
1643 return (-1);
1644
1645 mutex_enter(&sc->sc_dk.dk_openlock);
1646 mutex_enter(&sc->sc_parent->dk_rawlock);
1647
1648 /* Our content type is static, no need to open the device. */
1649
1650 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1651 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1652 /* Saturate if we are larger than INT_MAX. */
1653 if (p_size > INT_MAX)
1654 rv = INT_MAX;
1655 else
1656 rv = (int) p_size;
1657 }
1658
1659 mutex_exit(&sc->sc_parent->dk_rawlock);
1660 mutex_exit(&sc->sc_dk.dk_openlock);
1661
1662 return (rv);
1663 }
1664
1665 /*
1666 * dkdump: [devsw entry point]
1667 *
1668 * Perform a crash dump to a wedge.
1669 */
1670 static int
1671 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1672 {
1673 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1674 const struct bdevsw *bdev;
1675 uint64_t p_size, p_offset;
1676 int rv = 0;
1677
1678 if (sc == NULL)
1679 return (ENODEV);
1680 if (sc->sc_state != DKW_STATE_RUNNING)
1681 return (ENXIO);
1682
1683 mutex_enter(&sc->sc_dk.dk_openlock);
1684 mutex_enter(&sc->sc_parent->dk_rawlock);
1685
1686 /* Our content type is static, no need to open the device. */
1687
1688 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1689 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1690 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0) {
1691 rv = ENXIO;
1692 goto out;
1693 }
1694 if (size % DEV_BSIZE != 0) {
1695 rv = EINVAL;
1696 goto out;
1697 }
1698
1699 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1700 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1701
1702 if (blkno < 0 || blkno + size / DEV_BSIZE > p_size) {
1703 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1704 "p_size (%" PRIu64 ")\n", __func__, blkno,
1705 size / DEV_BSIZE, p_size);
1706 rv = EINVAL;
1707 goto out;
1708 }
1709
1710 bdev = bdevsw_lookup(sc->sc_pdev);
1711 rv = (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1712
1713 out:
1714 mutex_exit(&sc->sc_parent->dk_rawlock);
1715 mutex_exit(&sc->sc_dk.dk_openlock);
1716
1717 return rv;
1718 }
1719
1720 /*
1721 * config glue
1722 */
1723
1724 /*
1725 * dkwedge_find_partition
1726 *
1727 * Find wedge corresponding to the specified parent name
1728 * and offset/length.
1729 */
1730 device_t
1731 dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
1732 {
1733 struct dkwedge_softc *sc;
1734 int i;
1735 device_t wedge = NULL;
1736
1737 rw_enter(&dkwedges_lock, RW_READER);
1738 for (i = 0; i < ndkwedges; i++) {
1739 if ((sc = dkwedges[i]) == NULL)
1740 continue;
1741 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1742 sc->sc_offset == startblk &&
1743 sc->sc_size == nblks) {
1744 if (wedge) {
1745 printf("WARNING: double match for boot wedge "
1746 "(%s, %s)\n",
1747 device_xname(wedge),
1748 device_xname(sc->sc_dev));
1749 continue;
1750 }
1751 wedge = sc->sc_dev;
1752 }
1753 }
1754 rw_exit(&dkwedges_lock);
1755
1756 return wedge;
1757 }
1758
1759 const char *
1760 dkwedge_get_parent_name(dev_t dev)
1761 {
1762 /* XXX: perhaps do this in lookup? */
1763 int bmaj = bdevsw_lookup_major(&dk_bdevsw);
1764 int cmaj = cdevsw_lookup_major(&dk_cdevsw);
1765 if (major(dev) != bmaj && major(dev) != cmaj)
1766 return NULL;
1767 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1768 if (sc == NULL)
1769 return NULL;
1770 return sc->sc_parent->dk_name;
1771 }
1772