dk.c revision 1.133 1 /* $NetBSD: dk.c,v 1.133 2023/04/21 18:25:22 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.133 2023/04/21 18:25:22 riastradh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/types.h>
41
42 #include <sys/buf.h>
43 #include <sys/bufq.h>
44 #include <sys/callout.h>
45 #include <sys/conf.h>
46 #include <sys/device.h>
47 #include <sys/disk.h>
48 #include <sys/disklabel.h>
49 #include <sys/errno.h>
50 #include <sys/fcntl.h>
51 #include <sys/ioctl.h>
52 #include <sys/kauth.h>
53 #include <sys/kernel.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/proc.h>
57 #include <sys/stat.h>
58 #include <sys/systm.h>
59 #include <sys/vnode.h>
60
61 #include <miscfs/specfs/specdev.h>
62
63 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
64
/*
 * Lifecycle states of a wedge, stored in dkwedge_softc::sc_state.
 */
typedef enum {
	/* Allocated by dkwedge_add() but not yet fully attached. */
	DKW_STATE_LARVAL	= 0,
	/* Attached and usable; dkopen() requires this state. */
	DKW_STATE_RUNNING	= 1,
	/* Being torn down by dkwedge_detach(). */
	DKW_STATE_DYING		= 2,
	/* Set by dkwedge_detach() right before the softc is freed. */
	DKW_STATE_DEAD		= 666
} dkwedge_state_t;
71
72 struct dkwedge_softc {
73 device_t sc_dev; /* pointer to our pseudo-device */
74 struct cfdata sc_cfdata; /* our cfdata structure */
75 uint8_t sc_wname[128]; /* wedge name (Unicode, UTF-8) */
76
77 dkwedge_state_t sc_state; /* state this wedge is in */
78
79 struct disk *sc_parent; /* parent disk */
80 daddr_t sc_offset; /* LBA offset of wedge in parent */
81 uint64_t sc_size; /* size of wedge in blocks */
82 char sc_ptype[32]; /* partition type */
83 dev_t sc_pdev; /* cached parent's dev_t */
84 /* link on parent's wedge list */
85 LIST_ENTRY(dkwedge_softc) sc_plink;
86
87 struct disk sc_dk; /* our own disk structure */
88 struct bufq_state *sc_bufq; /* buffer queue */
89 struct callout sc_restart_ch; /* callout to restart I/O */
90
91 kmutex_t sc_iolock;
92 kcondvar_t sc_dkdrn;
93 u_int sc_iopend; /* I/Os pending */
94 int sc_mode; /* parent open mode */
95 };
96
97 static void dkstart(struct dkwedge_softc *);
98 static void dkiodone(struct buf *);
99 static void dkrestart(void *);
100 static void dkminphys(struct buf *);
101
102 static int dkfirstopen(struct dkwedge_softc *, int);
103 static void dklastclose(struct dkwedge_softc *);
104 static int dkwedge_cleanup_parent(struct dkwedge_softc *, int);
105 static int dkwedge_detach(device_t, int);
106 static void dkwedge_delall1(struct disk *, bool);
107 static int dkwedge_del1(struct dkwedge_info *, int);
108 static int dk_open_parent(dev_t, int, struct vnode **);
109 static int dk_close_parent(struct vnode *, int);
110
111 static dev_type_open(dkopen);
112 static dev_type_close(dkclose);
113 static dev_type_read(dkread);
114 static dev_type_write(dkwrite);
115 static dev_type_ioctl(dkioctl);
116 static dev_type_strategy(dkstrategy);
117 static dev_type_dump(dkdump);
118 static dev_type_size(dksize);
119 static dev_type_discard(dkdiscard);
120
121 const struct bdevsw dk_bdevsw = {
122 .d_open = dkopen,
123 .d_close = dkclose,
124 .d_strategy = dkstrategy,
125 .d_ioctl = dkioctl,
126 .d_dump = dkdump,
127 .d_psize = dksize,
128 .d_discard = dkdiscard,
129 .d_flag = D_DISK | D_MPSAFE
130 };
131
132 const struct cdevsw dk_cdevsw = {
133 .d_open = dkopen,
134 .d_close = dkclose,
135 .d_read = dkread,
136 .d_write = dkwrite,
137 .d_ioctl = dkioctl,
138 .d_stop = nostop,
139 .d_tty = notty,
140 .d_poll = nopoll,
141 .d_mmap = nommap,
142 .d_kqfilter = nokqfilter,
143 .d_discard = dkdiscard,
144 .d_flag = D_DISK | D_MPSAFE
145 };
146
147 static struct dkwedge_softc **dkwedges;
148 static u_int ndkwedges;
149 static krwlock_t dkwedges_lock;
150
151 static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
152 static krwlock_t dkwedge_discovery_methods_lock;
153
154 /*
155 * dkwedge_match:
156 *
157 * Autoconfiguration match function for pseudo-device glue.
158 */
159 static int
160 dkwedge_match(device_t parent, cfdata_t match, void *aux)
161 {
162
163 /* Pseudo-device; always present. */
164 return 1;
165 }
166
167 /*
168 * dkwedge_attach:
169 *
170 * Autoconfiguration attach function for pseudo-device glue.
171 */
172 static void
173 dkwedge_attach(device_t parent, device_t self, void *aux)
174 {
175
176 if (!pmf_device_register(self, NULL, NULL))
177 aprint_error_dev(self, "couldn't establish power handler\n");
178 }
179
180 CFDRIVER_DECL(dk, DV_DISK, NULL);
181 CFATTACH_DECL3_NEW(dk, 0,
182 dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
183 DVF_DETACH_SHUTDOWN);
184
185 /*
186 * dkwedge_wait_drain:
187 *
188 * Wait for I/O on the wedge to drain.
189 */
190 static void
191 dkwedge_wait_drain(struct dkwedge_softc *sc)
192 {
193
194 mutex_enter(&sc->sc_iolock);
195 while (sc->sc_iopend != 0)
196 cv_wait(&sc->sc_dkdrn, &sc->sc_iolock);
197 mutex_exit(&sc->sc_iolock);
198 }
199
200 /*
201 * dkwedge_compute_pdev:
202 *
203 * Compute the parent disk's dev_t.
204 */
205 static int
206 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
207 {
208 const char *name, *cp;
209 devmajor_t pmaj;
210 int punit;
211 char devname[16];
212
213 name = pname;
214 switch (type) {
215 case VBLK:
216 pmaj = devsw_name2blk(name, devname, sizeof(devname));
217 break;
218 case VCHR:
219 pmaj = devsw_name2chr(name, devname, sizeof(devname));
220 break;
221 default:
222 pmaj = NODEVMAJOR;
223 break;
224 }
225 if (pmaj == NODEVMAJOR)
226 return ENXIO;
227
228 name += strlen(devname);
229 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
230 punit = (punit * 10) + (*cp - '0');
231 if (cp == name) {
232 /* Invalid parent disk name. */
233 return ENXIO;
234 }
235
236 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
237
238 return 0;
239 }
240
241 /*
242 * dkwedge_array_expand:
243 *
244 * Expand the dkwedges array.
245 *
246 * Releases and reacquires dkwedges_lock as a writer.
247 */
248 static int
249 dkwedge_array_expand(void)
250 {
251
252 const unsigned incr = 16;
253 unsigned newcnt, oldcnt;
254 struct dkwedge_softc **newarray = NULL, **oldarray = NULL;
255
256 KASSERT(rw_write_held(&dkwedges_lock));
257
258 oldcnt = ndkwedges;
259 oldarray = dkwedges;
260
261 if (oldcnt >= INT_MAX - incr)
262 return ENFILE; /* XXX */
263 newcnt = oldcnt + incr;
264
265 rw_exit(&dkwedges_lock);
266 newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
267 M_WAITOK|M_ZERO);
268 rw_enter(&dkwedges_lock, RW_WRITER);
269
270 if (ndkwedges != oldcnt || dkwedges != oldarray) {
271 oldarray = NULL; /* already recycled */
272 goto out;
273 }
274
275 if (oldarray != NULL)
276 memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
277 dkwedges = newarray;
278 newarray = NULL; /* transferred to dkwedges */
279 ndkwedges = newcnt;
280
281 out: rw_exit(&dkwedges_lock);
282 if (oldarray != NULL)
283 free(oldarray, M_DKWEDGE);
284 if (newarray != NULL)
285 free(newarray, M_DKWEDGE);
286 rw_enter(&dkwedges_lock, RW_WRITER);
287 return 0;
288 }
289
290 static void
291 dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
292 {
293 struct disk *dk = &sc->sc_dk;
294 struct disk_geom *dg = &dk->dk_geom;
295
296 memset(dg, 0, sizeof(*dg));
297
298 dg->dg_secperunit = sc->sc_size;
299 dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;
300
301 /* fake numbers, 1 cylinder is 1 MB with default sector size */
302 dg->dg_nsectors = 32;
303 dg->dg_ntracks = 64;
304 dg->dg_ncylinders =
305 dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);
306
307 disk_set_info(sc->sc_dev, dk, NULL);
308 }
309
310 /*
311 * dkwedge_add: [exported function]
312 *
313 * Add a disk wedge based on the provided information.
314 *
315 * The incoming dkw_devname[] is ignored, instead being
316 * filled in and returned to the caller.
317 */
318 int
319 dkwedge_add(struct dkwedge_info *dkw)
320 {
321 struct dkwedge_softc *sc, *lsc;
322 struct disk *pdk;
323 u_int unit;
324 int error;
325 dev_t pdev;
326
327 dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
328 pdk = disk_find(dkw->dkw_parent);
329 if (pdk == NULL)
330 return ENXIO;
331
332 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
333 if (error)
334 return error;
335
336 if (dkw->dkw_offset < 0)
337 return EINVAL;
338
339 /*
340 * Check for an existing wedge at the same disk offset. Allow
341 * updating a wedge if the only change is the size, and the new
342 * size is larger than the old.
343 */
344 sc = NULL;
345 mutex_enter(&pdk->dk_openlock);
346 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
347 if (lsc->sc_offset != dkw->dkw_offset)
348 continue;
349 if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
350 break;
351 if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
352 break;
353 if (lsc->sc_size > dkw->dkw_size)
354 break;
355
356 sc = lsc;
357 sc->sc_size = dkw->dkw_size;
358 dk_set_geometry(sc, pdk);
359
360 break;
361 }
362 mutex_exit(&pdk->dk_openlock);
363
364 if (sc != NULL)
365 goto announce;
366
367 sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
368 sc->sc_state = DKW_STATE_LARVAL;
369 sc->sc_parent = pdk;
370 sc->sc_pdev = pdev;
371 sc->sc_offset = dkw->dkw_offset;
372 sc->sc_size = dkw->dkw_size;
373
374 memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
375 sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';
376
377 memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
378 sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';
379
380 bufq_alloc(&sc->sc_bufq, "fcfs", 0);
381
382 callout_init(&sc->sc_restart_ch, 0);
383 callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);
384
385 mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);
386 cv_init(&sc->sc_dkdrn, "dkdrn");
387
388 /*
389 * Wedge will be added; increment the wedge count for the parent.
390 * Only allow this to happen if RAW_PART is the only thing open.
391 */
392 mutex_enter(&pdk->dk_openlock);
393 if (pdk->dk_openmask & ~(1 << RAW_PART))
394 error = EBUSY;
395 else {
396 /* Check for wedge overlap. */
397 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
398 daddr_t lastblk = sc->sc_offset + sc->sc_size - 1;
399 daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1;
400
401 if (sc->sc_offset >= lsc->sc_offset &&
402 sc->sc_offset <= llastblk) {
403 /* Overlaps the tail of the existing wedge. */
404 break;
405 }
406 if (lastblk >= lsc->sc_offset &&
407 lastblk <= llastblk) {
408 /* Overlaps the head of the existing wedge. */
409 break;
410 }
411 }
412 if (lsc != NULL) {
413 if (sc->sc_offset == lsc->sc_offset &&
414 sc->sc_size == lsc->sc_size &&
415 strcmp(sc->sc_wname, lsc->sc_wname) == 0)
416 error = EEXIST;
417 else
418 error = EINVAL;
419 } else {
420 pdk->dk_nwedges++;
421 LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
422 }
423 }
424 mutex_exit(&pdk->dk_openlock);
425 if (error) {
426 cv_destroy(&sc->sc_dkdrn);
427 mutex_destroy(&sc->sc_iolock);
428 bufq_free(sc->sc_bufq);
429 free(sc, M_DKWEDGE);
430 return error;
431 }
432
433 /* Fill in our cfdata for the pseudo-device glue. */
434 sc->sc_cfdata.cf_name = dk_cd.cd_name;
435 sc->sc_cfdata.cf_atname = dk_ca.ca_name;
436 /* sc->sc_cfdata.cf_unit set below */
437 sc->sc_cfdata.cf_fstate = FSTATE_STAR;
438
439 /* Insert the larval wedge into the array. */
440 rw_enter(&dkwedges_lock, RW_WRITER);
441 for (error = 0;;) {
442 struct dkwedge_softc **scpp;
443
444 /*
445 * Check for a duplicate wname while searching for
446 * a slot.
447 */
448 for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
449 if (dkwedges[unit] == NULL) {
450 if (scpp == NULL) {
451 scpp = &dkwedges[unit];
452 sc->sc_cfdata.cf_unit = unit;
453 }
454 } else {
455 /* XXX Unicode. */
456 if (strcmp(dkwedges[unit]->sc_wname,
457 sc->sc_wname) == 0) {
458 error = EEXIST;
459 break;
460 }
461 }
462 }
463 if (error)
464 break;
465 KASSERT(unit == ndkwedges);
466 if (scpp == NULL) {
467 error = dkwedge_array_expand();
468 if (error)
469 break;
470 } else {
471 KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
472 *scpp = sc;
473 break;
474 }
475 }
476 rw_exit(&dkwedges_lock);
477 if (error) {
478 mutex_enter(&pdk->dk_openlock);
479 pdk->dk_nwedges--;
480 LIST_REMOVE(sc, sc_plink);
481 mutex_exit(&pdk->dk_openlock);
482
483 cv_destroy(&sc->sc_dkdrn);
484 mutex_destroy(&sc->sc_iolock);
485 bufq_free(sc->sc_bufq);
486 free(sc, M_DKWEDGE);
487 return error;
488 }
489
490 /*
491 * Now that we know the unit #, attach a pseudo-device for
492 * this wedge instance. This will provide us with the
493 * device_t necessary for glue to other parts of the system.
494 *
495 * This should never fail, unless we're almost totally out of
496 * memory.
497 */
498 if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
499 aprint_error("%s%u: unable to attach pseudo-device\n",
500 sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);
501
502 rw_enter(&dkwedges_lock, RW_WRITER);
503 dkwedges[sc->sc_cfdata.cf_unit] = NULL;
504 rw_exit(&dkwedges_lock);
505
506 mutex_enter(&pdk->dk_openlock);
507 pdk->dk_nwedges--;
508 LIST_REMOVE(sc, sc_plink);
509 mutex_exit(&pdk->dk_openlock);
510
511 cv_destroy(&sc->sc_dkdrn);
512 mutex_destroy(&sc->sc_iolock);
513 bufq_free(sc->sc_bufq);
514 free(sc, M_DKWEDGE);
515 return ENOMEM;
516 }
517
518 /*
519 * XXX Really ought to make the disk_attach() and the changing
520 * of state to RUNNING atomic.
521 */
522
523 disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
524 dk_set_geometry(sc, pdk);
525 disk_attach(&sc->sc_dk);
526
527 /* Disk wedge is ready for use! */
528 sc->sc_state = DKW_STATE_RUNNING;
529
530 announce:
531 /* Announce our arrival. */
532 aprint_normal(
533 "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
534 device_xname(sc->sc_dev), pdk->dk_name,
535 sc->sc_wname, /* XXX Unicode */
536 sc->sc_size, sc->sc_offset,
537 sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);
538
539 /* Return the devname to the caller. */
540 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
541 sizeof(dkw->dkw_devname));
542
543 return 0;
544 }
545
546 /*
547 * dkwedge_find:
548 *
549 * Lookup a disk wedge based on the provided information.
550 * NOTE: We look up the wedge based on the wedge devname,
551 * not wname.
552 *
553 * Return NULL if the wedge is not found, otherwise return
554 * the wedge's softc. Assign the wedge's unit number to unitp
555 * if unitp is not NULL.
556 */
557 static struct dkwedge_softc *
558 dkwedge_find(struct dkwedge_info *dkw, u_int *unitp)
559 {
560 struct dkwedge_softc *sc = NULL;
561 u_int unit;
562
563 /* Find our softc. */
564 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
565 rw_enter(&dkwedges_lock, RW_READER);
566 for (unit = 0; unit < ndkwedges; unit++) {
567 if ((sc = dkwedges[unit]) != NULL &&
568 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
569 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
570 break;
571 }
572 }
573 rw_exit(&dkwedges_lock);
574 if (unit == ndkwedges)
575 return NULL;
576
577 if (unitp != NULL)
578 *unitp = unit;
579
580 return sc;
581 }
582
/*
 * dkwedge_del:	[exported function]
 *
 *	Delete a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 *
 *	Equivalent to dkwedge_del1() with no detach flags.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{
	const int noflags = 0;

	return dkwedge_del1(dkw, noflags);
}
596
597 int
598 dkwedge_del1(struct dkwedge_info *dkw, int flags)
599 {
600 struct dkwedge_softc *sc = NULL;
601
602 /* Find our softc. */
603 if ((sc = dkwedge_find(dkw, NULL)) == NULL)
604 return ESRCH;
605
606 return config_detach(sc->sc_dev, flags);
607 }
608
609 static int
610 dkwedge_cleanup_parent(struct dkwedge_softc *sc, int flags)
611 {
612 struct disk *dk = &sc->sc_dk;
613 int rc;
614
615 rc = 0;
616 mutex_enter(&dk->dk_openlock);
617 if (dk->dk_openmask == 0) {
618 /* nothing to do */
619 } else if ((flags & DETACH_FORCE) == 0) {
620 rc = EBUSY;
621 } else {
622 mutex_enter(&sc->sc_parent->dk_rawlock);
623 dklastclose(sc);
624 mutex_exit(&sc->sc_parent->dk_rawlock);
625 }
626 mutex_exit(&sc->sc_dk.dk_openlock);
627
628 return rc;
629 }
630
631 /*
632 * dkwedge_detach:
633 *
634 * Autoconfiguration detach function for pseudo-device glue.
635 */
636 static int
637 dkwedge_detach(device_t self, int flags)
638 {
639 struct dkwedge_softc *sc = NULL;
640 u_int unit;
641 int bmaj, cmaj, rc;
642
643 rw_enter(&dkwedges_lock, RW_WRITER);
644 for (unit = 0; unit < ndkwedges; unit++) {
645 if ((sc = dkwedges[unit]) != NULL && sc->sc_dev == self)
646 break;
647 }
648 if (unit == ndkwedges)
649 rc = ENXIO;
650 else if ((rc = dkwedge_cleanup_parent(sc, flags)) == 0) {
651 /* Mark the wedge as dying. */
652 sc->sc_state = DKW_STATE_DYING;
653 }
654 rw_exit(&dkwedges_lock);
655
656 if (rc != 0)
657 return rc;
658
659 pmf_device_deregister(self);
660
661 /* Locate the wedge major numbers. */
662 bmaj = bdevsw_lookup_major(&dk_bdevsw);
663 cmaj = cdevsw_lookup_major(&dk_cdevsw);
664
665 /* Kill any pending restart. */
666 callout_stop(&sc->sc_restart_ch);
667
668 /*
669 * dkstart() will kill any queued buffers now that the
670 * state of the wedge is not RUNNING. Once we've done
671 * that, wait for any other pending I/O to complete.
672 */
673 dkstart(sc);
674 dkwedge_wait_drain(sc);
675
676 /* Nuke the vnodes for any open instances. */
677 vdevgone(bmaj, unit, unit, VBLK);
678 vdevgone(cmaj, unit, unit, VCHR);
679
680 /* Clean up the parent. */
681 dkwedge_cleanup_parent(sc, flags | DETACH_FORCE);
682
683 /* Announce our departure. */
684 aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
685 sc->sc_parent->dk_name,
686 sc->sc_wname); /* XXX Unicode */
687
688 mutex_enter(&sc->sc_parent->dk_openlock);
689 sc->sc_parent->dk_nwedges--;
690 LIST_REMOVE(sc, sc_plink);
691 mutex_exit(&sc->sc_parent->dk_openlock);
692
693 /* Delete our buffer queue. */
694 bufq_free(sc->sc_bufq);
695
696 /* Detach from the disk list. */
697 disk_detach(&sc->sc_dk);
698 disk_destroy(&sc->sc_dk);
699
700 /* Poof. */
701 rw_enter(&dkwedges_lock, RW_WRITER);
702 dkwedges[unit] = NULL;
703 sc->sc_state = DKW_STATE_DEAD;
704 rw_exit(&dkwedges_lock);
705
706 mutex_destroy(&sc->sc_iolock);
707 cv_destroy(&sc->sc_dkdrn);
708
709 free(sc, M_DKWEDGE);
710
711 return 0;
712 }
713
714 /*
715 * dkwedge_delall: [exported function]
716 *
717 * Delete all of the wedges on the specified disk. Used when
718 * a disk is being detached.
719 */
720 void
721 dkwedge_delall(struct disk *pdk)
722 {
723
724 dkwedge_delall1(pdk, false);
725 }
726
727 static void
728 dkwedge_delall1(struct disk *pdk, bool idleonly)
729 {
730 struct dkwedge_info dkw;
731 struct dkwedge_softc *sc;
732 int flags;
733
734 flags = DETACH_QUIET;
735 if (!idleonly)
736 flags |= DETACH_FORCE;
737
738 for (;;) {
739 mutex_enter(&pdk->dk_openlock);
740 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
741 if (!idleonly || sc->sc_dk.dk_openmask == 0)
742 break;
743 }
744 if (sc == NULL) {
745 KASSERT(idleonly || pdk->dk_nwedges == 0);
746 mutex_exit(&pdk->dk_openlock);
747 return;
748 }
749 strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
750 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
751 sizeof(dkw.dkw_devname));
752 mutex_exit(&pdk->dk_openlock);
753 (void) dkwedge_del1(&dkw, flags);
754 }
755 }
756
757 /*
758 * dkwedge_list: [exported function]
759 *
760 * List all of the wedges on a particular disk.
761 */
762 int
763 dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
764 {
765 struct uio uio;
766 struct iovec iov;
767 struct dkwedge_softc *sc;
768 struct dkwedge_info dkw;
769 int error = 0;
770
771 iov.iov_base = dkwl->dkwl_buf;
772 iov.iov_len = dkwl->dkwl_bufsize;
773
774 uio.uio_iov = &iov;
775 uio.uio_iovcnt = 1;
776 uio.uio_offset = 0;
777 uio.uio_resid = dkwl->dkwl_bufsize;
778 uio.uio_rw = UIO_READ;
779 KASSERT(l == curlwp);
780 uio.uio_vmspace = l->l_proc->p_vmspace;
781
782 dkwl->dkwl_ncopied = 0;
783
784 mutex_enter(&pdk->dk_openlock);
785 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
786 if (uio.uio_resid < sizeof(dkw))
787 break;
788
789 if (sc->sc_state != DKW_STATE_RUNNING)
790 continue;
791
792 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
793 sizeof(dkw.dkw_devname));
794 memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
795 dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
796 strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
797 sizeof(dkw.dkw_parent));
798 dkw.dkw_offset = sc->sc_offset;
799 dkw.dkw_size = sc->sc_size;
800 strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));
801
802 error = uiomove(&dkw, sizeof(dkw), &uio);
803 if (error)
804 break;
805 dkwl->dkwl_ncopied++;
806 }
807 dkwl->dkwl_nwedges = pdk->dk_nwedges;
808 mutex_exit(&pdk->dk_openlock);
809
810 return error;
811 }
812
813 device_t
814 dkwedge_find_by_wname(const char *wname)
815 {
816 device_t dv = NULL;
817 struct dkwedge_softc *sc;
818 int i;
819
820 rw_enter(&dkwedges_lock, RW_WRITER);
821 for (i = 0; i < ndkwedges; i++) {
822 if ((sc = dkwedges[i]) == NULL)
823 continue;
824 if (strcmp(sc->sc_wname, wname) == 0) {
825 if (dv != NULL) {
826 printf(
827 "WARNING: double match for wedge name %s "
828 "(%s, %s)\n", wname, device_xname(dv),
829 device_xname(sc->sc_dev));
830 continue;
831 }
832 dv = sc->sc_dev;
833 }
834 }
835 rw_exit(&dkwedges_lock);
836 return dv;
837 }
838
839 device_t
840 dkwedge_find_by_parent(const char *name, size_t *i)
841 {
842
843 rw_enter(&dkwedges_lock, RW_WRITER);
844 for (; *i < (size_t)ndkwedges; (*i)++) {
845 struct dkwedge_softc *sc;
846 if ((sc = dkwedges[*i]) == NULL)
847 continue;
848 if (strcmp(sc->sc_parent->dk_name, name) != 0)
849 continue;
850 rw_exit(&dkwedges_lock);
851 return sc->sc_dev;
852 }
853 rw_exit(&dkwedges_lock);
854 return NULL;
855 }
856
857 void
858 dkwedge_print_wnames(void)
859 {
860 struct dkwedge_softc *sc;
861 int i;
862
863 rw_enter(&dkwedges_lock, RW_WRITER);
864 for (i = 0; i < ndkwedges; i++) {
865 if ((sc = dkwedges[i]) == NULL)
866 continue;
867 printf(" wedge:%s", sc->sc_wname);
868 }
869 rw_exit(&dkwedges_lock);
870 }
871
872 /*
873 * We need a dummy object to stuff into the dkwedge discovery method link
874 * set to ensure that there is always at least one object in the set.
875 */
876 static struct dkwedge_discovery_method dummy_discovery_method;
877 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
878
879 /*
880 * dkwedge_init:
881 *
882 * Initialize the disk wedge subsystem.
883 */
884 void
885 dkwedge_init(void)
886 {
887 __link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
888 struct dkwedge_discovery_method * const *ddmp;
889 struct dkwedge_discovery_method *lddm, *ddm;
890
891 rw_init(&dkwedges_lock);
892 rw_init(&dkwedge_discovery_methods_lock);
893
894 if (config_cfdriver_attach(&dk_cd) != 0)
895 panic("dkwedge: unable to attach cfdriver");
896 if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
897 panic("dkwedge: unable to attach cfattach");
898
899 rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);
900
901 LIST_INIT(&dkwedge_discovery_methods);
902
903 __link_set_foreach(ddmp, dkwedge_methods) {
904 ddm = *ddmp;
905 if (ddm == &dummy_discovery_method)
906 continue;
907 if (LIST_EMPTY(&dkwedge_discovery_methods)) {
908 LIST_INSERT_HEAD(&dkwedge_discovery_methods,
909 ddm, ddm_list);
910 continue;
911 }
912 LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
913 if (ddm->ddm_priority == lddm->ddm_priority) {
914 aprint_error("dk-method-%s: method \"%s\" "
915 "already exists at priority %d\n",
916 ddm->ddm_name, lddm->ddm_name,
917 lddm->ddm_priority);
918 /* Not inserted. */
919 break;
920 }
921 if (ddm->ddm_priority < lddm->ddm_priority) {
922 /* Higher priority; insert before. */
923 LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
924 break;
925 }
926 if (LIST_NEXT(lddm, ddm_list) == NULL) {
927 /* Last one; insert after. */
928 KASSERT(lddm->ddm_priority < ddm->ddm_priority);
929 LIST_INSERT_AFTER(lddm, ddm, ddm_list);
930 break;
931 }
932 }
933 }
934
935 rw_exit(&dkwedge_discovery_methods_lock);
936 }
937
/*
 * Whether dkwedge_discover() does anything at all.  Defaults to on only
 * when the kernel is built with DKWEDGE_AUTODISCOVER.
 */
#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif
943
944 /*
945 * dkwedge_discover: [exported function]
946 *
947 * Discover the wedges on a newly attached disk.
948 * Remove all unused wedges on the disk first.
949 */
950 void
951 dkwedge_discover(struct disk *pdk)
952 {
953 struct dkwedge_discovery_method *ddm;
954 struct vnode *vp;
955 int error;
956 dev_t pdev;
957
958 /*
959 * Require people playing with wedges to enable this explicitly.
960 */
961 if (dkwedge_autodiscover == 0)
962 return;
963
964 rw_enter(&dkwedge_discovery_methods_lock, RW_READER);
965
966 /*
967 * Use the character device for scanning, the block device
968 * is busy if there are already wedges attached.
969 */
970 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
971 if (error) {
972 aprint_error("%s: unable to compute pdev, error = %d\n",
973 pdk->dk_name, error);
974 goto out;
975 }
976
977 error = cdevvp(pdev, &vp);
978 if (error) {
979 aprint_error("%s: unable to find vnode for pdev, error = %d\n",
980 pdk->dk_name, error);
981 goto out;
982 }
983
984 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
985 if (error) {
986 aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
987 pdk->dk_name, error);
988 vrele(vp);
989 goto out;
990 }
991
992 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
993 if (error) {
994 if (error != ENXIO)
995 aprint_error("%s: unable to open device, error = %d\n",
996 pdk->dk_name, error);
997 vput(vp);
998 goto out;
999 }
1000 VOP_UNLOCK(vp);
1001
1002 /*
1003 * Remove unused wedges
1004 */
1005 dkwedge_delall1(pdk, true);
1006
1007 /*
1008 * For each supported partition map type, look to see if
1009 * this map type exists. If so, parse it and add the
1010 * corresponding wedges.
1011 */
1012 LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
1013 error = (*ddm->ddm_discover)(pdk, vp);
1014 if (error == 0) {
1015 /* Successfully created wedges; we're done. */
1016 break;
1017 }
1018 }
1019
1020 error = vn_close(vp, FREAD, NOCRED);
1021 if (error) {
1022 aprint_error("%s: unable to close device, error = %d\n",
1023 pdk->dk_name, error);
1024 /* We'll just assume the vnode has been cleaned up. */
1025 }
1026
1027 out:
1028 rw_exit(&dkwedge_discovery_methods_lock);
1029 }
1030
1031 /*
1032 * dkwedge_read:
1033 *
1034 * Read some data from the specified disk, used for
1035 * partition discovery.
1036 */
1037 int
1038 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
1039 void *tbuf, size_t len)
1040 {
1041 buf_t *bp;
1042 int error;
1043 bool isopen;
1044 dev_t bdev;
1045 struct vnode *bdvp;
1046
1047 /*
1048 * The kernel cannot read from a character device vnode
1049 * as physio() only handles user memory.
1050 *
1051 * If the block device has already been opened by a wedge
1052 * use that vnode and temporarily bump the open counter.
1053 *
1054 * Otherwise try to open the block device.
1055 */
1056
1057 bdev = devsw_chr2blk(vp->v_rdev);
1058
1059 mutex_enter(&pdk->dk_rawlock);
1060 if (pdk->dk_rawopens != 0) {
1061 KASSERT(pdk->dk_rawvp != NULL);
1062 isopen = true;
1063 ++pdk->dk_rawopens;
1064 bdvp = pdk->dk_rawvp;
1065 error = 0;
1066 } else {
1067 isopen = false;
1068 error = dk_open_parent(bdev, FREAD, &bdvp);
1069 }
1070 mutex_exit(&pdk->dk_rawlock);
1071
1072 if (error)
1073 return error;
1074
1075 bp = getiobuf(bdvp, true);
1076 bp->b_flags = B_READ;
1077 bp->b_cflags = BC_BUSY;
1078 bp->b_dev = bdev;
1079 bp->b_data = tbuf;
1080 bp->b_bufsize = bp->b_bcount = len;
1081 bp->b_blkno = blkno;
1082 bp->b_cylinder = 0;
1083 bp->b_error = 0;
1084
1085 VOP_STRATEGY(bdvp, bp);
1086 error = biowait(bp);
1087 putiobuf(bp);
1088
1089 mutex_enter(&pdk->dk_rawlock);
1090 if (isopen) {
1091 --pdk->dk_rawopens;
1092 } else {
1093 dk_close_parent(bdvp, FREAD);
1094 }
1095 mutex_exit(&pdk->dk_rawlock);
1096
1097 return error;
1098 }
1099
1100 /*
1101 * dkwedge_lookup:
1102 *
1103 * Look up a dkwedge_softc based on the provided dev_t.
1104 */
1105 static struct dkwedge_softc *
1106 dkwedge_lookup(dev_t dev)
1107 {
1108 int unit = minor(dev);
1109
1110 if (unit >= ndkwedges)
1111 return NULL;
1112
1113 KASSERT(dkwedges != NULL);
1114
1115 return dkwedges[unit];
1116 }
1117
1118 static int
1119 dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
1120 {
1121 struct vnode *vp;
1122 int error;
1123
1124 error = bdevvp(dev, &vp);
1125 if (error)
1126 return error;
1127
1128 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1129 if (error) {
1130 vrele(vp);
1131 return error;
1132 }
1133 error = VOP_OPEN(vp, mode, NOCRED);
1134 if (error) {
1135 vput(vp);
1136 return error;
1137 }
1138
1139 /* VOP_OPEN() doesn't do this for us. */
1140 if (mode & FWRITE) {
1141 mutex_enter(vp->v_interlock);
1142 vp->v_writecount++;
1143 mutex_exit(vp->v_interlock);
1144 }
1145
1146 VOP_UNLOCK(vp);
1147
1148 *vpp = vp;
1149
1150 return 0;
1151 }
1152
1153 static int
1154 dk_close_parent(struct vnode *vp, int mode)
1155 {
1156 int error;
1157
1158 error = vn_close(vp, mode, NOCRED);
1159 return error;
1160 }
1161
1162 /*
1163 * dkopen: [devsw entry point]
1164 *
1165 * Open a wedge.
1166 */
1167 static int
1168 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1169 {
1170 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1171 int error = 0;
1172
1173 if (sc == NULL)
1174 return ENXIO;
1175 if (sc->sc_state != DKW_STATE_RUNNING)
1176 return ENXIO;
1177
1178 /*
1179 * We go through a complicated little dance to only open the parent
1180 * vnode once per wedge, no matter how many times the wedge is
1181 * opened. The reason? We see one dkopen() per open call, but
1182 * only dkclose() on the last close.
1183 */
1184 mutex_enter(&sc->sc_dk.dk_openlock);
1185 mutex_enter(&sc->sc_parent->dk_rawlock);
1186 if (sc->sc_dk.dk_openmask == 0) {
1187 error = dkfirstopen(sc, flags);
1188 if (error)
1189 goto popen_fail;
1190 }
1191 KASSERT(sc->sc_mode != 0);
1192 if (flags & ~sc->sc_mode & FWRITE) {
1193 error = EROFS;
1194 goto popen_fail;
1195 }
1196 if (fmt == S_IFCHR)
1197 sc->sc_dk.dk_copenmask |= 1;
1198 else
1199 sc->sc_dk.dk_bopenmask |= 1;
1200 sc->sc_dk.dk_openmask =
1201 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1202
1203 popen_fail:
1204 mutex_exit(&sc->sc_parent->dk_rawlock);
1205 mutex_exit(&sc->sc_dk.dk_openlock);
1206 return error;
1207 }
1208
1209 static int
1210 dkfirstopen(struct dkwedge_softc *sc, int flags)
1211 {
1212 struct dkwedge_softc *nsc;
1213 struct vnode *vp;
1214 int mode;
1215 int error;
1216
1217 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1218 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1219
1220 if (sc->sc_parent->dk_rawopens == 0) {
1221 KASSERT(sc->sc_parent->dk_rawvp == NULL);
1222 /*
1223 * Try open read-write. If this fails for EROFS
1224 * and wedge is read-only, retry to open read-only.
1225 */
1226 mode = FREAD | FWRITE;
1227 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1228 if (error == EROFS && (flags & FWRITE) == 0) {
1229 mode &= ~FWRITE;
1230 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1231 }
1232 if (error)
1233 return error;
1234 sc->sc_parent->dk_rawvp = vp;
1235 } else {
1236 /*
1237 * Retrieve mode from an already opened wedge.
1238 *
1239 * At this point, dk_rawopens is bounded by the number
1240 * of dkwedge devices in the system, which is limited
1241 * by autoconf device numbering to INT_MAX. Since
1242 * dk_rawopens is unsigned, this can't overflow.
1243 */
1244 KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
1245 mode = 0;
1246 LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
1247 if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
1248 continue;
1249 mode = nsc->sc_mode;
1250 break;
1251 }
1252 }
1253 sc->sc_mode = mode;
1254 sc->sc_parent->dk_rawopens++;
1255
1256 return 0;
1257 }
1258
1259 static void
1260 dklastclose(struct dkwedge_softc *sc)
1261 {
1262
1263 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1264 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1265 KASSERT(sc->sc_parent->dk_rawopens > 0);
1266 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1267
1268 if (--sc->sc_parent->dk_rawopens == 0) {
1269 struct vnode *const vp = sc->sc_parent->dk_rawvp;
1270 const int mode = sc->sc_mode;
1271
1272 sc->sc_parent->dk_rawvp = NULL;
1273 sc->sc_mode = 0;
1274
1275 dk_close_parent(vp, mode);
1276 }
1277 }
1278
1279 /*
1280 * dkclose: [devsw entry point]
1281 *
1282 * Close a wedge.
1283 */
1284 static int
1285 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1286 {
1287 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1288
1289 if (sc == NULL)
1290 return ENXIO;
1291 if (sc->sc_state != DKW_STATE_RUNNING)
1292 return ENXIO;
1293
1294 mutex_enter(&sc->sc_dk.dk_openlock);
1295 mutex_enter(&sc->sc_parent->dk_rawlock);
1296
1297 KASSERT(sc->sc_dk.dk_openmask != 0);
1298
1299 if (fmt == S_IFCHR)
1300 sc->sc_dk.dk_copenmask &= ~1;
1301 else
1302 sc->sc_dk.dk_bopenmask &= ~1;
1303 sc->sc_dk.dk_openmask =
1304 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1305
1306 if (sc->sc_dk.dk_openmask == 0) {
1307 dklastclose(sc);
1308 }
1309
1310 mutex_exit(&sc->sc_parent->dk_rawlock);
1311 mutex_exit(&sc->sc_dk.dk_openlock);
1312
1313 return 0;
1314 }
1315
1316 /*
1317 * dkstrategy: [devsw entry point]
1318 *
1319 * Perform I/O based on the wedge I/O strategy.
1320 */
1321 static void
1322 dkstrategy(struct buf *bp)
1323 {
1324 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1325 uint64_t p_size, p_offset;
1326
1327 if (sc == NULL) {
1328 bp->b_error = ENXIO;
1329 goto done;
1330 }
1331
1332 if (sc->sc_state != DKW_STATE_RUNNING ||
1333 sc->sc_parent->dk_rawvp == NULL) {
1334 bp->b_error = ENXIO;
1335 goto done;
1336 }
1337
1338 /* If it's an empty transfer, wake up the top half now. */
1339 if (bp->b_bcount == 0)
1340 goto done;
1341
1342 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1343 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1344
1345 /* Make sure it's in-range. */
1346 if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
1347 goto done;
1348
1349 /* Translate it to the parent's raw LBA. */
1350 bp->b_rawblkno = bp->b_blkno + p_offset;
1351
1352 /* Place it in the queue and start I/O on the unit. */
1353 mutex_enter(&sc->sc_iolock);
1354 sc->sc_iopend++;
1355 disk_wait(&sc->sc_dk);
1356 bufq_put(sc->sc_bufq, bp);
1357 mutex_exit(&sc->sc_iolock);
1358
1359 dkstart(sc);
1360 return;
1361
1362 done:
1363 bp->b_resid = bp->b_bcount;
1364 biodone(bp);
1365 }
1366
1367 /*
1368 * dkstart:
1369 *
1370 * Start I/O that has been enqueued on the wedge.
1371 */
1372 static void
1373 dkstart(struct dkwedge_softc *sc)
1374 {
1375 struct vnode *vp;
1376 struct buf *bp, *nbp;
1377
1378 mutex_enter(&sc->sc_iolock);
1379
1380 /* Do as much work as has been enqueued. */
1381 while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
1382 if (sc->sc_state != DKW_STATE_RUNNING) {
1383 (void) bufq_get(sc->sc_bufq);
1384 if (--sc->sc_iopend == 0)
1385 cv_broadcast(&sc->sc_dkdrn);
1386 mutex_exit(&sc->sc_iolock);
1387 bp->b_error = ENXIO;
1388 bp->b_resid = bp->b_bcount;
1389 biodone(bp);
1390 mutex_enter(&sc->sc_iolock);
1391 continue;
1392 }
1393
1394 /* fetch an I/O buf with sc_iolock dropped */
1395 mutex_exit(&sc->sc_iolock);
1396 nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
1397 mutex_enter(&sc->sc_iolock);
1398 if (nbp == NULL) {
1399 /*
1400 * No resources to run this request; leave the
1401 * buffer queued up, and schedule a timer to
1402 * restart the queue in 1/2 a second.
1403 */
1404 callout_schedule(&sc->sc_restart_ch, hz/2);
1405 break;
1406 }
1407
1408 /*
1409 * fetch buf, this can fail if another thread
1410 * has already processed the queue, it can also
1411 * return a completely different buf.
1412 */
1413 bp = bufq_get(sc->sc_bufq);
1414 if (bp == NULL) {
1415 mutex_exit(&sc->sc_iolock);
1416 putiobuf(nbp);
1417 mutex_enter(&sc->sc_iolock);
1418 continue;
1419 }
1420
1421 /* Instrumentation. */
1422 disk_busy(&sc->sc_dk);
1423
1424 /* release lock for VOP_STRATEGY */
1425 mutex_exit(&sc->sc_iolock);
1426
1427 nbp->b_data = bp->b_data;
1428 nbp->b_flags = bp->b_flags;
1429 nbp->b_oflags = bp->b_oflags;
1430 nbp->b_cflags = bp->b_cflags;
1431 nbp->b_iodone = dkiodone;
1432 nbp->b_proc = bp->b_proc;
1433 nbp->b_blkno = bp->b_rawblkno;
1434 nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1435 nbp->b_bcount = bp->b_bcount;
1436 nbp->b_private = bp;
1437 BIO_COPYPRIO(nbp, bp);
1438
1439 vp = nbp->b_vp;
1440 if ((nbp->b_flags & B_READ) == 0) {
1441 mutex_enter(vp->v_interlock);
1442 vp->v_numoutput++;
1443 mutex_exit(vp->v_interlock);
1444 }
1445 VOP_STRATEGY(vp, nbp);
1446
1447 mutex_enter(&sc->sc_iolock);
1448 }
1449
1450 mutex_exit(&sc->sc_iolock);
1451 }
1452
1453 /*
1454 * dkiodone:
1455 *
1456 * I/O to a wedge has completed; alert the top half.
1457 */
1458 static void
1459 dkiodone(struct buf *bp)
1460 {
1461 struct buf *obp = bp->b_private;
1462 struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1463
1464 if (bp->b_error != 0)
1465 obp->b_error = bp->b_error;
1466 obp->b_resid = bp->b_resid;
1467 putiobuf(bp);
1468
1469 mutex_enter(&sc->sc_iolock);
1470 if (--sc->sc_iopend == 0)
1471 cv_broadcast(&sc->sc_dkdrn);
1472
1473 disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1474 obp->b_flags & B_READ);
1475 mutex_exit(&sc->sc_iolock);
1476
1477 biodone(obp);
1478
1479 /* Kick the queue in case there is more work we can do. */
1480 dkstart(sc);
1481 }
1482
/*
 * dkrestart:
 *
 *	Callout handler that restarts the work queue after dkstart()
 *	stalled on a resource shortage.  `v' is the wedge softc.
 */
static void
dkrestart(void *v)
{

	dkstart((struct dkwedge_softc *)v);
}
1496
1497 /*
1498 * dkminphys:
1499 *
1500 * Call parent's minphys function.
1501 */
1502 static void
1503 dkminphys(struct buf *bp)
1504 {
1505 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1506 dev_t dev;
1507
1508 dev = bp->b_dev;
1509 bp->b_dev = sc->sc_pdev;
1510 if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
1511 (*sc->sc_parent->dk_driver->d_minphys)(bp);
1512 else
1513 minphys(bp);
1514 bp->b_dev = dev;
1515 }
1516
1517 /*
1518 * dkread: [devsw entry point]
1519 *
1520 * Read from a wedge.
1521 */
1522 static int
1523 dkread(dev_t dev, struct uio *uio, int flags)
1524 {
1525 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1526
1527 if (sc == NULL)
1528 return ENXIO;
1529 if (sc->sc_state != DKW_STATE_RUNNING)
1530 return ENXIO;
1531
1532 return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
1533 }
1534
1535 /*
1536 * dkwrite: [devsw entry point]
1537 *
1538 * Write to a wedge.
1539 */
1540 static int
1541 dkwrite(dev_t dev, struct uio *uio, int flags)
1542 {
1543 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1544
1545 if (sc == NULL)
1546 return ENXIO;
1547 if (sc->sc_state != DKW_STATE_RUNNING)
1548 return ENXIO;
1549
1550 return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
1551 }
1552
1553 /*
1554 * dkioctl: [devsw entry point]
1555 *
1556 * Perform an ioctl request on a wedge.
1557 */
1558 static int
1559 dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1560 {
1561 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1562 int error = 0;
1563
1564 if (sc == NULL)
1565 return ENXIO;
1566 if (sc->sc_state != DKW_STATE_RUNNING)
1567 return ENXIO;
1568 if (sc->sc_parent->dk_rawvp == NULL)
1569 return ENXIO;
1570
1571 /*
1572 * We pass NODEV instead of our device to indicate we don't
1573 * want to handle disklabel ioctls
1574 */
1575 error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
1576 if (error != EPASSTHROUGH)
1577 return error;
1578
1579 error = 0;
1580
1581 switch (cmd) {
1582 case DIOCGSTRATEGY:
1583 case DIOCGCACHE:
1584 case DIOCCACHESYNC:
1585 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
1586 l != NULL ? l->l_cred : NOCRED);
1587 break;
1588 case DIOCGWEDGEINFO: {
1589 struct dkwedge_info *dkw = data;
1590
1591 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
1592 sizeof(dkw->dkw_devname));
1593 memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1594 dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1595 strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
1596 sizeof(dkw->dkw_parent));
1597 dkw->dkw_offset = sc->sc_offset;
1598 dkw->dkw_size = sc->sc_size;
1599 strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));
1600
1601 break;
1602 }
1603 case DIOCGSECTORALIGN: {
1604 struct disk_sectoralign *dsa = data;
1605 uint32_t r;
1606
1607 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
1608 l != NULL ? l->l_cred : NOCRED);
1609 if (error)
1610 break;
1611
1612 r = sc->sc_offset % dsa->dsa_alignment;
1613 if (r < dsa->dsa_firstaligned)
1614 dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
1615 else
1616 dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
1617 dsa->dsa_alignment) - r;
1618 break;
1619 }
1620 default:
1621 error = ENOTTY;
1622 }
1623
1624 return error;
1625 }
1626
1627 /*
1628 * dkdiscard: [devsw entry point]
1629 *
1630 * Perform a discard-range request on a wedge.
1631 */
1632 static int
1633 dkdiscard(dev_t dev, off_t pos, off_t len)
1634 {
1635 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1636 unsigned shift;
1637 off_t offset, maxlen;
1638 int error;
1639
1640 if (sc == NULL)
1641 return ENXIO;
1642 if (sc->sc_state != DKW_STATE_RUNNING)
1643 return ENXIO;
1644 if (sc->sc_parent->dk_rawvp == NULL)
1645 return ENXIO;
1646
1647 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
1648 KASSERT(__type_fit(off_t, sc->sc_size));
1649 KASSERT(__type_fit(off_t, sc->sc_offset));
1650 KASSERT(0 <= sc->sc_offset);
1651 KASSERT(sc->sc_size <= (__type_max(off_t) >> shift));
1652 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - sc->sc_size));
1653 offset = ((off_t)sc->sc_offset << shift);
1654 maxlen = ((off_t)sc->sc_size << shift);
1655
1656 if (len > maxlen)
1657 return EINVAL;
1658 if (pos > (maxlen - len))
1659 return EINVAL;
1660
1661 pos += offset;
1662
1663 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
1664 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
1665 VOP_UNLOCK(sc->sc_parent->dk_rawvp);
1666
1667 return error;
1668 }
1669
1670 /*
1671 * dksize: [devsw entry point]
1672 *
1673 * Query the size of a wedge for the purpose of performing a dump
1674 * or for swapping to.
1675 */
1676 static int
1677 dksize(dev_t dev)
1678 {
1679 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1680 uint64_t p_size;
1681 int rv = -1;
1682
1683 if (sc == NULL)
1684 return -1;
1685 if (sc->sc_state != DKW_STATE_RUNNING)
1686 return -1;
1687
1688 mutex_enter(&sc->sc_dk.dk_openlock);
1689 mutex_enter(&sc->sc_parent->dk_rawlock);
1690
1691 /* Our content type is static, no need to open the device. */
1692
1693 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1694 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1695 /* Saturate if we are larger than INT_MAX. */
1696 if (p_size > INT_MAX)
1697 rv = INT_MAX;
1698 else
1699 rv = (int)p_size;
1700 }
1701
1702 mutex_exit(&sc->sc_parent->dk_rawlock);
1703 mutex_exit(&sc->sc_dk.dk_openlock);
1704
1705 return rv;
1706 }
1707
1708 /*
1709 * dkdump: [devsw entry point]
1710 *
1711 * Perform a crash dump to a wedge.
1712 */
1713 static int
1714 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1715 {
1716 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1717 const struct bdevsw *bdev;
1718 uint64_t p_size, p_offset;
1719 int rv = 0;
1720
1721 if (sc == NULL)
1722 return ENXIO;
1723 if (sc->sc_state != DKW_STATE_RUNNING)
1724 return ENXIO;
1725
1726 mutex_enter(&sc->sc_dk.dk_openlock);
1727 mutex_enter(&sc->sc_parent->dk_rawlock);
1728
1729 /* Our content type is static, no need to open the device. */
1730
1731 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1732 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1733 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0) {
1734 rv = ENXIO;
1735 goto out;
1736 }
1737 if (size % DEV_BSIZE != 0) {
1738 rv = EINVAL;
1739 goto out;
1740 }
1741
1742 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1743 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1744
1745 if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
1746 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1747 "p_size (%" PRIu64 ")\n", __func__, blkno,
1748 size/DEV_BSIZE, p_size);
1749 rv = EINVAL;
1750 goto out;
1751 }
1752
1753 bdev = bdevsw_lookup(sc->sc_pdev);
1754 rv = (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1755
1756 out:
1757 mutex_exit(&sc->sc_parent->dk_rawlock);
1758 mutex_exit(&sc->sc_dk.dk_openlock);
1759
1760 return rv;
1761 }
1762
1763 /*
1764 * config glue
1765 */
1766
1767 /*
1768 * dkwedge_find_partition
1769 *
1770 * Find wedge corresponding to the specified parent name
1771 * and offset/length.
1772 */
1773 device_t
1774 dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
1775 {
1776 struct dkwedge_softc *sc;
1777 int i;
1778 device_t wedge = NULL;
1779
1780 rw_enter(&dkwedges_lock, RW_READER);
1781 for (i = 0; i < ndkwedges; i++) {
1782 if ((sc = dkwedges[i]) == NULL)
1783 continue;
1784 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1785 sc->sc_offset == startblk &&
1786 sc->sc_size == nblks) {
1787 if (wedge) {
1788 printf("WARNING: double match for boot wedge "
1789 "(%s, %s)\n",
1790 device_xname(wedge),
1791 device_xname(sc->sc_dev));
1792 continue;
1793 }
1794 wedge = sc->sc_dev;
1795 }
1796 }
1797 rw_exit(&dkwedges_lock);
1798
1799 return wedge;
1800 }
1801
1802 const char *
1803 dkwedge_get_parent_name(dev_t dev)
1804 {
1805 /* XXX: perhaps do this in lookup? */
1806 int bmaj = bdevsw_lookup_major(&dk_bdevsw);
1807 int cmaj = cdevsw_lookup_major(&dk_cdevsw);
1808
1809 if (major(dev) != bmaj && major(dev) != cmaj)
1810 return NULL;
1811 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1812 if (sc == NULL)
1813 return NULL;
1814 return sc->sc_parent->dk_name;
1815 }
1816