dk.c revision 1.173 1 /* $NetBSD: dk.c,v 1.173 2025/04/13 14:01:00 jakllsch Exp $ */
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.173 2025/04/13 14:01:00 jakllsch Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/types.h>
41
42 #include <sys/buf.h>
43 #include <sys/bufq.h>
44 #include <sys/callout.h>
45 #include <sys/conf.h>
46 #include <sys/device.h>
47 #include <sys/disk.h>
48 #include <sys/disklabel.h>
49 #include <sys/errno.h>
50 #include <sys/fcntl.h>
51 #include <sys/ioctl.h>
52 #include <sys/kauth.h>
53 #include <sys/kernel.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/proc.h>
57 #include <sys/rwlock.h>
58 #include <sys/stat.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61
62 #include <miscfs/specfs/specdev.h>
63
64 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
65
/* Lifecycle states of a wedge; see sc_state locking notes below. */
typedef enum {
	DKW_STATE_LARVAL = 0,		/* softc allocated, pseudo-device not yet attached */
	DKW_STATE_RUNNING = 1,		/* attached; available for I/O */
	DKW_STATE_DYING = 2,		/* detach in progress */
	DKW_STATE_DEAD = 666		/* removed from dkwedges[]; about to be freed */
} dkwedge_state_t;
72
73 /*
74 * Lock order:
75 *
76 * sc->sc_dk.dk_openlock
77 * => sc->sc_parent->dk_rawlock
78 * => sc->sc_parent->dk_openlock
79 * => dkwedges_lock
80 * => sc->sc_sizelock
81 *
82 * Locking notes:
83 *
84 * W dkwedges_lock
85 * D device reference
86 * O sc->sc_dk.dk_openlock
87 * P sc->sc_parent->dk_openlock
88 * R sc->sc_parent->dk_rawlock
89 * S sc->sc_sizelock
90 * I sc->sc_iolock
91 * $ stable after initialization
92 * 1 used only by a single thread
93 *
94 * x&y means both x and y must be held to write (with a write lock if
95 * one is rwlock), and either x or y must be held to read.
96 */
97
/*
 * Per-wedge software state.  The single-letter tags refer to the
 * "Locking notes" table in the block comment above.
 */
struct dkwedge_softc {
	device_t sc_dev;	/* P&W: pointer to our pseudo-device */
		/* sc_dev is also stable while device is referenced */
	struct cfdata sc_cfdata;	/* 1: our cfdata structure */
	uint8_t sc_wname[128];	/* $: wedge name (Unicode, UTF-8) */

	dkwedge_state_t sc_state;	/* state this wedge is in */
		/* stable while device is referenced */
		/* used only in assertions when stable, and in dump in ddb */

	struct disk *sc_parent;	/* $: parent disk */
		/* P: sc_parent->dk_openmask */
		/* P: sc_parent->dk_nwedges */
		/* P: sc_parent->dk_wedges */
		/* R: sc_parent->dk_rawopens */
		/* R: sc_parent->dk_rawvp (also stable while wedge is open) */
	daddr_t sc_offset;	/* $: LBA offset of wedge in parent */
	krwlock_t sc_sizelock;	/* protects sc_size (tag S) */
	uint64_t sc_size;	/* S: size of wedge in blocks */
	char sc_ptype[32];	/* $: partition type */
	dev_t sc_pdev;		/* $: cached parent's dev_t */
		/* P: link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk sc_dk;	/* our own disk structure */
		/* O&R: sc_dk.dk_bopenmask */
		/* O&R: sc_dk.dk_copenmask */
		/* O&R: sc_dk.dk_openmask */
	struct bufq_state *sc_bufq;	/* $: buffer queue */
	struct callout sc_restart_ch;	/* I: callout to restart I/O */

	kmutex_t sc_iolock;	/* protects the I/O state (tag I) */
	bool sc_iostop;		/* I: don't schedule restart */
	int sc_mode;		/* O&R: parent open mode */
};
133
134 static int dkwedge_match(device_t, cfdata_t, void *);
135 static void dkwedge_attach(device_t, device_t, void *);
136 static int dkwedge_detach(device_t, int);
137
138 static void dk_set_geometry(struct dkwedge_softc *, struct disk *);
139
140 static void dkstart(struct dkwedge_softc *);
141 static void dkiodone(struct buf *);
142 static void dkrestart(void *);
143 static void dkminphys(struct buf *);
144
145 static int dkfirstopen(struct dkwedge_softc *, int);
146 static void dklastclose(struct dkwedge_softc *);
147 static int dkwedge_detach(device_t, int);
148 static void dkwedge_delall1(struct disk *, bool);
149 static int dkwedge_del1(struct dkwedge_info *, int);
150 static int dk_open_parent(dev_t, int, struct vnode **);
151 static int dk_close_parent(struct vnode *, int);
152
153 static dev_type_open(dkopen);
154 static dev_type_close(dkclose);
155 static dev_type_cancel(dkcancel);
156 static dev_type_read(dkread);
157 static dev_type_write(dkwrite);
158 static dev_type_ioctl(dkioctl);
159 static dev_type_strategy(dkstrategy);
160 static dev_type_dump(dkdump);
161 static dev_type_size(dksize);
162 static dev_type_discard(dkdiscard);
163
/* Autoconfiguration glue: dk is a pseudo-device, attached explicitly. */
CFDRIVER_DECL(dk, DV_DISK, NULL);
CFATTACH_DECL3_NEW(dk, 0,
    dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

/* Block device switch for wedge devices. */
const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

/* Character device switch for wedge devices. */
const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

/* W: global wedge table, indexed by unit number; slots may be NULL. */
static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;		/* W: allocated length of dkwedges[] */
static krwlock_t dkwedges_lock;

/* Discovery methods, kept sorted by ascending ddm_priority. */
static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;
207
208 /*
209 * dkwedge_match:
210 *
211 * Autoconfiguration match function for pseudo-device glue.
212 */
213 static int
214 dkwedge_match(device_t parent, cfdata_t match, void *aux)
215 {
216
217 /* Pseudo-device; always present. */
218 return 1;
219 }
220
/*
 * dkwedge_attach:
 *
 *	Autoconfiguration attach function for pseudo-device glue.
 *	The softc was allocated by dkwedge_add() and passed in as aux;
 *	here we publish the device_t and make the wedge visible.
 */
static void
dkwedge_attach(device_t parent, device_t self, void *aux)
{
	struct dkwedge_softc *sc = aux;
	struct disk *pdk = sc->sc_parent;
	int unit = device_unit(self);

	KASSERTMSG(unit >= 0, "unit=%d", unit);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	/*
	 * Publish the device_t in the larval softc.  Hold both the
	 * parent's openlock and dkwedges_lock (lock order per the top
	 * of this file) so concurrent lookups see either NULL or a
	 * fully valid sc_dev.
	 */
	mutex_enter(&pdk->dk_openlock);
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERTMSG(unit < ndkwedges, "unit=%d ndkwedges=%u", unit, ndkwedges);
	KASSERTMSG(sc == dkwedges[unit], "sc=%p dkwedges[%d]=%p",
	    sc, unit, dkwedges[unit]);
	KASSERTMSG(sc->sc_dev == NULL, "sc=%p sc->sc_dev=%p", sc, sc->sc_dev);
	sc->sc_dev = self;
	rw_exit(&dkwedges_lock);
	mutex_exit(&pdk->dk_openlock);

	/* Register our disk structure and derived geometry. */
	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	mutex_enter(&pdk->dk_openlock);
	dk_set_geometry(sc, pdk);
	mutex_exit(&pdk->dk_openlock);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	device_set_private(self, sc);
	sc->sc_state = DKW_STATE_RUNNING;
}
258
259 /*
260 * dkwedge_compute_pdev:
261 *
262 * Compute the parent disk's dev_t.
263 */
264 static int
265 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
266 {
267 const char *name, *cp;
268 devmajor_t pmaj;
269 int punit;
270 char devname[16];
271
272 name = pname;
273 switch (type) {
274 case VBLK:
275 pmaj = devsw_name2blk(name, devname, sizeof(devname));
276 break;
277 case VCHR:
278 pmaj = devsw_name2chr(name, devname, sizeof(devname));
279 break;
280 default:
281 pmaj = NODEVMAJOR;
282 break;
283 }
284 if (pmaj == NODEVMAJOR)
285 return ENXIO;
286
287 name += strlen(devname);
288 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
289 punit = (punit * 10) + (*cp - '0');
290 if (cp == name) {
291 /* Invalid parent disk name. */
292 return ENXIO;
293 }
294
295 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
296
297 return 0;
298 }
299
/*
 * dkwedge_array_expand:
 *
 *	Expand the dkwedges array.
 *
 *	Releases and reacquires dkwedges_lock as a writer: the
 *	M_WAITOK allocation may sleep, and we must not hold the lock
 *	across it.  Callers must therefore revalidate any state they
 *	derived from the array before the call.
 */
static int
dkwedge_array_expand(void)
{

	const unsigned incr = 16;
	unsigned newcnt, oldcnt;
	struct dkwedge_softc **newarray = NULL, **oldarray = NULL;

	KASSERT(rw_write_held(&dkwedges_lock));

	/* Snapshot the current array so we can detect a concurrent resize. */
	oldcnt = ndkwedges;
	oldarray = dkwedges;

	if (oldcnt >= INT_MAX - incr)
		return ENFILE;	/* XXX */
	newcnt = oldcnt + incr;

	/* Drop the lock for the (possibly sleeping) allocation. */
	rw_exit(&dkwedges_lock);
	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
	    M_WAITOK|M_ZERO);
	rw_enter(&dkwedges_lock, RW_WRITER);

	/*
	 * If someone else grew the array while we slept, discard our
	 * copy (newarray is freed below) and report success; the caller
	 * rescans anyway.
	 */
	if (ndkwedges != oldcnt || dkwedges != oldarray) {
		oldarray = NULL;	/* already recycled */
		goto out;
	}

	if (oldarray != NULL)
		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
	dkwedges = newarray;
	newarray = NULL;	/* transferred to dkwedges */
	ndkwedges = newcnt;

	/* Free whichever array is now unused, outside the lock. */
out:	rw_exit(&dkwedges_lock);
	if (oldarray != NULL)
		free(oldarray, M_DKWEDGE);
	if (newarray != NULL)
		free(newarray, M_DKWEDGE);
	rw_enter(&dkwedges_lock, RW_WRITER);
	return 0;
}
348
349 static void
350 dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
351 {
352
353 rw_init(&sc->sc_sizelock);
354 sc->sc_size = size;
355 }
356
357 static void
358 dkwedge_size_fini(struct dkwedge_softc *sc)
359 {
360
361 rw_destroy(&sc->sc_sizelock);
362 }
363
364 static uint64_t
365 dkwedge_size(struct dkwedge_softc *sc)
366 {
367 uint64_t size;
368
369 rw_enter(&sc->sc_sizelock, RW_READER);
370 size = sc->sc_size;
371 rw_exit(&sc->sc_sizelock);
372
373 return size;
374 }
375
/*
 * dkwedge_size_increase:
 *
 *	Grow the recorded size of a wedge; shrinking is a bug and
 *	asserts.  Caller must hold the parent's dk_openlock, which
 *	serializes concurrent size updates.
 */
static void
dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
{

	KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));

	rw_enter(&sc->sc_sizelock, RW_WRITER);
	KASSERTMSG(size >= sc->sc_size,
	    "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
	    sc->sc_size, size);
	sc->sc_size = size;
	rw_exit(&sc->sc_sizelock);
}
389
/*
 * dk_set_geometry:
 *
 *	(Re)compute the wedge's disk geometry from its size and the
 *	parent's geometry, and publish it via disk_set_info().
 *	Caller must hold the parent's dk_openlock.
 */
static void
dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
{
	struct disk *dk = &sc->sc_dk;
	struct disk_geom *dg = &dk->dk_geom;
	uint32_t r, lspps;

	KASSERT(mutex_owned(&pdk->dk_openlock));

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = dkwedge_size(sc);
	dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;

	/* fake numbers, 1 cylinder is 1 MB with default sector size */
	dg->dg_nsectors = 32;
	dg->dg_ntracks = 64;
	dg->dg_ncylinders =
	    dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);

	/*
	 * Inherit the parent's physical-sector info, then re-express the
	 * first physically-aligned sector relative to the wedge's own
	 * offset.  lspps is the number of logical sectors per physical
	 * sector (at least 1); the adjustment keeps dg_alignedsec in
	 * the range [0, lspps).
	 */
	dg->dg_physsecsize = pdk->dk_geom.dg_physsecsize;
	dg->dg_alignedsec = pdk->dk_geom.dg_alignedsec;
	lspps = MAX(1u, dg->dg_physsecsize / dg->dg_secsize);
	r = sc->sc_offset % lspps;
	if (r > dg->dg_alignedsec)
		dg->dg_alignedsec += lspps;
	dg->dg_alignedsec -= r;
	dg->dg_alignedsec %= lspps;

	disk_set_info(sc->sc_dev, dk, NULL);
}
421
/*
 * dkwedge_add: [exported function]
 *
 *	Add a disk wedge based on the provided information.
 *
 *	The incoming dkw_devname[] is ignored, instead being
 *	filled in and returned to the caller.
 *
 *	Returns 0 on success, or ENXIO (unknown parent), EINVAL
 *	(negative offset or overlap), EBUSY (parent partitions open),
 *	EEXIST (duplicate wedge), ENFILE/ENOMEM (resource exhaustion).
 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;
	device_t dev __diagused;

	/* Caller-supplied strings are not guaranteed NUL-terminated. */
	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return ENXIO;

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return error;

	if (dkw->dkw_offset < 0)
		return EINVAL;

	/*
	 * Check for an existing wedge at the same disk offset. Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (dkwedge_size(lsc) > dkw->dkw_size)
			break;
		if (lsc->sc_dev == NULL)
			break;

		/* Same offset/name/type and not shrinking: grow in place. */
		sc = lsc;
		device_acquire(sc->sc_dev);
		dkwedge_size_increase(sc, dkw->dkw_size);
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	if (sc != NULL)
		goto announce;

	/* No existing match: build a fresh larval wedge. */
	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	dkwedge_size_init(sc, dkw->dkw_size);

	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			/* XXX arithmetic overflow */
			uint64_t size = dkwedge_size(sc);
			uint64_t lsize = dkwedge_size(lsc);
			daddr_t lastblk = sc->sc_offset + size - 1;
			daddr_t llastblk = lsc->sc_offset + lsize - 1;

			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			/* Exact duplicate -> EEXIST; partial overlap -> EINVAL. */
			if (sc->sc_offset == lsc->sc_offset &&
			    dkwedge_size(sc) == dkwedge_size(lsc) &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		/* Roll back everything allocated so far. */
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_NOTFOUND; /* use chosen cf_unit */

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
					sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL) {
			/*
			 * No free slot: grow the array (this may drop
			 * and retake dkwedges_lock) and rescan.
			 */
			error = dkwedge_array_expand();
			if (error)
				break;
		} else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		/* Undo the parent-list insertion, then free the softc. */
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance. This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((dev = config_attach_pseudo_acquire(&sc->sc_cfdata, sc)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		/* Back out the array slot and the parent's wedge list. */
		rw_enter(&dkwedges_lock, RW_WRITER);
		KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return ENOMEM;
	}

	KASSERT(dev == sc->sc_dev);

announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    dkwedge_size(sc), sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller. */
	strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
	    sizeof(dkw->dkw_devname));

	device_release(sc->sc_dev);
	return 0;
}
655
/*
 * dkwedge_find_acquire:
 *
 *	Lookup a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 *
 *	Return NULL if the wedge is not found, otherwise return
 *	the wedge's softc. Assign the wedge's unit number to unitp
 *	if unitp is not NULL. The wedge's sc_dev is referenced and
 *	must be released by device_release or equivalent.
 */
static struct dkwedge_softc *
dkwedge_find_acquire(struct dkwedge_info *dkw, u_int *unitp)
{
	struct dkwedge_softc *sc = NULL;
	u_int unit;

	/* Find our softc. */
	dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
	rw_enter(&dkwedges_lock, RW_READER);
	for (unit = 0; unit < ndkwedges; unit++) {
		/* Both devname and parent name must match. */
		if ((sc = dkwedges[unit]) != NULL &&
		    sc->sc_dev != NULL &&
		    strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
		    strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
			/* Reference it before dropping the table lock. */
			device_acquire(sc->sc_dev);
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (sc == NULL)
		return NULL;

	if (unitp != NULL)
		*unitp = unit;

	return sc;
}
695
/*
 * dkwedge_del: [exported function]
 *
 *	Delete a disk wedge based on the provided information.
 *	NOTE: the lookup uses the wedge devname, not the wname.
 *	Convenience wrapper around dkwedge_del1() with no detach flags.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{

	return dkwedge_del1(dkw, 0);
}
709
710 int
711 dkwedge_del1(struct dkwedge_info *dkw, int flags)
712 {
713 struct dkwedge_softc *sc = NULL;
714
715 /* Find our softc. */
716 if ((sc = dkwedge_find_acquire(dkw, NULL)) == NULL)
717 return ESRCH;
718
719 return config_detach_release(sc->sc_dev, flags);
720 }
721
/*
 * dkwedge_detach:
 *
 *	Autoconfiguration detach function for pseudo-device glue.
 *	Tears down a wedge in the reverse order of dkwedge_add/attach:
 *	stop I/O, revoke open instances, unlink from the parent, then
 *	free the softc.
 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *const sc = device_private(self);
	const u_int unit = device_unit(self);
	int bmaj, cmaj, error;

	error = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self, flags);
	if (error)
		return error;

	/* Mark the wedge as dying. */
	sc->sc_state = DKW_STATE_DYING;

	pmf_device_deregister(self);

	/* Kill any pending restart. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iostop = true;
	mutex_exit(&sc->sc_iolock);
	callout_halt(&sc->sc_restart_ch, NULL);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * At this point, all block device opens have been closed,
	 * synchronously flushing any buffered writes; and all
	 * character device I/O operations have completed
	 * synchronously, and character device opens have been closed.
	 *
	 * So there can be no more opens or queued buffers by now.
	 */
	KASSERT(sc->sc_dk.dk_openmask == 0);
	KASSERT(bufq_peek(sc->sc_bufq) == NULL);
	bufq_drain(sc->sc_bufq);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	/* Unlink from the parent's wedge list. */
	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERT(dkwedges[unit] == sc);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	dkwedge_size_fini(sc);

	free(sc, M_DKWEDGE);

	return 0;
}
800
801 /*
802 * dkwedge_delall: [exported function]
803 *
804 * Forcibly delete all of the wedges on the specified disk. Used
805 * when a disk is being detached.
806 */
807 void
808 dkwedge_delall(struct disk *pdk)
809 {
810
811 dkwedge_delall1(pdk, /*idleonly*/false);
812 }
813
814 /*
815 * dkwedge_delidle: [exported function]
816 *
817 * Delete all of the wedges on the specified disk if idle. Used
818 * by ioctl(DIOCRMWEDGES).
819 */
820 void
821 dkwedge_delidle(struct disk *pdk)
822 {
823
824 dkwedge_delall1(pdk, /*idleonly*/true);
825 }
826
/*
 * dkwedge_delall1:
 *
 *	Backend for dkwedge_delall/dkwedge_delidle.  Repeatedly scans
 *	the parent's wedge list, detaching one wedge per pass (the
 *	locks must be dropped for the detach), until no candidate
 *	remains.
 */
static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly)
		flags |= DETACH_FORCE;

	for (;;) {
		mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			/*
			 * Wedge is not yet created. This is a race --
			 * it may as well have been added just after we
			 * deleted all the wedges, so pretend it's not
			 * here yet.
			 */
			if (sc->sc_dev == NULL)
				continue;
			if (!idleonly || sc->sc_dk.dk_openmask == 0) {
				/* Hold it across the lock drop below. */
				device_acquire(sc->sc_dev);
				break;
			}
		}
		if (sc == NULL) {
			/* Nothing left to delete (or all remaining are busy). */
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			mutex_exit(&pdk->dk_rawlock);
			return;
		}
		mutex_exit(&pdk->dk_openlock);
		mutex_exit(&pdk->dk_rawlock);
		(void)config_detach_release(sc->sc_dev, flags);
	}
}
865
/*
 * dkwedge_list: [exported function]
 *
 *	List all of the wedges on a particular disk.
 *	Copies one dkwedge_info per wedge out to the caller's buffer
 *	until the buffer is full; sets dkwl_ncopied and dkwl_nwedges.
 */
int
dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
{
	struct uio uio;
	struct iovec iov;
	struct dkwedge_softc *sc;
	struct dkwedge_info dkw;
	int error = 0;

	/* Build a uio describing the caller's output buffer. */
	iov.iov_base = dkwl->dkwl_buf;
	iov.iov_len = dkwl->dkwl_bufsize;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = dkwl->dkwl_bufsize;
	uio.uio_rw = UIO_READ;
	KASSERT(l == curlwp);
	uio.uio_vmspace = l->l_proc->p_vmspace;

	dkwl->dkwl_ncopied = 0;

	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
		/* Stop once the remaining buffer can't hold another record. */
		if (uio.uio_resid < sizeof(dkw))
			break;

		/* Skip wedges whose pseudo-device hasn't attached yet. */
		if (sc->sc_dev == NULL)
			continue;

		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
		strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw.dkw_parent));
		dkw.dkw_offset = sc->sc_offset;
		dkw.dkw_size = dkwedge_size(sc);
		strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));

		/*
		 * Acquire a device reference so this wedge doesn't go
		 * away before our next iteration in LIST_FOREACH, and
		 * then release the lock for uiomove.
		 */
		device_acquire(sc->sc_dev);
		mutex_exit(&pdk->dk_openlock);
		error = uiomove(&dkw, sizeof(dkw), &uio);
		mutex_enter(&pdk->dk_openlock);
		device_release(sc->sc_dev);
		if (error)
			break;

		dkwl->dkwl_ncopied++;
	}
	dkwl->dkwl_nwedges = pdk->dk_nwedges;
	mutex_exit(&pdk->dk_openlock);

	return error;
}
931
/*
 * dkwedge_find_by_wname_acquire:
 *
 *	Find the wedge whose wname matches, returning its device_t
 *	with a reference held, or NULL.  Warns (and keeps the first
 *	match) if multiple wedges share the same wname.
 */
static device_t
dkwedge_find_by_wname_acquire(const char *wname)
{
	device_t dv = NULL;
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_wname, wname) == 0) {
			if (dv != NULL) {
				printf(
				    "WARNING: double match for wedge name %s "
				    "(%s, %s)\n", wname, device_xname(dv),
				    device_xname(sc->sc_dev));
				continue;
			}
			device_acquire(sc->sc_dev);
			dv = sc->sc_dev;
		}
	}
	rw_exit(&dkwedges_lock);
	return dv;
}
958
/*
 * dkwedge_find_by_parent_acquire:
 *
 *	Find the next wedge (starting from index *i, which acts as a
 *	resumable cursor) whose parent disk name matches.  Returns the
 *	referenced device_t and leaves *i at the matching slot, or
 *	NULL when the table is exhausted.
 */
static device_t
dkwedge_find_by_parent_acquire(const char *name, size_t *i)
{

	rw_enter(&dkwedges_lock, RW_READER);
	for (; *i < (size_t)ndkwedges; (*i)++) {
		struct dkwedge_softc *sc;
		if ((sc = dkwedges[*i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, name) != 0)
			continue;
		device_acquire(sc->sc_dev);
		rw_exit(&dkwedges_lock);
		return sc->sc_dev;
	}
	rw_exit(&dkwedges_lock);
	return NULL;
}
977
978 /* XXX unsafe */
979 device_t
980 dkwedge_find_by_wname(const char *wname)
981 {
982 device_t dv;
983
984 if ((dv = dkwedge_find_by_wname_acquire(wname)) == NULL)
985 return NULL;
986 device_release(dv);
987 return dv;
988 }
989
990 /* XXX unsafe */
991 device_t
992 dkwedge_find_by_parent(const char *name, size_t *i)
993 {
994 device_t dv;
995
996 if ((dv = dkwedge_find_by_parent_acquire(name, i)) == NULL)
997 return NULL;
998 device_release(dv);
999 return dv;
1000 }
1001
1002 void
1003 dkwedge_print_wnames(void)
1004 {
1005 struct dkwedge_softc *sc;
1006 int i;
1007
1008 rw_enter(&dkwedges_lock, RW_READER);
1009 for (i = 0; i < ndkwedges; i++) {
1010 if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
1011 continue;
1012 printf(" wedge:%s", sc->sc_wname);
1013 }
1014 rw_exit(&dkwedges_lock);
1015 }
1016
/*
 * We need a dummy object to stuff into the dkwedge discovery method link
 * set to ensure that there is always at least one object in the set.
 * It is skipped explicitly in dkwedge_init().
 */
static struct dkwedge_discovery_method dummy_discovery_method;
__link_set_add_bss(dkwedge_methods, dummy_discovery_method);
1023
/*
 * dkwedge_init:
 *
 *	Initialize the disk wedge subsystem: set up locks, register the
 *	autoconf driver/attachment, and build the discovery-method list
 *	from the link set, sorted by ascending ddm_priority (lower
 *	value = tried first).
 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		/* Skip the placeholder that keeps the link set non-empty. */
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		/* Insertion sort by priority; equal priorities are rejected. */
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}
1082
/*
 * Autodiscovery of wedges at disk attach is opt-in: enabled when the
 * kernel is built with options DKWEDGE_AUTODISCOVER, and togglable at
 * run time through this variable.
 */
#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif
1088
/*
 * dkwedge_discover: [exported function]
 *
 *	Discover the wedges on a newly attached disk.
 *	Remove all unused wedges on the disk first, then try each
 *	registered discovery method in priority order until one
 *	succeeds.
 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning, the block device
	 * is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	/* FSILENT: an ENXIO here just means "no medium"; don't complain. */
	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENXIO)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges
	 */
	dkwedge_delidle(pdk);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists. If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

out:
	rw_exit(&dkwedge_discovery_methods_lock);
}
1175
1176 /*
1177 * dkwedge_read:
1178 *
1179 * Read some data from the specified disk, used for
1180 * partition discovery.
1181 */
1182 int
1183 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
1184 void *tbuf, size_t len)
1185 {
1186 buf_t *bp;
1187 int error;
1188 bool isopen;
1189 dev_t bdev;
1190 struct vnode *bdvp;
1191
1192 /*
1193 * The kernel cannot read from a character device vnode
1194 * as physio() only handles user memory.
1195 *
1196 * If the block device has already been opened by a wedge
1197 * use that vnode and temporarily bump the open counter.
1198 *
1199 * Otherwise try to open the block device.
1200 */
1201
1202 bdev = devsw_chr2blk(vp->v_rdev);
1203
1204 mutex_enter(&pdk->dk_rawlock);
1205 if (pdk->dk_rawopens != 0) {
1206 KASSERT(pdk->dk_rawvp != NULL);
1207 isopen = true;
1208 ++pdk->dk_rawopens;
1209 bdvp = pdk->dk_rawvp;
1210 error = 0;
1211 } else {
1212 isopen = false;
1213 error = dk_open_parent(bdev, FREAD, &bdvp);
1214 }
1215 mutex_exit(&pdk->dk_rawlock);
1216
1217 if (error)
1218 return error;
1219
1220 bp = getiobuf(bdvp, true);
1221 bp->b_flags = B_READ;
1222 bp->b_cflags = BC_BUSY;
1223 bp->b_dev = bdev;
1224 bp->b_data = tbuf;
1225 bp->b_bufsize = bp->b_bcount = len;
1226 bp->b_blkno = blkno;
1227 bp->b_cylinder = 0;
1228 bp->b_error = 0;
1229
1230 VOP_STRATEGY(bdvp, bp);
1231 error = biowait(bp);
1232 putiobuf(bp);
1233
1234 mutex_enter(&pdk->dk_rawlock);
1235 if (isopen) {
1236 --pdk->dk_rawopens;
1237 } else {
1238 dk_close_parent(bdvp, FREAD);
1239 }
1240 mutex_exit(&pdk->dk_rawlock);
1241
1242 return error;
1243 }
1244
1245 /*
1246 * dkwedge_lookup:
1247 *
1248 * Look up a dkwedge_softc based on the provided dev_t.
1249 *
1250 * Caller must guarantee the wedge is referenced.
1251 */
1252 static struct dkwedge_softc *
1253 dkwedge_lookup(dev_t dev)
1254 {
1255
1256 return device_lookup_private(&dk_cd, minor(dev));
1257 }
1258
1259 static struct dkwedge_softc *
1260 dkwedge_lookup_acquire(dev_t dev)
1261 {
1262 device_t dv = device_lookup_acquire(&dk_cd, minor(dev));
1263
1264 if (dv == NULL)
1265 return NULL;
1266 return device_private(dv);
1267 }
1268
/*
 * dk_open_parent:
 *
 *	Open the parent disk's block device for the given mode and
 *	return a referenced, unlocked vnode for it in *vpp.
 *	On error no reference is held.
 */
static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	/* bdevvp() returns a referenced (but unlocked) vnode. */
	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		/* Lock failed: drop only the reference. */
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		/* Open failed: vput() drops both the lock and the ref. */
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}
1303
1304 static int
1305 dk_close_parent(struct vnode *vp, int mode)
1306 {
1307 int error;
1308
1309 error = vn_close(vp, mode, NOCRED);
1310 return error;
1311 }
1312
1313 /*
1314 * dkopen: [devsw entry point]
1315 *
1316 * Open a wedge.
1317 */
1318 static int
1319 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1320 {
1321 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1322 int error = 0;
1323
1324 if (sc == NULL)
1325 return ENXIO;
1326 KASSERT(sc->sc_dev != NULL);
1327 KASSERT(sc->sc_state == DKW_STATE_RUNNING);
1328
1329 /*
1330 * We go through a complicated little dance to only open the parent
1331 * vnode once per wedge, no matter how many times the wedge is
1332 * opened. The reason? We see one dkopen() per open call, but
1333 * only dkclose() on the last close.
1334 */
1335 mutex_enter(&sc->sc_dk.dk_openlock);
1336 mutex_enter(&sc->sc_parent->dk_rawlock);
1337 if (sc->sc_dk.dk_openmask == 0) {
1338 error = dkfirstopen(sc, flags);
1339 if (error)
1340 goto out;
1341 } else if (flags & ~sc->sc_mode & FWRITE) {
1342 /*
1343 * The parent is already open, but the previous attempt
1344 * to open it read/write failed and fell back to
1345 * read-only. In that case, we assume the medium is
1346 * read-only and fail to open the wedge read/write.
1347 */
1348 error = EROFS;
1349 goto out;
1350 }
1351 KASSERT(sc->sc_mode != 0);
1352 KASSERTMSG(sc->sc_mode & FREAD, "%s: sc_mode=%x",
1353 device_xname(sc->sc_dev), sc->sc_mode);
1354 KASSERTMSG((flags & FWRITE) ? (sc->sc_mode & FWRITE) : 1,
1355 "%s: flags=%x sc_mode=%x",
1356 device_xname(sc->sc_dev), flags, sc->sc_mode);
1357 if (fmt == S_IFCHR)
1358 sc->sc_dk.dk_copenmask |= 1;
1359 else
1360 sc->sc_dk.dk_bopenmask |= 1;
1361 sc->sc_dk.dk_openmask =
1362 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1363
1364 out: mutex_exit(&sc->sc_parent->dk_rawlock);
1365 mutex_exit(&sc->sc_dk.dk_openlock);
1366 return error;
1367 }
1368
/*
 * dkfirstopen:
 *
 *	Handle the first open of a wedge: open the parent device if no
 *	sibling wedge has it open yet, otherwise inherit the mode from
 *	an already-open sibling.  Bumps the parent's raw open count.
 *
 *	Called with dk_openlock and dk_rawlock held.
 */
static int
dkfirstopen(struct dkwedge_softc *sc, int flags)
{
	struct dkwedge_softc *nsc;
	struct vnode *vp;
	int mode;
	int error;

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));

	if (sc->sc_parent->dk_rawopens == 0) {
		KASSERT(sc->sc_parent->dk_rawvp == NULL);
		/*
		 * Try open read-write. If this fails for EROFS
		 * and wedge is read-only, retry to open read-only.
		 */
		mode = FREAD | FWRITE;
		error = dk_open_parent(sc->sc_pdev, mode, &vp);
		if (error == EROFS && (flags & FWRITE) == 0) {
			mode &= ~FWRITE;
			error = dk_open_parent(sc->sc_pdev, mode, &vp);
		}
		if (error)
			return error;
		KASSERT(vp != NULL);
		sc->sc_parent->dk_rawvp = vp;
	} else {
		/*
		 * Retrieve mode from an already opened wedge.
		 *
		 * At this point, dk_rawopens is bounded by the number
		 * of dkwedge devices in the system, which is limited
		 * by autoconf device numbering to INT_MAX.  Since
		 * dk_rawopens is unsigned, this can't overflow.
		 */
		KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
		KASSERT(sc->sc_parent->dk_rawvp != NULL);
		mode = 0;
		mutex_enter(&sc->sc_parent->dk_openlock);
		/*
		 * NOTE(review): if no sibling wedge is open (e.g. the
		 * raw open count was bumped by dkwedge_read()), mode
		 * stays 0 here — presumably callers guarantee that
		 * cannot coincide with a first wedge open; verify.
		 */
		LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
			if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
				continue;
			mode = nsc->sc_mode;
			break;
		}
		mutex_exit(&sc->sc_parent->dk_openlock);
	}
	sc->sc_mode = mode;
	sc->sc_parent->dk_rawopens++;

	return 0;
}
1422
/*
 * dklastclose:
 *
 *	Handle the last close of a wedge: drop our raw open count on
 *	the parent and, if it reaches zero, close the parent device.
 *
 *	Called with dk_openlock and dk_rawlock held.
 */
static void
dklastclose(struct dkwedge_softc *sc)
{

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
	KASSERT(sc->sc_parent->dk_rawopens > 0);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	if (--sc->sc_parent->dk_rawopens == 0) {
		struct vnode *const vp = sc->sc_parent->dk_rawvp;
		const int mode = sc->sc_mode;

		/* Clear state before the close, which may sleep. */
		sc->sc_parent->dk_rawvp = NULL;
		sc->sc_mode = 0;

		dk_close_parent(vp, mode);
	}
}
1442
1443 /*
1444 * dkclose: [devsw entry point]
1445 *
1446 * Close a wedge.
1447 */
1448 static int
1449 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1450 {
1451 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1452
1453 /*
1454 * dkclose can be called even if dkopen didn't succeed, so we
1455 * have to handle the same possibility that the wedge may not
1456 * exist.
1457 */
1458 if (sc == NULL)
1459 return ENXIO;
1460 KASSERT(sc->sc_dev != NULL);
1461 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1462 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1463
1464 mutex_enter(&sc->sc_dk.dk_openlock);
1465 mutex_enter(&sc->sc_parent->dk_rawlock);
1466
1467 KASSERT(sc->sc_dk.dk_openmask != 0);
1468
1469 if (fmt == S_IFCHR)
1470 sc->sc_dk.dk_copenmask &= ~1;
1471 else
1472 sc->sc_dk.dk_bopenmask &= ~1;
1473 sc->sc_dk.dk_openmask =
1474 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1475
1476 if (sc->sc_dk.dk_openmask == 0) {
1477 dklastclose(sc);
1478 }
1479
1480 mutex_exit(&sc->sc_parent->dk_rawlock);
1481 mutex_exit(&sc->sc_dk.dk_openlock);
1482
1483 return 0;
1484 }
1485
1486 /*
1487 * dkcancel: [devsw entry point]
1488 *
1489 * Cancel any pending I/O operations waiting on a wedge.
1490 */
1491 static int
1492 dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
1493 {
1494 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1495
1496 KASSERT(sc != NULL);
1497 KASSERT(sc->sc_dev != NULL);
1498 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1499 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1500
1501 /*
1502 * Disk I/O is expected to complete or fail within a reasonable
1503 * timeframe -- it's storage, not communication. Further, the
1504 * character and block device interface guarantees that prior
1505 * reads and writes have completed or failed by the time close
1506 * returns -- we are not to cancel them here. If the parent
1507 * device's hardware is gone, the parent driver can make them
1508 * fail. Nothing for dk(4) itself to do.
1509 */
1510
1511 return 0;
1512 }
1513
1514 /*
1515 * dkstrategy: [devsw entry point]
1516 *
1517 * Perform I/O based on the wedge I/O strategy.
1518 */
1519 static void
1520 dkstrategy(struct buf *bp)
1521 {
1522 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1523 uint64_t p_size, p_offset;
1524
1525 KASSERT(sc != NULL);
1526 KASSERT(sc->sc_dev != NULL);
1527 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1528 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1529 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1530
1531 /* If it's an empty transfer, wake up the top half now. */
1532 if (bp->b_bcount == 0)
1533 goto done;
1534
1535 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1536 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1537
1538 /* Make sure it's in-range. */
1539 if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
1540 goto done;
1541
1542 /* Translate it to the parent's raw LBA. */
1543 bp->b_rawblkno = bp->b_blkno + p_offset;
1544
1545 /* Place it in the queue and start I/O on the unit. */
1546 mutex_enter(&sc->sc_iolock);
1547 disk_wait(&sc->sc_dk);
1548 bufq_put(sc->sc_bufq, bp);
1549 mutex_exit(&sc->sc_iolock);
1550
1551 dkstart(sc);
1552 return;
1553
1554 done:
1555 bp->b_resid = bp->b_bcount;
1556 biodone(bp);
1557 }
1558
1559 /*
1560 * dkstart:
1561 *
1562 * Start I/O that has been enqueued on the wedge.
1563 */
1564 static void
1565 dkstart(struct dkwedge_softc *sc)
1566 {
1567 struct vnode *vp;
1568 struct buf *bp, *nbp;
1569
1570 mutex_enter(&sc->sc_iolock);
1571
1572 /* Do as much work as has been enqueued. */
1573 while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
1574 if (sc->sc_iostop) {
1575 (void) bufq_get(sc->sc_bufq);
1576 mutex_exit(&sc->sc_iolock);
1577 bp->b_error = ENXIO;
1578 bp->b_resid = bp->b_bcount;
1579 biodone(bp);
1580 mutex_enter(&sc->sc_iolock);
1581 continue;
1582 }
1583
1584 /* fetch an I/O buf with sc_iolock dropped */
1585 mutex_exit(&sc->sc_iolock);
1586 nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
1587 mutex_enter(&sc->sc_iolock);
1588 if (nbp == NULL) {
1589 /*
1590 * No resources to run this request; leave the
1591 * buffer queued up, and schedule a timer to
1592 * restart the queue in 1/2 a second.
1593 */
1594 if (!sc->sc_iostop)
1595 callout_schedule(&sc->sc_restart_ch, hz/2);
1596 break;
1597 }
1598
1599 /*
1600 * fetch buf, this can fail if another thread
1601 * has already processed the queue, it can also
1602 * return a completely different buf.
1603 */
1604 bp = bufq_get(sc->sc_bufq);
1605 if (bp == NULL) {
1606 mutex_exit(&sc->sc_iolock);
1607 putiobuf(nbp);
1608 mutex_enter(&sc->sc_iolock);
1609 continue;
1610 }
1611
1612 /* Instrumentation. */
1613 disk_busy(&sc->sc_dk);
1614
1615 /* release lock for VOP_STRATEGY */
1616 mutex_exit(&sc->sc_iolock);
1617
1618 nbp->b_data = bp->b_data;
1619 nbp->b_flags = bp->b_flags;
1620 nbp->b_oflags = bp->b_oflags;
1621 nbp->b_cflags = bp->b_cflags;
1622 nbp->b_iodone = dkiodone;
1623 nbp->b_proc = bp->b_proc;
1624 nbp->b_blkno = bp->b_rawblkno;
1625 nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1626 nbp->b_bcount = bp->b_bcount;
1627 nbp->b_private = bp;
1628 BIO_COPYPRIO(nbp, bp);
1629
1630 vp = nbp->b_vp;
1631 if ((nbp->b_flags & B_READ) == 0) {
1632 mutex_enter(vp->v_interlock);
1633 vp->v_numoutput++;
1634 mutex_exit(vp->v_interlock);
1635 }
1636 VOP_STRATEGY(vp, nbp);
1637
1638 mutex_enter(&sc->sc_iolock);
1639 }
1640
1641 mutex_exit(&sc->sc_iolock);
1642 }
1643
1644 /*
1645 * dkiodone:
1646 *
1647 * I/O to a wedge has completed; alert the top half.
1648 */
1649 static void
1650 dkiodone(struct buf *bp)
1651 {
1652 struct buf *obp = bp->b_private;
1653 struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1654
1655 KASSERT(sc != NULL);
1656 KASSERT(sc->sc_dev != NULL);
1657
1658 if (bp->b_error != 0)
1659 obp->b_error = bp->b_error;
1660 obp->b_resid = bp->b_resid;
1661 putiobuf(bp);
1662
1663 mutex_enter(&sc->sc_iolock);
1664 disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1665 obp->b_flags & B_READ);
1666 mutex_exit(&sc->sc_iolock);
1667
1668 biodone(obp);
1669
1670 /* Kick the queue in case there is more work we can do. */
1671 dkstart(sc);
1672 }
1673
1674 /*
1675 * dkrestart:
1676 *
1677 * Restart the work queue after it was stalled due to
1678 * a resource shortage. Invoked via a callout.
1679 */
1680 static void
1681 dkrestart(void *v)
1682 {
1683 struct dkwedge_softc *sc = v;
1684
1685 dkstart(sc);
1686 }
1687
1688 /*
1689 * dkminphys:
1690 *
1691 * Call parent's minphys function.
1692 */
1693 static void
1694 dkminphys(struct buf *bp)
1695 {
1696 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1697 dev_t dev;
1698
1699 KASSERT(sc != NULL);
1700 KASSERT(sc->sc_dev != NULL);
1701
1702 dev = bp->b_dev;
1703 bp->b_dev = sc->sc_pdev;
1704 if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
1705 (*sc->sc_parent->dk_driver->d_minphys)(bp);
1706 else
1707 minphys(bp);
1708 bp->b_dev = dev;
1709 }
1710
1711 /*
1712 * dkread: [devsw entry point]
1713 *
1714 * Read from a wedge.
1715 */
1716 static int
1717 dkread(dev_t dev, struct uio *uio, int flags)
1718 {
1719 struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);
1720
1721 KASSERT(sc != NULL);
1722 KASSERT(sc->sc_dev != NULL);
1723 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1724 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1725
1726 return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
1727 }
1728
1729 /*
1730 * dkwrite: [devsw entry point]
1731 *
1732 * Write to a wedge.
1733 */
1734 static int
1735 dkwrite(dev_t dev, struct uio *uio, int flags)
1736 {
1737 struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);
1738
1739 KASSERT(sc != NULL);
1740 KASSERT(sc->sc_dev != NULL);
1741 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1742 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1743
1744 return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
1745 }
1746
1747 /*
1748 * dkioctl: [devsw entry point]
1749 *
1750 * Perform an ioctl request on a wedge.
1751 */
1752 static int
1753 dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1754 {
1755 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1756 int error = 0;
1757
1758 KASSERT(sc != NULL);
1759 KASSERT(sc->sc_dev != NULL);
1760 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1761 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1762 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1763
1764 /*
1765 * We pass NODEV instead of our device to indicate we don't
1766 * want to handle disklabel ioctls
1767 */
1768 error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
1769 if (error != EPASSTHROUGH)
1770 return error;
1771
1772 error = 0;
1773
1774 switch (cmd) {
1775 case DIOCGSTRATEGY:
1776 case DIOCGCACHE:
1777 case DIOCCACHESYNC:
1778 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
1779 l != NULL ? l->l_cred : NOCRED);
1780 break;
1781 case DIOCGWEDGEINFO: {
1782 struct dkwedge_info *dkw = data;
1783
1784 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
1785 sizeof(dkw->dkw_devname));
1786 memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1787 dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1788 strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
1789 sizeof(dkw->dkw_parent));
1790 dkw->dkw_offset = sc->sc_offset;
1791 dkw->dkw_size = dkwedge_size(sc);
1792 strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));
1793
1794 break;
1795 }
1796 case DIOCGSECTORALIGN: {
1797 struct disk_sectoralign *dsa = data;
1798 uint32_t r;
1799
1800 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
1801 l != NULL ? l->l_cred : NOCRED);
1802 if (error)
1803 break;
1804
1805 r = sc->sc_offset % dsa->dsa_alignment;
1806 if (r < dsa->dsa_firstaligned)
1807 dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
1808 else
1809 dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
1810 dsa->dsa_alignment) - r;
1811 dsa->dsa_firstaligned %= dsa->dsa_alignment;
1812 break;
1813 }
1814 default:
1815 error = ENOTTY;
1816 }
1817
1818 return error;
1819 }
1820
1821 /*
1822 * dkdiscard: [devsw entry point]
1823 *
1824 * Perform a discard-range request on a wedge.
1825 */
1826 static int
1827 dkdiscard(dev_t dev, off_t pos, off_t len)
1828 {
1829 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1830 uint64_t size = dkwedge_size(sc);
1831 unsigned shift;
1832 off_t offset, maxlen;
1833 int error;
1834
1835 KASSERT(sc != NULL);
1836 KASSERT(sc->sc_dev != NULL);
1837 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1838 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1839 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1840
1841 /* XXX check bounds on size/offset up front */
1842 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
1843 KASSERT(__type_fit(off_t, size));
1844 KASSERT(__type_fit(off_t, sc->sc_offset));
1845 KASSERT(0 <= sc->sc_offset);
1846 KASSERT(size <= (__type_max(off_t) >> shift));
1847 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size));
1848 offset = ((off_t)sc->sc_offset << shift);
1849 maxlen = ((off_t)size << shift);
1850
1851 if (len > maxlen)
1852 return EINVAL;
1853 if (pos > (maxlen - len))
1854 return EINVAL;
1855
1856 pos += offset;
1857
1858 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
1859 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
1860 VOP_UNLOCK(sc->sc_parent->dk_rawvp);
1861
1862 return error;
1863 }
1864
1865 /*
1866 * dksize: [devsw entry point]
1867 *
1868 * Query the size of a wedge for the purpose of performing a dump
1869 * or for swapping to.
1870 */
1871 static int
1872 dksize(dev_t dev)
1873 {
1874 /*
1875 * Don't bother taking a reference because this is only used
1876 * either (a) while the device is open (for swap), or (b) while
1877 * any multiprocessing is quiescent (for crash dumps).
1878 */
1879 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1880 uint64_t p_size;
1881 int rv = -1;
1882
1883 if (sc == NULL)
1884 return -1;
1885 if (sc->sc_state != DKW_STATE_RUNNING)
1886 return -1;
1887
1888 /* Our content type is static, no need to open the device. */
1889
1890 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1891 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1892 /* Saturate if we are larger than INT_MAX. */
1893 if (p_size > INT_MAX)
1894 rv = INT_MAX;
1895 else
1896 rv = (int)p_size;
1897 }
1898
1899 return rv;
1900 }
1901
1902 /*
1903 * dkdump: [devsw entry point]
1904 *
1905 * Perform a crash dump to a wedge.
1906 */
1907 static int
1908 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1909 {
1910 /*
1911 * Don't bother taking a reference because this is only used
1912 * while any multiprocessing is quiescent.
1913 */
1914 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1915 const struct bdevsw *bdev;
1916 uint64_t p_size, p_offset;
1917
1918 if (sc == NULL)
1919 return ENXIO;
1920 if (sc->sc_state != DKW_STATE_RUNNING)
1921 return ENXIO;
1922
1923 /* Our content type is static, no need to open the device. */
1924
1925 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1926 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1927 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0)
1928 return ENXIO;
1929 if (size % DEV_BSIZE != 0)
1930 return EINVAL;
1931
1932 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1933 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1934
1935 if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
1936 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1937 "p_size (%" PRIu64 ")\n", __func__, blkno,
1938 size/DEV_BSIZE, p_size);
1939 return EINVAL;
1940 }
1941
1942 bdev = bdevsw_lookup(sc->sc_pdev);
1943 return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1944 }
1945
1946 /*
1947 * config glue
1948 */
1949
1950 /*
1951 * dkwedge_find_partition
1952 *
1953 * Find wedge corresponding to the specified parent name
1954 * and offset/length.
1955 */
1956 static device_t
1957 dkwedge_find_partition_acquire(device_t parent, daddr_t startblk,
1958 uint64_t nblks)
1959 {
1960 struct dkwedge_softc *sc;
1961 int i;
1962 device_t wedge = NULL;
1963
1964 rw_enter(&dkwedges_lock, RW_READER);
1965 for (i = 0; i < ndkwedges; i++) {
1966 if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
1967 continue;
1968 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1969 sc->sc_offset == startblk &&
1970 dkwedge_size(sc) == nblks) {
1971 if (wedge) {
1972 printf("WARNING: double match for boot wedge "
1973 "(%s, %s)\n",
1974 device_xname(wedge),
1975 device_xname(sc->sc_dev));
1976 continue;
1977 }
1978 wedge = sc->sc_dev;
1979 device_acquire(wedge);
1980 }
1981 }
1982 rw_exit(&dkwedges_lock);
1983
1984 return wedge;
1985 }
1986
1987 /* XXX unsafe */
1988 device_t
1989 dkwedge_find_partition(device_t parent, daddr_t startblk,
1990 uint64_t nblks)
1991 {
1992 device_t dv;
1993
1994 if ((dv = dkwedge_find_partition_acquire(parent, startblk, nblks))
1995 == NULL)
1996 return NULL;
1997 device_release(dv);
1998 return dv;
1999 }
2000
/*
 * dkwedge_get_parent_name:
 *
 *	Return the parent disk's name for a wedge dev_t, or NULL if
 *	dev is not a dk(4) major or no such wedge exists.
 *
 *	NOTE(review): the returned pointer refers into the parent's
 *	struct disk and is handed out after device_release(); it
 *	presumably remains valid because the parent outlives its
 *	wedges -- confirm against callers.
 */
const char *
dkwedge_get_parent_name(dev_t dev)
{
	/* XXX: perhaps do this in lookup? */
	int bmaj = bdevsw_lookup_major(&dk_bdevsw);
	int cmaj = cdevsw_lookup_major(&dk_cdevsw);

	if (major(dev) != bmaj && major(dev) != cmaj)
		return NULL;

	struct dkwedge_softc *const sc = dkwedge_lookup_acquire(dev);
	if (sc == NULL)
		return NULL;
	const char *const name = sc->sc_parent->dk_name;
	device_release(sc->sc_dev);
	return name;
}
2018