dk.c revision 1.156 1 /* $NetBSD: dk.c,v 1.156 2023/05/09 13:14:14 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.156 2023/05/09 13:14:14 riastradh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/types.h>
41
42 #include <sys/buf.h>
43 #include <sys/bufq.h>
44 #include <sys/callout.h>
45 #include <sys/conf.h>
46 #include <sys/device.h>
47 #include <sys/disk.h>
48 #include <sys/disklabel.h>
49 #include <sys/errno.h>
50 #include <sys/fcntl.h>
51 #include <sys/ioctl.h>
52 #include <sys/kauth.h>
53 #include <sys/kernel.h>
54 #include <sys/malloc.h>
55 #include <sys/pool.h>
56 #include <sys/proc.h>
57 #include <sys/rwlock.h>
58 #include <sys/stat.h>
59 #include <sys/systm.h>
60 #include <sys/vnode.h>
61
62 #include <miscfs/specfs/specdev.h>
63
64 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
65
/*
 * Lifecycle of a wedge: dkwedge_add() creates it LARVAL and flips it
 * to RUNNING once its disk is attached; dkwedge_detach() marks it
 * DYING when teardown starts and DEAD just before the softc is freed.
 */
typedef enum {
	DKW_STATE_LARVAL	= 0,
	DKW_STATE_RUNNING	= 1,
	DKW_STATE_DYING		= 2,
	DKW_STATE_DEAD		= 666
} dkwedge_state_t;
72
73 struct dkwedge_softc {
74 device_t sc_dev; /* pointer to our pseudo-device */
75 struct cfdata sc_cfdata; /* our cfdata structure */
76 uint8_t sc_wname[128]; /* wedge name (Unicode, UTF-8) */
77
78 dkwedge_state_t sc_state; /* state this wedge is in */
79
80 struct disk *sc_parent; /* parent disk */
81 daddr_t sc_offset; /* LBA offset of wedge in parent */
82 krwlock_t sc_sizelock;
83 uint64_t sc_size; /* size of wedge in blocks */
84 char sc_ptype[32]; /* partition type */
85 dev_t sc_pdev; /* cached parent's dev_t */
86 /* link on parent's wedge list */
87 LIST_ENTRY(dkwedge_softc) sc_plink;
88
89 struct disk sc_dk; /* our own disk structure */
90 struct bufq_state *sc_bufq; /* buffer queue */
91 struct callout sc_restart_ch; /* callout to restart I/O */
92
93 kmutex_t sc_iolock;
94 bool sc_iostop; /* don't schedule restart */
95 int sc_mode; /* parent open mode */
96 };
97
98 static int dkwedge_match(device_t, cfdata_t, void *);
99 static void dkwedge_attach(device_t, device_t, void *);
100 static int dkwedge_detach(device_t, int);
101
102 static void dkstart(struct dkwedge_softc *);
103 static void dkiodone(struct buf *);
104 static void dkrestart(void *);
105 static void dkminphys(struct buf *);
106
107 static int dkfirstopen(struct dkwedge_softc *, int);
108 static void dklastclose(struct dkwedge_softc *);
109 static int dkwedge_detach(device_t, int);
110 static void dkwedge_delall1(struct disk *, bool);
111 static int dkwedge_del1(struct dkwedge_info *, int);
112 static int dk_open_parent(dev_t, int, struct vnode **);
113 static int dk_close_parent(struct vnode *, int);
114
115 static int dkunit(dev_t);
116
117 static dev_type_open(dkopen);
118 static dev_type_close(dkclose);
119 static dev_type_cancel(dkcancel);
120 static dev_type_read(dkread);
121 static dev_type_write(dkwrite);
122 static dev_type_ioctl(dkioctl);
123 static dev_type_strategy(dkstrategy);
124 static dev_type_dump(dkdump);
125 static dev_type_size(dksize);
126 static dev_type_discard(dkdiscard);
127
128 CFDRIVER_DECL(dk, DV_DISK, NULL);
129 CFATTACH_DECL3_NEW(dk, 0,
130 dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
131 DVF_DETACH_SHUTDOWN);
132
133 const struct bdevsw dk_bdevsw = {
134 .d_open = dkopen,
135 .d_close = dkclose,
136 .d_cancel = dkcancel,
137 .d_strategy = dkstrategy,
138 .d_ioctl = dkioctl,
139 .d_dump = dkdump,
140 .d_psize = dksize,
141 .d_discard = dkdiscard,
142 .d_cfdriver = &dk_cd,
143 .d_devtounit = dkunit,
144 .d_flag = D_DISK | D_MPSAFE
145 };
146
147 const struct cdevsw dk_cdevsw = {
148 .d_open = dkopen,
149 .d_close = dkclose,
150 .d_cancel = dkcancel,
151 .d_read = dkread,
152 .d_write = dkwrite,
153 .d_ioctl = dkioctl,
154 .d_stop = nostop,
155 .d_tty = notty,
156 .d_poll = nopoll,
157 .d_mmap = nommap,
158 .d_kqfilter = nokqfilter,
159 .d_discard = dkdiscard,
160 .d_cfdriver = &dk_cd,
161 .d_devtounit = dkunit,
162 .d_flag = D_DISK | D_MPSAFE
163 };
164
165 static struct dkwedge_softc **dkwedges;
166 static u_int ndkwedges;
167 static krwlock_t dkwedges_lock;
168
169 static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
170 static krwlock_t dkwedge_discovery_methods_lock;
171
172 /*
173 * dkwedge_match:
174 *
175 * Autoconfiguration match function for pseudo-device glue.
176 */
177 static int
178 dkwedge_match(device_t parent, cfdata_t match, void *aux)
179 {
180
181 /* Pseudo-device; always present. */
182 return 1;
183 }
184
185 /*
186 * dkwedge_attach:
187 *
188 * Autoconfiguration attach function for pseudo-device glue.
189 */
190 static void
191 dkwedge_attach(device_t parent, device_t self, void *aux)
192 {
193
194 if (!pmf_device_register(self, NULL, NULL))
195 aprint_error_dev(self, "couldn't establish power handler\n");
196 }
197
198 /*
199 * dkwedge_compute_pdev:
200 *
201 * Compute the parent disk's dev_t.
202 */
203 static int
204 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
205 {
206 const char *name, *cp;
207 devmajor_t pmaj;
208 int punit;
209 char devname[16];
210
211 name = pname;
212 switch (type) {
213 case VBLK:
214 pmaj = devsw_name2blk(name, devname, sizeof(devname));
215 break;
216 case VCHR:
217 pmaj = devsw_name2chr(name, devname, sizeof(devname));
218 break;
219 default:
220 pmaj = NODEVMAJOR;
221 break;
222 }
223 if (pmaj == NODEVMAJOR)
224 return ENXIO;
225
226 name += strlen(devname);
227 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
228 punit = (punit * 10) + (*cp - '0');
229 if (cp == name) {
230 /* Invalid parent disk name. */
231 return ENXIO;
232 }
233
234 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
235
236 return 0;
237 }
238
239 /*
240 * dkwedge_array_expand:
241 *
242 * Expand the dkwedges array.
243 *
244 * Releases and reacquires dkwedges_lock as a writer.
245 */
246 static int
247 dkwedge_array_expand(void)
248 {
249
250 const unsigned incr = 16;
251 unsigned newcnt, oldcnt;
252 struct dkwedge_softc **newarray = NULL, **oldarray = NULL;
253
254 KASSERT(rw_write_held(&dkwedges_lock));
255
256 oldcnt = ndkwedges;
257 oldarray = dkwedges;
258
259 if (oldcnt >= INT_MAX - incr)
260 return ENFILE; /* XXX */
261 newcnt = oldcnt + incr;
262
263 rw_exit(&dkwedges_lock);
264 newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
265 M_WAITOK|M_ZERO);
266 rw_enter(&dkwedges_lock, RW_WRITER);
267
268 if (ndkwedges != oldcnt || dkwedges != oldarray) {
269 oldarray = NULL; /* already recycled */
270 goto out;
271 }
272
273 if (oldarray != NULL)
274 memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
275 dkwedges = newarray;
276 newarray = NULL; /* transferred to dkwedges */
277 ndkwedges = newcnt;
278
279 out: rw_exit(&dkwedges_lock);
280 if (oldarray != NULL)
281 free(oldarray, M_DKWEDGE);
282 if (newarray != NULL)
283 free(newarray, M_DKWEDGE);
284 rw_enter(&dkwedges_lock, RW_WRITER);
285 return 0;
286 }
287
288 static void
289 dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
290 {
291
292 rw_init(&sc->sc_sizelock);
293 sc->sc_size = size;
294 }
295
296 static void
297 dkwedge_size_fini(struct dkwedge_softc *sc)
298 {
299
300 rw_destroy(&sc->sc_sizelock);
301 }
302
303 static uint64_t
304 dkwedge_size(struct dkwedge_softc *sc)
305 {
306 uint64_t size;
307
308 rw_enter(&sc->sc_sizelock, RW_READER);
309 size = sc->sc_size;
310 rw_exit(&sc->sc_sizelock);
311
312 return size;
313 }
314
315 static void
316 dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
317 {
318
319 KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));
320
321 rw_enter(&sc->sc_sizelock, RW_WRITER);
322 KASSERTMSG(size >= sc->sc_size,
323 "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
324 sc->sc_size, size);
325 sc->sc_size = size;
326 rw_exit(&sc->sc_sizelock);
327 }
328
329 static void
330 dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
331 {
332 struct disk *dk = &sc->sc_dk;
333 struct disk_geom *dg = &dk->dk_geom;
334
335 KASSERT(mutex_owned(&pdk->dk_openlock));
336
337 memset(dg, 0, sizeof(*dg));
338
339 dg->dg_secperunit = dkwedge_size(sc);
340 dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;
341
342 /* fake numbers, 1 cylinder is 1 MB with default sector size */
343 dg->dg_nsectors = 32;
344 dg->dg_ntracks = 64;
345 dg->dg_ncylinders =
346 dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);
347
348 disk_set_info(sc->sc_dev, dk, NULL);
349 }
350
351 /*
352 * dkwedge_add: [exported function]
353 *
354 * Add a disk wedge based on the provided information.
355 *
356 * The incoming dkw_devname[] is ignored, instead being
357 * filled in and returned to the caller.
358 */
359 int
360 dkwedge_add(struct dkwedge_info *dkw)
361 {
362 struct dkwedge_softc *sc, *lsc;
363 struct disk *pdk;
364 u_int unit;
365 int error;
366 dev_t pdev;
367
368 dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
369 pdk = disk_find(dkw->dkw_parent);
370 if (pdk == NULL)
371 return ENXIO;
372
373 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
374 if (error)
375 return error;
376
377 if (dkw->dkw_offset < 0)
378 return EINVAL;
379
380 /*
381 * Check for an existing wedge at the same disk offset. Allow
382 * updating a wedge if the only change is the size, and the new
383 * size is larger than the old.
384 */
385 sc = NULL;
386 mutex_enter(&pdk->dk_openlock);
387 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
388 if (lsc->sc_offset != dkw->dkw_offset)
389 continue;
390 if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
391 break;
392 if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
393 break;
394 if (dkwedge_size(lsc) > dkw->dkw_size)
395 break;
396
397 sc = lsc;
398 dkwedge_size_increase(sc, dkw->dkw_size);
399 dk_set_geometry(sc, pdk);
400
401 break;
402 }
403 mutex_exit(&pdk->dk_openlock);
404
405 if (sc != NULL)
406 goto announce;
407
408 sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
409 sc->sc_state = DKW_STATE_LARVAL;
410 sc->sc_parent = pdk;
411 sc->sc_pdev = pdev;
412 sc->sc_offset = dkw->dkw_offset;
413 dkwedge_size_init(sc, dkw->dkw_size);
414
415 memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
416 sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';
417
418 memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
419 sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';
420
421 bufq_alloc(&sc->sc_bufq, "fcfs", 0);
422
423 callout_init(&sc->sc_restart_ch, 0);
424 callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);
425
426 mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);
427
428 /*
429 * Wedge will be added; increment the wedge count for the parent.
430 * Only allow this to happen if RAW_PART is the only thing open.
431 */
432 mutex_enter(&pdk->dk_openlock);
433 if (pdk->dk_openmask & ~(1 << RAW_PART))
434 error = EBUSY;
435 else {
436 /* Check for wedge overlap. */
437 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
438 /* XXX arithmetic overflow */
439 uint64_t size = dkwedge_size(sc);
440 uint64_t lsize = dkwedge_size(lsc);
441 daddr_t lastblk = sc->sc_offset + size - 1;
442 daddr_t llastblk = lsc->sc_offset + lsize - 1;
443
444 if (sc->sc_offset >= lsc->sc_offset &&
445 sc->sc_offset <= llastblk) {
446 /* Overlaps the tail of the existing wedge. */
447 break;
448 }
449 if (lastblk >= lsc->sc_offset &&
450 lastblk <= llastblk) {
451 /* Overlaps the head of the existing wedge. */
452 break;
453 }
454 }
455 if (lsc != NULL) {
456 if (sc->sc_offset == lsc->sc_offset &&
457 dkwedge_size(sc) == dkwedge_size(lsc) &&
458 strcmp(sc->sc_wname, lsc->sc_wname) == 0)
459 error = EEXIST;
460 else
461 error = EINVAL;
462 } else {
463 pdk->dk_nwedges++;
464 LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
465 }
466 }
467 mutex_exit(&pdk->dk_openlock);
468 if (error) {
469 mutex_destroy(&sc->sc_iolock);
470 bufq_free(sc->sc_bufq);
471 dkwedge_size_fini(sc);
472 free(sc, M_DKWEDGE);
473 return error;
474 }
475
476 /* Fill in our cfdata for the pseudo-device glue. */
477 sc->sc_cfdata.cf_name = dk_cd.cd_name;
478 sc->sc_cfdata.cf_atname = dk_ca.ca_name;
479 /* sc->sc_cfdata.cf_unit set below */
480 sc->sc_cfdata.cf_fstate = FSTATE_STAR;
481
482 /* Insert the larval wedge into the array. */
483 rw_enter(&dkwedges_lock, RW_WRITER);
484 for (error = 0;;) {
485 struct dkwedge_softc **scpp;
486
487 /*
488 * Check for a duplicate wname while searching for
489 * a slot.
490 */
491 for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
492 if (dkwedges[unit] == NULL) {
493 if (scpp == NULL) {
494 scpp = &dkwedges[unit];
495 sc->sc_cfdata.cf_unit = unit;
496 }
497 } else {
498 /* XXX Unicode. */
499 if (strcmp(dkwedges[unit]->sc_wname,
500 sc->sc_wname) == 0) {
501 error = EEXIST;
502 break;
503 }
504 }
505 }
506 if (error)
507 break;
508 KASSERT(unit == ndkwedges);
509 if (scpp == NULL) {
510 error = dkwedge_array_expand();
511 if (error)
512 break;
513 } else {
514 KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
515 *scpp = sc;
516 break;
517 }
518 }
519 rw_exit(&dkwedges_lock);
520 if (error) {
521 mutex_enter(&pdk->dk_openlock);
522 pdk->dk_nwedges--;
523 LIST_REMOVE(sc, sc_plink);
524 mutex_exit(&pdk->dk_openlock);
525
526 mutex_destroy(&sc->sc_iolock);
527 bufq_free(sc->sc_bufq);
528 dkwedge_size_fini(sc);
529 free(sc, M_DKWEDGE);
530 return error;
531 }
532
533 /*
534 * Now that we know the unit #, attach a pseudo-device for
535 * this wedge instance. This will provide us with the
536 * device_t necessary for glue to other parts of the system.
537 *
538 * This should never fail, unless we're almost totally out of
539 * memory.
540 */
541 if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
542 aprint_error("%s%u: unable to attach pseudo-device\n",
543 sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);
544
545 rw_enter(&dkwedges_lock, RW_WRITER);
546 KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
547 dkwedges[sc->sc_cfdata.cf_unit] = NULL;
548 rw_exit(&dkwedges_lock);
549
550 mutex_enter(&pdk->dk_openlock);
551 pdk->dk_nwedges--;
552 LIST_REMOVE(sc, sc_plink);
553 mutex_exit(&pdk->dk_openlock);
554
555 mutex_destroy(&sc->sc_iolock);
556 bufq_free(sc->sc_bufq);
557 dkwedge_size_fini(sc);
558 free(sc, M_DKWEDGE);
559 return ENOMEM;
560 }
561
562 /*
563 * XXX Really ought to make the disk_attach() and the changing
564 * of state to RUNNING atomic.
565 */
566
567 disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
568 mutex_enter(&pdk->dk_openlock);
569 dk_set_geometry(sc, pdk);
570 mutex_exit(&pdk->dk_openlock);
571 disk_attach(&sc->sc_dk);
572
573 /* Disk wedge is ready for use! */
574 sc->sc_state = DKW_STATE_RUNNING;
575
576 announce:
577 /* Announce our arrival. */
578 aprint_normal(
579 "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
580 device_xname(sc->sc_dev), pdk->dk_name,
581 sc->sc_wname, /* XXX Unicode */
582 dkwedge_size(sc), sc->sc_offset,
583 sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);
584
585 /* Return the devname to the caller. */
586 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
587 sizeof(dkw->dkw_devname));
588
589 return 0;
590 }
591
592 /*
593 * dkwedge_find:
594 *
595 * Lookup a disk wedge based on the provided information.
596 * NOTE: We look up the wedge based on the wedge devname,
597 * not wname.
598 *
599 * Return NULL if the wedge is not found, otherwise return
600 * the wedge's softc. Assign the wedge's unit number to unitp
601 * if unitp is not NULL.
602 */
603 static struct dkwedge_softc *
604 dkwedge_find(struct dkwedge_info *dkw, u_int *unitp)
605 {
606 struct dkwedge_softc *sc = NULL;
607 u_int unit;
608
609 /* Find our softc. */
610 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
611 rw_enter(&dkwedges_lock, RW_READER);
612 for (unit = 0; unit < ndkwedges; unit++) {
613 if ((sc = dkwedges[unit]) != NULL &&
614 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
615 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
616 break;
617 }
618 }
619 rw_exit(&dkwedges_lock);
620 if (sc == NULL)
621 return NULL;
622
623 if (unitp != NULL)
624 *unitp = unit;
625
626 return sc;
627 }
628
/*
 * dkwedge_del:		[exported function]
 *
 *	Delete the disk wedge described by `dkw'.  The wedge is
 *	identified by its devname, not by its wname.  This is
 *	dkwedge_del1() with no detach flags.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{
	const int noflags = 0;

	return dkwedge_del1(dkw, noflags);
}
642
643 int
644 dkwedge_del1(struct dkwedge_info *dkw, int flags)
645 {
646 struct dkwedge_softc *sc = NULL;
647
648 /* Find our softc. */
649 if ((sc = dkwedge_find(dkw, NULL)) == NULL)
650 return ESRCH;
651
652 return config_detach(sc->sc_dev, flags);
653 }
654
655 /*
656 * dkwedge_detach:
657 *
658 * Autoconfiguration detach function for pseudo-device glue.
659 */
660 static int
661 dkwedge_detach(device_t self, int flags)
662 {
663 struct dkwedge_softc *sc = NULL;
664 u_int unit;
665 int bmaj, cmaj, rc;
666
667 rw_enter(&dkwedges_lock, RW_WRITER);
668 for (unit = 0; unit < ndkwedges; unit++) {
669 if ((sc = dkwedges[unit]) != NULL && sc->sc_dev == self)
670 break;
671 }
672 if (unit == ndkwedges)
673 rc = ENXIO;
674 else if ((rc = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self,
675 flags)) == 0) {
676 /* Mark the wedge as dying. */
677 sc->sc_state = DKW_STATE_DYING;
678 }
679 rw_exit(&dkwedges_lock);
680
681 if (rc != 0)
682 return rc;
683
684 pmf_device_deregister(self);
685
686 /* Kill any pending restart. */
687 mutex_enter(&sc->sc_iolock);
688 sc->sc_iostop = true;
689 mutex_exit(&sc->sc_iolock);
690 callout_halt(&sc->sc_restart_ch, NULL);
691
692 /* Locate the wedge major numbers. */
693 bmaj = bdevsw_lookup_major(&dk_bdevsw);
694 cmaj = cdevsw_lookup_major(&dk_cdevsw);
695
696 /* Nuke the vnodes for any open instances. */
697 vdevgone(bmaj, unit, unit, VBLK);
698 vdevgone(cmaj, unit, unit, VCHR);
699
700 /*
701 * At this point, all block device opens have been closed,
702 * synchronously flushing any buffered writes; and all
703 * character device I/O operations have completed
704 * synchronously, and character device opens have been closed.
705 *
706 * So there can be no more opens or queued buffers by now.
707 */
708 KASSERT(sc->sc_dk.dk_openmask == 0);
709 KASSERT(bufq_peek(sc->sc_bufq) == NULL);
710 bufq_drain(sc->sc_bufq);
711
712 /* Announce our departure. */
713 aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
714 sc->sc_parent->dk_name,
715 sc->sc_wname); /* XXX Unicode */
716
717 mutex_enter(&sc->sc_parent->dk_openlock);
718 sc->sc_parent->dk_nwedges--;
719 LIST_REMOVE(sc, sc_plink);
720 mutex_exit(&sc->sc_parent->dk_openlock);
721
722 /* Delete our buffer queue. */
723 bufq_free(sc->sc_bufq);
724
725 /* Detach from the disk list. */
726 disk_detach(&sc->sc_dk);
727 disk_destroy(&sc->sc_dk);
728
729 /* Poof. */
730 rw_enter(&dkwedges_lock, RW_WRITER);
731 KASSERT(dkwedges[unit] == sc);
732 dkwedges[unit] = NULL;
733 sc->sc_state = DKW_STATE_DEAD;
734 rw_exit(&dkwedges_lock);
735
736 mutex_destroy(&sc->sc_iolock);
737 dkwedge_size_fini(sc);
738
739 free(sc, M_DKWEDGE);
740
741 return 0;
742 }
743
/*
 * dkwedge_delall:	[exported function]
 *
 *	Forcibly delete every wedge on the given disk, whether or not
 *	it is in use.  Used when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{
	const bool idleonly = false;

	dkwedge_delall1(pdk, idleonly);
}
756
/*
 * dkwedge_delidle:	[exported function]
 *
 *	Delete the wedges on the given disk that are idle, leaving
 *	wedges that are open in place.  Used by ioctl(DIOCRMWEDGES).
 */
void
dkwedge_delidle(struct disk *pdk)
{
	const bool idleonly = true;

	dkwedge_delall1(pdk, idleonly);
}
769
770 static void
771 dkwedge_delall1(struct disk *pdk, bool idleonly)
772 {
773 struct dkwedge_info dkw;
774 struct dkwedge_softc *sc;
775 int flags;
776
777 flags = DETACH_QUIET;
778 if (!idleonly)
779 flags |= DETACH_FORCE;
780
781 for (;;) {
782 mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
783 mutex_enter(&pdk->dk_openlock);
784 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
785 if (!idleonly || sc->sc_dk.dk_openmask == 0)
786 break;
787 }
788 if (sc == NULL) {
789 KASSERT(idleonly || pdk->dk_nwedges == 0);
790 mutex_exit(&pdk->dk_openlock);
791 mutex_exit(&pdk->dk_rawlock);
792 return;
793 }
794 strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
795 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
796 sizeof(dkw.dkw_devname));
797 mutex_exit(&pdk->dk_openlock);
798 mutex_exit(&pdk->dk_rawlock);
799 (void) dkwedge_del1(&dkw, flags);
800 }
801 }
802
803 /*
804 * dkwedge_list: [exported function]
805 *
806 * List all of the wedges on a particular disk.
807 */
808 int
809 dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
810 {
811 struct uio uio;
812 struct iovec iov;
813 struct dkwedge_softc *sc;
814 struct dkwedge_info dkw;
815 int error = 0;
816
817 iov.iov_base = dkwl->dkwl_buf;
818 iov.iov_len = dkwl->dkwl_bufsize;
819
820 uio.uio_iov = &iov;
821 uio.uio_iovcnt = 1;
822 uio.uio_offset = 0;
823 uio.uio_resid = dkwl->dkwl_bufsize;
824 uio.uio_rw = UIO_READ;
825 KASSERT(l == curlwp);
826 uio.uio_vmspace = l->l_proc->p_vmspace;
827
828 dkwl->dkwl_ncopied = 0;
829
830 mutex_enter(&pdk->dk_openlock);
831 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
832 if (uio.uio_resid < sizeof(dkw))
833 break;
834
835 if (sc->sc_state != DKW_STATE_RUNNING)
836 continue;
837
838 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
839 sizeof(dkw.dkw_devname));
840 memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
841 dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
842 strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
843 sizeof(dkw.dkw_parent));
844 dkw.dkw_offset = sc->sc_offset;
845 dkw.dkw_size = dkwedge_size(sc);
846 strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));
847
848 error = uiomove(&dkw, sizeof(dkw), &uio);
849 if (error)
850 break;
851 dkwl->dkwl_ncopied++;
852 }
853 dkwl->dkwl_nwedges = pdk->dk_nwedges;
854 mutex_exit(&pdk->dk_openlock);
855
856 return error;
857 }
858
859 device_t
860 dkwedge_find_by_wname(const char *wname)
861 {
862 device_t dv = NULL;
863 struct dkwedge_softc *sc;
864 int i;
865
866 rw_enter(&dkwedges_lock, RW_READER);
867 for (i = 0; i < ndkwedges; i++) {
868 if ((sc = dkwedges[i]) == NULL)
869 continue;
870 if (strcmp(sc->sc_wname, wname) == 0) {
871 if (dv != NULL) {
872 printf(
873 "WARNING: double match for wedge name %s "
874 "(%s, %s)\n", wname, device_xname(dv),
875 device_xname(sc->sc_dev));
876 continue;
877 }
878 dv = sc->sc_dev;
879 }
880 }
881 rw_exit(&dkwedges_lock);
882 return dv;
883 }
884
885 device_t
886 dkwedge_find_by_parent(const char *name, size_t *i)
887 {
888
889 rw_enter(&dkwedges_lock, RW_READER);
890 for (; *i < (size_t)ndkwedges; (*i)++) {
891 struct dkwedge_softc *sc;
892 if ((sc = dkwedges[*i]) == NULL)
893 continue;
894 if (strcmp(sc->sc_parent->dk_name, name) != 0)
895 continue;
896 rw_exit(&dkwedges_lock);
897 return sc->sc_dev;
898 }
899 rw_exit(&dkwedges_lock);
900 return NULL;
901 }
902
903 void
904 dkwedge_print_wnames(void)
905 {
906 struct dkwedge_softc *sc;
907 int i;
908
909 rw_enter(&dkwedges_lock, RW_READER);
910 for (i = 0; i < ndkwedges; i++) {
911 if ((sc = dkwedges[i]) == NULL)
912 continue;
913 printf(" wedge:%s", sc->sc_wname);
914 }
915 rw_exit(&dkwedges_lock);
916 }
917
918 /*
919 * We need a dummy object to stuff into the dkwedge discovery method link
920 * set to ensure that there is always at least one object in the set.
921 */
922 static struct dkwedge_discovery_method dummy_discovery_method;
923 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
924
925 /*
926 * dkwedge_init:
927 *
928 * Initialize the disk wedge subsystem.
929 */
930 void
931 dkwedge_init(void)
932 {
933 __link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
934 struct dkwedge_discovery_method * const *ddmp;
935 struct dkwedge_discovery_method *lddm, *ddm;
936
937 rw_init(&dkwedges_lock);
938 rw_init(&dkwedge_discovery_methods_lock);
939
940 if (config_cfdriver_attach(&dk_cd) != 0)
941 panic("dkwedge: unable to attach cfdriver");
942 if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
943 panic("dkwedge: unable to attach cfattach");
944
945 rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);
946
947 LIST_INIT(&dkwedge_discovery_methods);
948
949 __link_set_foreach(ddmp, dkwedge_methods) {
950 ddm = *ddmp;
951 if (ddm == &dummy_discovery_method)
952 continue;
953 if (LIST_EMPTY(&dkwedge_discovery_methods)) {
954 LIST_INSERT_HEAD(&dkwedge_discovery_methods,
955 ddm, ddm_list);
956 continue;
957 }
958 LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
959 if (ddm->ddm_priority == lddm->ddm_priority) {
960 aprint_error("dk-method-%s: method \"%s\" "
961 "already exists at priority %d\n",
962 ddm->ddm_name, lddm->ddm_name,
963 lddm->ddm_priority);
964 /* Not inserted. */
965 break;
966 }
967 if (ddm->ddm_priority < lddm->ddm_priority) {
968 /* Higher priority; insert before. */
969 LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
970 break;
971 }
972 if (LIST_NEXT(lddm, ddm_list) == NULL) {
973 /* Last one; insert after. */
974 KASSERT(lddm->ddm_priority < ddm->ddm_priority);
975 LIST_INSERT_AFTER(lddm, ddm, ddm_list);
976 break;
977 }
978 }
979 }
980
981 rw_exit(&dkwedge_discovery_methods_lock);
982 }
983
/*
 * Wedge auto-discovery switch, consulted by dkwedge_discover().
 * It defaults to on only when the kernel is built with
 * DKWEDGE_AUTODISCOVER.
 */
#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif
989
990 /*
991 * dkwedge_discover: [exported function]
992 *
993 * Discover the wedges on a newly attached disk.
994 * Remove all unused wedges on the disk first.
995 */
996 void
997 dkwedge_discover(struct disk *pdk)
998 {
999 struct dkwedge_discovery_method *ddm;
1000 struct vnode *vp;
1001 int error;
1002 dev_t pdev;
1003
1004 /*
1005 * Require people playing with wedges to enable this explicitly.
1006 */
1007 if (dkwedge_autodiscover == 0)
1008 return;
1009
1010 rw_enter(&dkwedge_discovery_methods_lock, RW_READER);
1011
1012 /*
1013 * Use the character device for scanning, the block device
1014 * is busy if there are already wedges attached.
1015 */
1016 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
1017 if (error) {
1018 aprint_error("%s: unable to compute pdev, error = %d\n",
1019 pdk->dk_name, error);
1020 goto out;
1021 }
1022
1023 error = cdevvp(pdev, &vp);
1024 if (error) {
1025 aprint_error("%s: unable to find vnode for pdev, error = %d\n",
1026 pdk->dk_name, error);
1027 goto out;
1028 }
1029
1030 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1031 if (error) {
1032 aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
1033 pdk->dk_name, error);
1034 vrele(vp);
1035 goto out;
1036 }
1037
1038 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1039 if (error) {
1040 if (error != ENXIO)
1041 aprint_error("%s: unable to open device, error = %d\n",
1042 pdk->dk_name, error);
1043 vput(vp);
1044 goto out;
1045 }
1046 VOP_UNLOCK(vp);
1047
1048 /*
1049 * Remove unused wedges
1050 */
1051 dkwedge_delidle(pdk);
1052
1053 /*
1054 * For each supported partition map type, look to see if
1055 * this map type exists. If so, parse it and add the
1056 * corresponding wedges.
1057 */
1058 LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
1059 error = (*ddm->ddm_discover)(pdk, vp);
1060 if (error == 0) {
1061 /* Successfully created wedges; we're done. */
1062 break;
1063 }
1064 }
1065
1066 error = vn_close(vp, FREAD, NOCRED);
1067 if (error) {
1068 aprint_error("%s: unable to close device, error = %d\n",
1069 pdk->dk_name, error);
1070 /* We'll just assume the vnode has been cleaned up. */
1071 }
1072
1073 out:
1074 rw_exit(&dkwedge_discovery_methods_lock);
1075 }
1076
1077 /*
1078 * dkwedge_read:
1079 *
1080 * Read some data from the specified disk, used for
1081 * partition discovery.
1082 */
1083 int
1084 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
1085 void *tbuf, size_t len)
1086 {
1087 buf_t *bp;
1088 int error;
1089 bool isopen;
1090 dev_t bdev;
1091 struct vnode *bdvp;
1092
1093 /*
1094 * The kernel cannot read from a character device vnode
1095 * as physio() only handles user memory.
1096 *
1097 * If the block device has already been opened by a wedge
1098 * use that vnode and temporarily bump the open counter.
1099 *
1100 * Otherwise try to open the block device.
1101 */
1102
1103 bdev = devsw_chr2blk(vp->v_rdev);
1104
1105 mutex_enter(&pdk->dk_rawlock);
1106 if (pdk->dk_rawopens != 0) {
1107 KASSERT(pdk->dk_rawvp != NULL);
1108 isopen = true;
1109 ++pdk->dk_rawopens;
1110 bdvp = pdk->dk_rawvp;
1111 error = 0;
1112 } else {
1113 isopen = false;
1114 error = dk_open_parent(bdev, FREAD, &bdvp);
1115 }
1116 mutex_exit(&pdk->dk_rawlock);
1117
1118 if (error)
1119 return error;
1120
1121 bp = getiobuf(bdvp, true);
1122 bp->b_flags = B_READ;
1123 bp->b_cflags = BC_BUSY;
1124 bp->b_dev = bdev;
1125 bp->b_data = tbuf;
1126 bp->b_bufsize = bp->b_bcount = len;
1127 bp->b_blkno = blkno;
1128 bp->b_cylinder = 0;
1129 bp->b_error = 0;
1130
1131 VOP_STRATEGY(bdvp, bp);
1132 error = biowait(bp);
1133 putiobuf(bp);
1134
1135 mutex_enter(&pdk->dk_rawlock);
1136 if (isopen) {
1137 --pdk->dk_rawopens;
1138 } else {
1139 dk_close_parent(bdvp, FREAD);
1140 }
1141 mutex_exit(&pdk->dk_rawlock);
1142
1143 return error;
1144 }
1145
1146 /*
1147 * dkwedge_lookup:
1148 *
1149 * Look up a dkwedge_softc based on the provided dev_t.
1150 */
1151 static struct dkwedge_softc *
1152 dkwedge_lookup(dev_t dev)
1153 {
1154 const int unit = minor(dev);
1155 struct dkwedge_softc *sc;
1156
1157 rw_enter(&dkwedges_lock, RW_READER);
1158 if (unit < 0 || unit >= ndkwedges)
1159 sc = NULL;
1160 else
1161 sc = dkwedges[unit];
1162 rw_exit(&dkwedges_lock);
1163
1164 return sc;
1165 }
1166
1167 static int
1168 dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
1169 {
1170 struct vnode *vp;
1171 int error;
1172
1173 error = bdevvp(dev, &vp);
1174 if (error)
1175 return error;
1176
1177 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1178 if (error) {
1179 vrele(vp);
1180 return error;
1181 }
1182 error = VOP_OPEN(vp, mode, NOCRED);
1183 if (error) {
1184 vput(vp);
1185 return error;
1186 }
1187
1188 /* VOP_OPEN() doesn't do this for us. */
1189 if (mode & FWRITE) {
1190 mutex_enter(vp->v_interlock);
1191 vp->v_writecount++;
1192 mutex_exit(vp->v_interlock);
1193 }
1194
1195 VOP_UNLOCK(vp);
1196
1197 *vpp = vp;
1198
1199 return 0;
1200 }
1201
1202 static int
1203 dk_close_parent(struct vnode *vp, int mode)
1204 {
1205 int error;
1206
1207 error = vn_close(vp, mode, NOCRED);
1208 return error;
1209 }
1210
1211 /*
1212 * dkunit: [devsw entry point]
1213 *
1214 * Return the autoconf device_t unit number of a wedge by its
1215 * devsw dev_t number, or -1 if there is none.
1216 *
1217 * XXX This is a temporary hack until dkwedge numbering is made to
1218 * correspond 1:1 to autoconf device numbering.
1219 */
1220 static int
1221 dkunit(dev_t dev)
1222 {
1223 int mn = minor(dev);
1224 struct dkwedge_softc *sc;
1225 device_t dv;
1226 int unit = -1;
1227
1228 if (mn < 0)
1229 return -1;
1230
1231 rw_enter(&dkwedges_lock, RW_READER);
1232 if (mn < ndkwedges &&
1233 (sc = dkwedges[minor(dev)]) != NULL &&
1234 (dv = sc->sc_dev) != NULL)
1235 unit = device_unit(dv);
1236 rw_exit(&dkwedges_lock);
1237
1238 return unit;
1239 }
1240
1241 /*
1242 * dkopen: [devsw entry point]
1243 *
1244 * Open a wedge.
1245 */
1246 static int
1247 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1248 {
1249 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1250 int error = 0;
1251
1252 if (sc == NULL)
1253 return ENXIO;
1254 if (sc->sc_state != DKW_STATE_RUNNING)
1255 return ENXIO;
1256
1257 /*
1258 * We go through a complicated little dance to only open the parent
1259 * vnode once per wedge, no matter how many times the wedge is
1260 * opened. The reason? We see one dkopen() per open call, but
1261 * only dkclose() on the last close.
1262 */
1263 mutex_enter(&sc->sc_dk.dk_openlock);
1264 mutex_enter(&sc->sc_parent->dk_rawlock);
1265 if (sc->sc_dk.dk_openmask == 0) {
1266 error = dkfirstopen(sc, flags);
1267 if (error)
1268 goto out;
1269 }
1270 KASSERT(sc->sc_mode != 0);
1271 if (flags & ~sc->sc_mode & FWRITE) {
1272 error = EROFS;
1273 goto out;
1274 }
1275 if (fmt == S_IFCHR)
1276 sc->sc_dk.dk_copenmask |= 1;
1277 else
1278 sc->sc_dk.dk_bopenmask |= 1;
1279 sc->sc_dk.dk_openmask =
1280 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1281
1282 out: mutex_exit(&sc->sc_parent->dk_rawlock);
1283 mutex_exit(&sc->sc_dk.dk_openlock);
1284 return error;
1285 }
1286
1287 static int
1288 dkfirstopen(struct dkwedge_softc *sc, int flags)
1289 {
1290 struct dkwedge_softc *nsc;
1291 struct vnode *vp;
1292 int mode;
1293 int error;
1294
1295 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1296 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1297
1298 if (sc->sc_parent->dk_rawopens == 0) {
1299 KASSERT(sc->sc_parent->dk_rawvp == NULL);
1300 /*
1301 * Try open read-write. If this fails for EROFS
1302 * and wedge is read-only, retry to open read-only.
1303 */
1304 mode = FREAD | FWRITE;
1305 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1306 if (error == EROFS && (flags & FWRITE) == 0) {
1307 mode &= ~FWRITE;
1308 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1309 }
1310 if (error)
1311 return error;
1312 KASSERT(vp != NULL);
1313 sc->sc_parent->dk_rawvp = vp;
1314 } else {
1315 /*
1316 * Retrieve mode from an already opened wedge.
1317 *
1318 * At this point, dk_rawopens is bounded by the number
1319 * of dkwedge devices in the system, which is limited
1320 * by autoconf device numbering to INT_MAX. Since
1321 * dk_rawopens is unsigned, this can't overflow.
1322 */
1323 KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
1324 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1325 mode = 0;
1326 LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
1327 if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
1328 continue;
1329 mode = nsc->sc_mode;
1330 break;
1331 }
1332 }
1333 sc->sc_mode = mode;
1334 sc->sc_parent->dk_rawopens++;
1335
1336 return 0;
1337 }
1338
1339 static void
1340 dklastclose(struct dkwedge_softc *sc)
1341 {
1342
1343 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1344 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1345 KASSERT(sc->sc_parent->dk_rawopens > 0);
1346 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1347
1348 if (--sc->sc_parent->dk_rawopens == 0) {
1349 struct vnode *const vp = sc->sc_parent->dk_rawvp;
1350 const int mode = sc->sc_mode;
1351
1352 sc->sc_parent->dk_rawvp = NULL;
1353 sc->sc_mode = 0;
1354
1355 dk_close_parent(vp, mode);
1356 }
1357 }
1358
1359 /*
1360 * dkclose: [devsw entry point]
1361 *
1362 * Close a wedge.
1363 */
1364 static int
1365 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1366 {
1367 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1368
1369 if (sc == NULL)
1370 return ENXIO;
1371 if (sc->sc_state != DKW_STATE_RUNNING &&
1372 sc->sc_state != DKW_STATE_DYING)
1373 return ENXIO;
1374
1375 mutex_enter(&sc->sc_dk.dk_openlock);
1376 mutex_enter(&sc->sc_parent->dk_rawlock);
1377
1378 KASSERT(sc->sc_dk.dk_openmask != 0);
1379
1380 if (fmt == S_IFCHR)
1381 sc->sc_dk.dk_copenmask &= ~1;
1382 else
1383 sc->sc_dk.dk_bopenmask &= ~1;
1384 sc->sc_dk.dk_openmask =
1385 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1386
1387 if (sc->sc_dk.dk_openmask == 0) {
1388 dklastclose(sc);
1389 }
1390
1391 mutex_exit(&sc->sc_parent->dk_rawlock);
1392 mutex_exit(&sc->sc_dk.dk_openlock);
1393
1394 return 0;
1395 }
1396
1397 /*
1398 * dkcancel: [devsw entry point]
1399 *
1400 * Cancel any pending I/O operations waiting on a wedge.
1401 */
1402 static int
1403 dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
1404 {
1405 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1406
1407 KASSERT(sc != NULL);
1408 KASSERT(sc->sc_dev != NULL);
1409 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1410 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1411
1412 /*
1413 * Disk I/O is expected to complete or fail within a reasonable
1414 * timeframe -- it's storage, not communication. Further, the
1415 * character and block device interface guarantees that prior
1416 * reads and writes have completed or failed by the time close
1417 * returns -- we are not to cancel them here. If the parent
1418 * device's hardware is gone, the parent driver can make them
1419 * fail. Nothing for dk(4) itself to do.
1420 */
1421
1422 return 0;
1423 }
1424
1425 /*
1426 * dkstrategy: [devsw entry point]
1427 *
1428 * Perform I/O based on the wedge I/O strategy.
1429 */
1430 static void
1431 dkstrategy(struct buf *bp)
1432 {
1433 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1434 uint64_t p_size, p_offset;
1435
1436 KASSERT(sc != NULL);
1437 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1438 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1439 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1440
1441 /* If it's an empty transfer, wake up the top half now. */
1442 if (bp->b_bcount == 0)
1443 goto done;
1444
1445 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1446 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1447
1448 /* Make sure it's in-range. */
1449 if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
1450 goto done;
1451
1452 /* Translate it to the parent's raw LBA. */
1453 bp->b_rawblkno = bp->b_blkno + p_offset;
1454
1455 /* Place it in the queue and start I/O on the unit. */
1456 mutex_enter(&sc->sc_iolock);
1457 disk_wait(&sc->sc_dk);
1458 bufq_put(sc->sc_bufq, bp);
1459 mutex_exit(&sc->sc_iolock);
1460
1461 dkstart(sc);
1462 return;
1463
1464 done:
1465 bp->b_resid = bp->b_bcount;
1466 biodone(bp);
1467 }
1468
1469 /*
1470 * dkstart:
1471 *
1472 * Start I/O that has been enqueued on the wedge.
1473 */
1474 static void
1475 dkstart(struct dkwedge_softc *sc)
1476 {
1477 struct vnode *vp;
1478 struct buf *bp, *nbp;
1479
1480 mutex_enter(&sc->sc_iolock);
1481
1482 /* Do as much work as has been enqueued. */
1483 while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
1484 if (sc->sc_iostop) {
1485 (void) bufq_get(sc->sc_bufq);
1486 mutex_exit(&sc->sc_iolock);
1487 bp->b_error = ENXIO;
1488 bp->b_resid = bp->b_bcount;
1489 biodone(bp);
1490 mutex_enter(&sc->sc_iolock);
1491 continue;
1492 }
1493
1494 /* fetch an I/O buf with sc_iolock dropped */
1495 mutex_exit(&sc->sc_iolock);
1496 nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
1497 mutex_enter(&sc->sc_iolock);
1498 if (nbp == NULL) {
1499 /*
1500 * No resources to run this request; leave the
1501 * buffer queued up, and schedule a timer to
1502 * restart the queue in 1/2 a second.
1503 */
1504 if (!sc->sc_iostop)
1505 callout_schedule(&sc->sc_restart_ch, hz/2);
1506 break;
1507 }
1508
1509 /*
1510 * fetch buf, this can fail if another thread
1511 * has already processed the queue, it can also
1512 * return a completely different buf.
1513 */
1514 bp = bufq_get(sc->sc_bufq);
1515 if (bp == NULL) {
1516 mutex_exit(&sc->sc_iolock);
1517 putiobuf(nbp);
1518 mutex_enter(&sc->sc_iolock);
1519 continue;
1520 }
1521
1522 /* Instrumentation. */
1523 disk_busy(&sc->sc_dk);
1524
1525 /* release lock for VOP_STRATEGY */
1526 mutex_exit(&sc->sc_iolock);
1527
1528 nbp->b_data = bp->b_data;
1529 nbp->b_flags = bp->b_flags;
1530 nbp->b_oflags = bp->b_oflags;
1531 nbp->b_cflags = bp->b_cflags;
1532 nbp->b_iodone = dkiodone;
1533 nbp->b_proc = bp->b_proc;
1534 nbp->b_blkno = bp->b_rawblkno;
1535 nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1536 nbp->b_bcount = bp->b_bcount;
1537 nbp->b_private = bp;
1538 BIO_COPYPRIO(nbp, bp);
1539
1540 vp = nbp->b_vp;
1541 if ((nbp->b_flags & B_READ) == 0) {
1542 mutex_enter(vp->v_interlock);
1543 vp->v_numoutput++;
1544 mutex_exit(vp->v_interlock);
1545 }
1546 VOP_STRATEGY(vp, nbp);
1547
1548 mutex_enter(&sc->sc_iolock);
1549 }
1550
1551 mutex_exit(&sc->sc_iolock);
1552 }
1553
1554 /*
1555 * dkiodone:
1556 *
1557 * I/O to a wedge has completed; alert the top half.
1558 */
1559 static void
1560 dkiodone(struct buf *bp)
1561 {
1562 struct buf *obp = bp->b_private;
1563 struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1564
1565 if (bp->b_error != 0)
1566 obp->b_error = bp->b_error;
1567 obp->b_resid = bp->b_resid;
1568 putiobuf(bp);
1569
1570 mutex_enter(&sc->sc_iolock);
1571 disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1572 obp->b_flags & B_READ);
1573 mutex_exit(&sc->sc_iolock);
1574
1575 biodone(obp);
1576
1577 /* Kick the queue in case there is more work we can do. */
1578 dkstart(sc);
1579 }
1580
/*
 * dkrestart:
 *
 *	Callout handler: resume processing of the work queue after
 *	it stalled on a resource shortage.  The argument is the
 *	wedge's softc.
 */
static void
dkrestart(void *v)
{

	dkstart((struct dkwedge_softc *)v);
}
1594
1595 /*
1596 * dkminphys:
1597 *
1598 * Call parent's minphys function.
1599 */
1600 static void
1601 dkminphys(struct buf *bp)
1602 {
1603 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1604 dev_t dev;
1605
1606 dev = bp->b_dev;
1607 bp->b_dev = sc->sc_pdev;
1608 if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
1609 (*sc->sc_parent->dk_driver->d_minphys)(bp);
1610 else
1611 minphys(bp);
1612 bp->b_dev = dev;
1613 }
1614
1615 /*
1616 * dkread: [devsw entry point]
1617 *
1618 * Read from a wedge.
1619 */
1620 static int
1621 dkread(dev_t dev, struct uio *uio, int flags)
1622 {
1623 struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);
1624
1625 KASSERT(sc != NULL);
1626 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1627 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1628
1629 return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
1630 }
1631
1632 /*
1633 * dkwrite: [devsw entry point]
1634 *
1635 * Write to a wedge.
1636 */
1637 static int
1638 dkwrite(dev_t dev, struct uio *uio, int flags)
1639 {
1640 struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);
1641
1642 KASSERT(sc != NULL);
1643 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1644 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1645
1646 return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
1647 }
1648
1649 /*
1650 * dkioctl: [devsw entry point]
1651 *
1652 * Perform an ioctl request on a wedge.
1653 */
1654 static int
1655 dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1656 {
1657 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1658 int error = 0;
1659
1660 KASSERT(sc != NULL);
1661 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1662 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1663 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1664
1665 /*
1666 * We pass NODEV instead of our device to indicate we don't
1667 * want to handle disklabel ioctls
1668 */
1669 error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
1670 if (error != EPASSTHROUGH)
1671 return error;
1672
1673 error = 0;
1674
1675 switch (cmd) {
1676 case DIOCGSTRATEGY:
1677 case DIOCGCACHE:
1678 case DIOCCACHESYNC:
1679 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
1680 l != NULL ? l->l_cred : NOCRED);
1681 break;
1682 case DIOCGWEDGEINFO: {
1683 struct dkwedge_info *dkw = data;
1684
1685 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
1686 sizeof(dkw->dkw_devname));
1687 memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1688 dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1689 strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
1690 sizeof(dkw->dkw_parent));
1691 dkw->dkw_offset = sc->sc_offset;
1692 dkw->dkw_size = dkwedge_size(sc);
1693 strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));
1694
1695 break;
1696 }
1697 case DIOCGSECTORALIGN: {
1698 struct disk_sectoralign *dsa = data;
1699 uint32_t r;
1700
1701 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
1702 l != NULL ? l->l_cred : NOCRED);
1703 if (error)
1704 break;
1705
1706 r = sc->sc_offset % dsa->dsa_alignment;
1707 if (r < dsa->dsa_firstaligned)
1708 dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
1709 else
1710 dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
1711 dsa->dsa_alignment) - r;
1712 break;
1713 }
1714 default:
1715 error = ENOTTY;
1716 }
1717
1718 return error;
1719 }
1720
1721 /*
1722 * dkdiscard: [devsw entry point]
1723 *
1724 * Perform a discard-range request on a wedge.
1725 */
1726 static int
1727 dkdiscard(dev_t dev, off_t pos, off_t len)
1728 {
1729 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1730 uint64_t size = dkwedge_size(sc);
1731 unsigned shift;
1732 off_t offset, maxlen;
1733 int error;
1734
1735 KASSERT(sc != NULL);
1736 KASSERT(sc->sc_state != DKW_STATE_LARVAL);
1737 KASSERT(sc->sc_state != DKW_STATE_DEAD);
1738 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1739
1740 /* XXX check bounds on size/offset up front */
1741 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
1742 KASSERT(__type_fit(off_t, size));
1743 KASSERT(__type_fit(off_t, sc->sc_offset));
1744 KASSERT(0 <= sc->sc_offset);
1745 KASSERT(size <= (__type_max(off_t) >> shift));
1746 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size));
1747 offset = ((off_t)sc->sc_offset << shift);
1748 maxlen = ((off_t)size << shift);
1749
1750 if (len > maxlen)
1751 return EINVAL;
1752 if (pos > (maxlen - len))
1753 return EINVAL;
1754
1755 pos += offset;
1756
1757 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
1758 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
1759 VOP_UNLOCK(sc->sc_parent->dk_rawvp);
1760
1761 return error;
1762 }
1763
1764 /*
1765 * dksize: [devsw entry point]
1766 *
1767 * Query the size of a wedge for the purpose of performing a dump
1768 * or for swapping to.
1769 */
1770 static int
1771 dksize(dev_t dev)
1772 {
1773 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1774 uint64_t p_size;
1775 int rv = -1;
1776
1777 if (sc == NULL)
1778 return -1;
1779 if (sc->sc_state != DKW_STATE_RUNNING)
1780 return -1;
1781
1782 /* Our content type is static, no need to open the device. */
1783
1784 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1785 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1786 /* Saturate if we are larger than INT_MAX. */
1787 if (p_size > INT_MAX)
1788 rv = INT_MAX;
1789 else
1790 rv = (int)p_size;
1791 }
1792
1793 return rv;
1794 }
1795
1796 /*
1797 * dkdump: [devsw entry point]
1798 *
1799 * Perform a crash dump to a wedge.
1800 */
1801 static int
1802 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1803 {
1804 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1805 const struct bdevsw *bdev;
1806 uint64_t p_size, p_offset;
1807
1808 if (sc == NULL)
1809 return ENXIO;
1810 if (sc->sc_state != DKW_STATE_RUNNING)
1811 return ENXIO;
1812
1813 /* Our content type is static, no need to open the device. */
1814
1815 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1816 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1817 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0)
1818 return ENXIO;
1819 if (size % DEV_BSIZE != 0)
1820 return EINVAL;
1821
1822 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1823 p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
1824
1825 if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
1826 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1827 "p_size (%" PRIu64 ")\n", __func__, blkno,
1828 size/DEV_BSIZE, p_size);
1829 return EINVAL;
1830 }
1831
1832 bdev = bdevsw_lookup(sc->sc_pdev);
1833 return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1834 }
1835
1836 /*
1837 * config glue
1838 */
1839
1840 /*
1841 * dkwedge_find_partition
1842 *
1843 * Find wedge corresponding to the specified parent name
1844 * and offset/length.
1845 */
1846 device_t
1847 dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
1848 {
1849 struct dkwedge_softc *sc;
1850 int i;
1851 device_t wedge = NULL;
1852
1853 rw_enter(&dkwedges_lock, RW_READER);
1854 for (i = 0; i < ndkwedges; i++) {
1855 if ((sc = dkwedges[i]) == NULL)
1856 continue;
1857 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1858 sc->sc_offset == startblk &&
1859 dkwedge_size(sc) == nblks) {
1860 if (wedge) {
1861 printf("WARNING: double match for boot wedge "
1862 "(%s, %s)\n",
1863 device_xname(wedge),
1864 device_xname(sc->sc_dev));
1865 continue;
1866 }
1867 wedge = sc->sc_dev;
1868 }
1869 }
1870 rw_exit(&dkwedges_lock);
1871
1872 return wedge;
1873 }
1874
1875 const char *
1876 dkwedge_get_parent_name(dev_t dev)
1877 {
1878 /* XXX: perhaps do this in lookup? */
1879 int bmaj = bdevsw_lookup_major(&dk_bdevsw);
1880 int cmaj = cdevsw_lookup_major(&dk_cdevsw);
1881
1882 if (major(dev) != bmaj && major(dev) != cmaj)
1883 return NULL;
1884 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1885 if (sc == NULL)
1886 return NULL;
1887 return sc->sc_parent->dk_name;
1888 }
1889