dk.c revision 1.117 1 /* $NetBSD: dk.c,v 1.117 2022/08/22 00:19:53 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.117 2022/08/22 00:19:53 riastradh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/pool.h>
44 #include <sys/ioctl.h>
45 #include <sys/disklabel.h>
46 #include <sys/disk.h>
47 #include <sys/fcntl.h>
48 #include <sys/buf.h>
49 #include <sys/bufq.h>
50 #include <sys/vnode.h>
51 #include <sys/stat.h>
52 #include <sys/conf.h>
53 #include <sys/callout.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/device.h>
57 #include <sys/kauth.h>
58
59 #include <miscfs/specfs/specdev.h>
60
61 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
62
/*
 * Lifecycle of a wedge, stored in dkwedge_softc::sc_state.
 */
typedef enum {
	/* Softc allocated by dkwedge_add() but not yet usable. */
	DKW_STATE_LARVAL	= 0,
	/* disk_attach() done; dkopen()/dkstrategy() accept the wedge. */
	DKW_STATE_RUNNING	= 1,
	/* dkwedge_detach() has started tearing the wedge down. */
	DKW_STATE_DYING		= 2,
	/* Removed from the dkwedges array; softc is about to be freed. */
	DKW_STATE_DEAD		= 666
} dkwedge_state_t;
69
70 struct dkwedge_softc {
71 device_t sc_dev; /* pointer to our pseudo-device */
72 struct cfdata sc_cfdata; /* our cfdata structure */
73 uint8_t sc_wname[128]; /* wedge name (Unicode, UTF-8) */
74
75 dkwedge_state_t sc_state; /* state this wedge is in */
76
77 struct disk *sc_parent; /* parent disk */
78 daddr_t sc_offset; /* LBA offset of wedge in parent */
79 uint64_t sc_size; /* size of wedge in blocks */
80 char sc_ptype[32]; /* partition type */
81 dev_t sc_pdev; /* cached parent's dev_t */
82 /* link on parent's wedge list */
83 LIST_ENTRY(dkwedge_softc) sc_plink;
84
85 struct disk sc_dk; /* our own disk structure */
86 struct bufq_state *sc_bufq; /* buffer queue */
87 struct callout sc_restart_ch; /* callout to restart I/O */
88
89 kmutex_t sc_iolock;
90 kcondvar_t sc_dkdrn;
91 u_int sc_iopend; /* I/Os pending */
92 int sc_mode; /* parent open mode */
93 };
94
95 static void dkstart(struct dkwedge_softc *);
96 static void dkiodone(struct buf *);
97 static void dkrestart(void *);
98 static void dkminphys(struct buf *);
99
100 static int dklastclose(struct dkwedge_softc *);
101 static int dkwedge_cleanup_parent(struct dkwedge_softc *, int);
102 static int dkwedge_detach(device_t, int);
103 static void dkwedge_delall1(struct disk *, bool);
104 static int dkwedge_del1(struct dkwedge_info *, int);
105 static int dk_open_parent(dev_t, int, struct vnode **);
106 static int dk_close_parent(struct vnode *, int);
107
108 static dev_type_open(dkopen);
109 static dev_type_close(dkclose);
110 static dev_type_read(dkread);
111 static dev_type_write(dkwrite);
112 static dev_type_ioctl(dkioctl);
113 static dev_type_strategy(dkstrategy);
114 static dev_type_dump(dkdump);
115 static dev_type_size(dksize);
116 static dev_type_discard(dkdiscard);
117
118 const struct bdevsw dk_bdevsw = {
119 .d_open = dkopen,
120 .d_close = dkclose,
121 .d_strategy = dkstrategy,
122 .d_ioctl = dkioctl,
123 .d_dump = dkdump,
124 .d_psize = dksize,
125 .d_discard = dkdiscard,
126 .d_flag = D_DISK | D_MPSAFE
127 };
128
129 const struct cdevsw dk_cdevsw = {
130 .d_open = dkopen,
131 .d_close = dkclose,
132 .d_read = dkread,
133 .d_write = dkwrite,
134 .d_ioctl = dkioctl,
135 .d_stop = nostop,
136 .d_tty = notty,
137 .d_poll = nopoll,
138 .d_mmap = nommap,
139 .d_kqfilter = nokqfilter,
140 .d_discard = dkdiscard,
141 .d_flag = D_DISK | D_MPSAFE
142 };
143
144 static struct dkwedge_softc **dkwedges;
145 static u_int ndkwedges;
146 static krwlock_t dkwedges_lock;
147
148 static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
149 static krwlock_t dkwedge_discovery_methods_lock;
150
151 /*
152 * dkwedge_match:
153 *
154 * Autoconfiguration match function for pseudo-device glue.
155 */
156 static int
157 dkwedge_match(device_t parent, cfdata_t match,
158 void *aux)
159 {
160
161 /* Pseudo-device; always present. */
162 return (1);
163 }
164
165 /*
166 * dkwedge_attach:
167 *
168 * Autoconfiguration attach function for pseudo-device glue.
169 */
170 static void
171 dkwedge_attach(device_t parent, device_t self,
172 void *aux)
173 {
174
175 if (!pmf_device_register(self, NULL, NULL))
176 aprint_error_dev(self, "couldn't establish power handler\n");
177 }
178
179 CFDRIVER_DECL(dk, DV_DISK, NULL);
180 CFATTACH_DECL3_NEW(dk, 0,
181 dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
182 DVF_DETACH_SHUTDOWN);
183
184 /*
185 * dkwedge_wait_drain:
186 *
187 * Wait for I/O on the wedge to drain.
188 */
189 static void
190 dkwedge_wait_drain(struct dkwedge_softc *sc)
191 {
192
193 mutex_enter(&sc->sc_iolock);
194 while (sc->sc_iopend != 0)
195 cv_wait(&sc->sc_dkdrn, &sc->sc_iolock);
196 mutex_exit(&sc->sc_iolock);
197 }
198
199 /*
200 * dkwedge_compute_pdev:
201 *
202 * Compute the parent disk's dev_t.
203 */
204 static int
205 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
206 {
207 const char *name, *cp;
208 devmajor_t pmaj;
209 int punit;
210 char devname[16];
211
212 name = pname;
213 switch (type) {
214 case VBLK:
215 pmaj = devsw_name2blk(name, devname, sizeof(devname));
216 break;
217 case VCHR:
218 pmaj = devsw_name2chr(name, devname, sizeof(devname));
219 break;
220 default:
221 pmaj = NODEVMAJOR;
222 break;
223 }
224 if (pmaj == NODEVMAJOR)
225 return (ENODEV);
226
227 name += strlen(devname);
228 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
229 punit = (punit * 10) + (*cp - '0');
230 if (cp == name) {
231 /* Invalid parent disk name. */
232 return (ENODEV);
233 }
234
235 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
236
237 return (0);
238 }
239
240 /*
241 * dkwedge_array_expand:
242 *
243 * Expand the dkwedges array.
244 */
245 static void
246 dkwedge_array_expand(void)
247 {
248 int newcnt = ndkwedges + 16;
249 struct dkwedge_softc **newarray, **oldarray;
250
251 newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
252 M_WAITOK|M_ZERO);
253 if ((oldarray = dkwedges) != NULL)
254 memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
255 dkwedges = newarray;
256 ndkwedges = newcnt;
257 if (oldarray != NULL)
258 free(oldarray, M_DKWEDGE);
259 }
260
261 static void
262 dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
263 {
264 struct disk *dk = &sc->sc_dk;
265 struct disk_geom *dg = &dk->dk_geom;
266
267 memset(dg, 0, sizeof(*dg));
268
269 dg->dg_secperunit = sc->sc_size;
270 dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;
271
272 /* fake numbers, 1 cylinder is 1 MB with default sector size */
273 dg->dg_nsectors = 32;
274 dg->dg_ntracks = 64;
275 dg->dg_ncylinders = dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);
276
277 disk_set_info(sc->sc_dev, dk, NULL);
278 }
279
280 /*
281 * dkwedge_add: [exported function]
282 *
283 * Add a disk wedge based on the provided information.
284 *
285 * The incoming dkw_devname[] is ignored, instead being
286 * filled in and returned to the caller.
287 */
288 int
289 dkwedge_add(struct dkwedge_info *dkw)
290 {
291 struct dkwedge_softc *sc, *lsc;
292 struct disk *pdk;
293 u_int unit;
294 int error;
295 dev_t pdev;
296
297 dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
298 pdk = disk_find(dkw->dkw_parent);
299 if (pdk == NULL)
300 return (ENODEV);
301
302 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
303 if (error)
304 return (error);
305
306 if (dkw->dkw_offset < 0)
307 return (EINVAL);
308
309 /*
310 * Check for an existing wedge at the same disk offset. Allow
311 * updating a wedge if the only change is the size, and the new
312 * size is larger than the old.
313 */
314 sc = NULL;
315 mutex_enter(&pdk->dk_openlock);
316 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
317 if (lsc->sc_offset != dkw->dkw_offset)
318 continue;
319 if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
320 break;
321 if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
322 break;
323 if (lsc->sc_size > dkw->dkw_size)
324 break;
325
326 sc = lsc;
327 sc->sc_size = dkw->dkw_size;
328 dk_set_geometry(sc, pdk);
329
330 break;
331 }
332 mutex_exit(&pdk->dk_openlock);
333
334 if (sc != NULL)
335 goto announce;
336
337 sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
338 sc->sc_state = DKW_STATE_LARVAL;
339 sc->sc_parent = pdk;
340 sc->sc_pdev = pdev;
341 sc->sc_offset = dkw->dkw_offset;
342 sc->sc_size = dkw->dkw_size;
343
344 memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
345 sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';
346
347 memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
348 sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';
349
350 bufq_alloc(&sc->sc_bufq, "fcfs", 0);
351
352 callout_init(&sc->sc_restart_ch, 0);
353 callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);
354
355 mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);
356 cv_init(&sc->sc_dkdrn, "dkdrn");
357
358 /*
359 * Wedge will be added; increment the wedge count for the parent.
360 * Only allow this to happen if RAW_PART is the only thing open.
361 */
362 mutex_enter(&pdk->dk_openlock);
363 if (pdk->dk_openmask & ~(1 << RAW_PART))
364 error = EBUSY;
365 else {
366 /* Check for wedge overlap. */
367 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
368 daddr_t lastblk = sc->sc_offset + sc->sc_size - 1;
369 daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1;
370
371 if (sc->sc_offset >= lsc->sc_offset &&
372 sc->sc_offset <= llastblk) {
373 /* Overlaps the tail of the existing wedge. */
374 break;
375 }
376 if (lastblk >= lsc->sc_offset &&
377 lastblk <= llastblk) {
378 /* Overlaps the head of the existing wedge. */
379 break;
380 }
381 }
382 if (lsc != NULL) {
383 if (sc->sc_offset == lsc->sc_offset &&
384 sc->sc_size == lsc->sc_size &&
385 strcmp(sc->sc_wname, lsc->sc_wname) == 0)
386 error = EEXIST;
387 else
388 error = EINVAL;
389 } else {
390 pdk->dk_nwedges++;
391 LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
392 }
393 }
394 mutex_exit(&pdk->dk_openlock);
395 if (error) {
396 cv_destroy(&sc->sc_dkdrn);
397 mutex_destroy(&sc->sc_iolock);
398 bufq_free(sc->sc_bufq);
399 free(sc, M_DKWEDGE);
400 return (error);
401 }
402
403 /* Fill in our cfdata for the pseudo-device glue. */
404 sc->sc_cfdata.cf_name = dk_cd.cd_name;
405 sc->sc_cfdata.cf_atname = dk_ca.ca_name;
406 /* sc->sc_cfdata.cf_unit set below */
407 sc->sc_cfdata.cf_fstate = FSTATE_STAR;
408
409 /* Insert the larval wedge into the array. */
410 rw_enter(&dkwedges_lock, RW_WRITER);
411 for (error = 0;;) {
412 struct dkwedge_softc **scpp;
413
414 /*
415 * Check for a duplicate wname while searching for
416 * a slot.
417 */
418 for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
419 if (dkwedges[unit] == NULL) {
420 if (scpp == NULL) {
421 scpp = &dkwedges[unit];
422 sc->sc_cfdata.cf_unit = unit;
423 }
424 } else {
425 /* XXX Unicode. */
426 if (strcmp(dkwedges[unit]->sc_wname,
427 sc->sc_wname) == 0) {
428 error = EEXIST;
429 break;
430 }
431 }
432 }
433 if (error)
434 break;
435 KASSERT(unit == ndkwedges);
436 if (scpp == NULL)
437 dkwedge_array_expand();
438 else {
439 KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
440 *scpp = sc;
441 break;
442 }
443 }
444 rw_exit(&dkwedges_lock);
445 if (error) {
446 mutex_enter(&pdk->dk_openlock);
447 pdk->dk_nwedges--;
448 LIST_REMOVE(sc, sc_plink);
449 mutex_exit(&pdk->dk_openlock);
450
451 cv_destroy(&sc->sc_dkdrn);
452 mutex_destroy(&sc->sc_iolock);
453 bufq_free(sc->sc_bufq);
454 free(sc, M_DKWEDGE);
455 return (error);
456 }
457
458 /*
459 * Now that we know the unit #, attach a pseudo-device for
460 * this wedge instance. This will provide us with the
461 * device_t necessary for glue to other parts of the system.
462 *
463 * This should never fail, unless we're almost totally out of
464 * memory.
465 */
466 if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
467 aprint_error("%s%u: unable to attach pseudo-device\n",
468 sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);
469
470 rw_enter(&dkwedges_lock, RW_WRITER);
471 dkwedges[sc->sc_cfdata.cf_unit] = NULL;
472 rw_exit(&dkwedges_lock);
473
474 mutex_enter(&pdk->dk_openlock);
475 pdk->dk_nwedges--;
476 LIST_REMOVE(sc, sc_plink);
477 mutex_exit(&pdk->dk_openlock);
478
479 cv_destroy(&sc->sc_dkdrn);
480 mutex_destroy(&sc->sc_iolock);
481 bufq_free(sc->sc_bufq);
482 free(sc, M_DKWEDGE);
483 return (ENOMEM);
484 }
485
486 /*
487 * XXX Really ought to make the disk_attach() and the changing
488 * of state to RUNNING atomic.
489 */
490
491 disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
492 dk_set_geometry(sc, pdk);
493 disk_attach(&sc->sc_dk);
494
495 /* Disk wedge is ready for use! */
496 sc->sc_state = DKW_STATE_RUNNING;
497
498 announce:
499 /* Announce our arrival. */
500 aprint_normal(
501 "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
502 device_xname(sc->sc_dev), pdk->dk_name,
503 sc->sc_wname, /* XXX Unicode */
504 sc->sc_size, sc->sc_offset,
505 sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);
506
507 /* Return the devname to the caller. */
508 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
509 sizeof(dkw->dkw_devname));
510
511 return (0);
512 }
513
514 /*
515 * dkwedge_find:
516 *
517 * Lookup a disk wedge based on the provided information.
518 * NOTE: We look up the wedge based on the wedge devname,
519 * not wname.
520 *
521 * Return NULL if the wedge is not found, otherwise return
522 * the wedge's softc. Assign the wedge's unit number to unitp
523 * if unitp is not NULL.
524 */
525 static struct dkwedge_softc *
526 dkwedge_find(struct dkwedge_info *dkw, u_int *unitp)
527 {
528 struct dkwedge_softc *sc = NULL;
529 u_int unit;
530
531 /* Find our softc. */
532 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
533 rw_enter(&dkwedges_lock, RW_READER);
534 for (unit = 0; unit < ndkwedges; unit++) {
535 if ((sc = dkwedges[unit]) != NULL &&
536 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
537 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
538 break;
539 }
540 }
541 rw_exit(&dkwedges_lock);
542 if (unit == ndkwedges)
543 return NULL;
544
545 if (unitp != NULL)
546 *unitp = unit;
547
548 return sc;
549 }
550
/*
 * dkwedge_del:	[exported function]
 *
 *	Delete the disk wedge identified by dkw.  The wedge is located
 *	by its device name (dkw_devname), not by its wedge name.  This
 *	is an unforced delete: no detach flags are passed.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{
	const int noflags = 0;

	return dkwedge_del1(dkw, noflags);
}
563
564 int
565 dkwedge_del1(struct dkwedge_info *dkw, int flags)
566 {
567 struct dkwedge_softc *sc = NULL;
568
569 /* Find our softc. */
570 if ((sc = dkwedge_find(dkw, NULL)) == NULL)
571 return (ESRCH);
572
573 return config_detach(sc->sc_dev, flags);
574 }
575
576 static int
577 dkwedge_cleanup_parent(struct dkwedge_softc *sc, int flags)
578 {
579 struct disk *dk = &sc->sc_dk;
580 int rc;
581
582 rc = 0;
583 mutex_enter(&dk->dk_openlock);
584 if (dk->dk_openmask == 0) {
585 /* nothing to do */
586 } else if ((flags & DETACH_FORCE) == 0) {
587 rc = EBUSY;
588 } else {
589 mutex_enter(&sc->sc_parent->dk_rawlock);
590 rc = dklastclose(sc);
591 mutex_exit(&sc->sc_parent->dk_rawlock);
592 }
593 mutex_exit(&sc->sc_dk.dk_openlock);
594
595 return rc;
596 }
597
598 /*
599 * dkwedge_detach:
600 *
601 * Autoconfiguration detach function for pseudo-device glue.
602 */
603 static int
604 dkwedge_detach(device_t self, int flags)
605 {
606 struct dkwedge_softc *sc = NULL;
607 u_int unit;
608 int bmaj, cmaj, rc;
609
610 rw_enter(&dkwedges_lock, RW_WRITER);
611 for (unit = 0; unit < ndkwedges; unit++) {
612 if ((sc = dkwedges[unit]) != NULL && sc->sc_dev == self)
613 break;
614 }
615 if (unit == ndkwedges)
616 rc = ENXIO;
617 else if ((rc = dkwedge_cleanup_parent(sc, flags)) == 0) {
618 /* Mark the wedge as dying. */
619 sc->sc_state = DKW_STATE_DYING;
620 }
621 rw_exit(&dkwedges_lock);
622
623 if (rc != 0)
624 return rc;
625
626 pmf_device_deregister(self);
627
628 /* Locate the wedge major numbers. */
629 bmaj = bdevsw_lookup_major(&dk_bdevsw);
630 cmaj = cdevsw_lookup_major(&dk_cdevsw);
631
632 /* Kill any pending restart. */
633 callout_stop(&sc->sc_restart_ch);
634
635 /*
636 * dkstart() will kill any queued buffers now that the
637 * state of the wedge is not RUNNING. Once we've done
638 * that, wait for any other pending I/O to complete.
639 */
640 dkstart(sc);
641 dkwedge_wait_drain(sc);
642
643 /* Nuke the vnodes for any open instances. */
644 vdevgone(bmaj, unit, unit, VBLK);
645 vdevgone(cmaj, unit, unit, VCHR);
646
647 /* Clean up the parent. */
648 dkwedge_cleanup_parent(sc, flags | DETACH_FORCE);
649
650 /* Announce our departure. */
651 aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
652 sc->sc_parent->dk_name,
653 sc->sc_wname); /* XXX Unicode */
654
655 mutex_enter(&sc->sc_parent->dk_openlock);
656 sc->sc_parent->dk_nwedges--;
657 LIST_REMOVE(sc, sc_plink);
658 mutex_exit(&sc->sc_parent->dk_openlock);
659
660 /* Delete our buffer queue. */
661 bufq_free(sc->sc_bufq);
662
663 /* Detach from the disk list. */
664 disk_detach(&sc->sc_dk);
665 disk_destroy(&sc->sc_dk);
666
667 /* Poof. */
668 rw_enter(&dkwedges_lock, RW_WRITER);
669 dkwedges[unit] = NULL;
670 sc->sc_state = DKW_STATE_DEAD;
671 rw_exit(&dkwedges_lock);
672
673 mutex_destroy(&sc->sc_iolock);
674 cv_destroy(&sc->sc_dkdrn);
675
676 free(sc, M_DKWEDGE);
677
678 return 0;
679 }
680
681 /*
682 * dkwedge_delall: [exported function]
683 *
684 * Delete all of the wedges on the specified disk. Used when
685 * a disk is being detached.
686 */
687 void
688 dkwedge_delall(struct disk *pdk)
689 {
690 dkwedge_delall1(pdk, false);
691 }
692
693 static void
694 dkwedge_delall1(struct disk *pdk, bool idleonly)
695 {
696 struct dkwedge_info dkw;
697 struct dkwedge_softc *sc;
698 int flags;
699
700 flags = DETACH_QUIET;
701 if (!idleonly) flags |= DETACH_FORCE;
702
703 for (;;) {
704 mutex_enter(&pdk->dk_openlock);
705 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
706 if (!idleonly || sc->sc_dk.dk_openmask == 0)
707 break;
708 }
709 if (sc == NULL) {
710 KASSERT(idleonly || pdk->dk_nwedges == 0);
711 mutex_exit(&pdk->dk_openlock);
712 return;
713 }
714 strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
715 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
716 sizeof(dkw.dkw_devname));
717 mutex_exit(&pdk->dk_openlock);
718 (void) dkwedge_del1(&dkw, flags);
719 }
720 }
721
722 /*
723 * dkwedge_list: [exported function]
724 *
725 * List all of the wedges on a particular disk.
726 */
727 int
728 dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
729 {
730 struct uio uio;
731 struct iovec iov;
732 struct dkwedge_softc *sc;
733 struct dkwedge_info dkw;
734 int error = 0;
735
736 iov.iov_base = dkwl->dkwl_buf;
737 iov.iov_len = dkwl->dkwl_bufsize;
738
739 uio.uio_iov = &iov;
740 uio.uio_iovcnt = 1;
741 uio.uio_offset = 0;
742 uio.uio_resid = dkwl->dkwl_bufsize;
743 uio.uio_rw = UIO_READ;
744 KASSERT(l == curlwp);
745 uio.uio_vmspace = l->l_proc->p_vmspace;
746
747 dkwl->dkwl_ncopied = 0;
748
749 mutex_enter(&pdk->dk_openlock);
750 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
751 if (uio.uio_resid < sizeof(dkw))
752 break;
753
754 if (sc->sc_state != DKW_STATE_RUNNING)
755 continue;
756
757 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
758 sizeof(dkw.dkw_devname));
759 memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
760 dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
761 strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
762 sizeof(dkw.dkw_parent));
763 dkw.dkw_offset = sc->sc_offset;
764 dkw.dkw_size = sc->sc_size;
765 strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));
766
767 error = uiomove(&dkw, sizeof(dkw), &uio);
768 if (error)
769 break;
770 dkwl->dkwl_ncopied++;
771 }
772 dkwl->dkwl_nwedges = pdk->dk_nwedges;
773 mutex_exit(&pdk->dk_openlock);
774
775 return (error);
776 }
777
778 device_t
779 dkwedge_find_by_wname(const char *wname)
780 {
781 device_t dv = NULL;
782 struct dkwedge_softc *sc;
783 int i;
784
785 rw_enter(&dkwedges_lock, RW_WRITER);
786 for (i = 0; i < ndkwedges; i++) {
787 if ((sc = dkwedges[i]) == NULL)
788 continue;
789 if (strcmp(sc->sc_wname, wname) == 0) {
790 if (dv != NULL) {
791 printf(
792 "WARNING: double match for wedge name %s "
793 "(%s, %s)\n", wname, device_xname(dv),
794 device_xname(sc->sc_dev));
795 continue;
796 }
797 dv = sc->sc_dev;
798 }
799 }
800 rw_exit(&dkwedges_lock);
801 return dv;
802 }
803
804 device_t
805 dkwedge_find_by_parent(const char *name, size_t *i)
806 {
807 rw_enter(&dkwedges_lock, RW_WRITER);
808 for (; *i < (size_t)ndkwedges; (*i)++) {
809 struct dkwedge_softc *sc;
810 if ((sc = dkwedges[*i]) == NULL)
811 continue;
812 if (strcmp(sc->sc_parent->dk_name, name) != 0)
813 continue;
814 rw_exit(&dkwedges_lock);
815 return sc->sc_dev;
816 }
817 rw_exit(&dkwedges_lock);
818 return NULL;
819 }
820
821 void
822 dkwedge_print_wnames(void)
823 {
824 struct dkwedge_softc *sc;
825 int i;
826
827 rw_enter(&dkwedges_lock, RW_WRITER);
828 for (i = 0; i < ndkwedges; i++) {
829 if ((sc = dkwedges[i]) == NULL)
830 continue;
831 printf(" wedge:%s", sc->sc_wname);
832 }
833 rw_exit(&dkwedges_lock);
834 }
835
836 /*
837 * We need a dummy object to stuff into the dkwedge discovery method link
838 * set to ensure that there is always at least one object in the set.
839 */
840 static struct dkwedge_discovery_method dummy_discovery_method;
841 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
842
843 /*
844 * dkwedge_init:
845 *
846 * Initialize the disk wedge subsystem.
847 */
848 void
849 dkwedge_init(void)
850 {
851 __link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
852 struct dkwedge_discovery_method * const *ddmp;
853 struct dkwedge_discovery_method *lddm, *ddm;
854
855 rw_init(&dkwedges_lock);
856 rw_init(&dkwedge_discovery_methods_lock);
857
858 if (config_cfdriver_attach(&dk_cd) != 0)
859 panic("dkwedge: unable to attach cfdriver");
860 if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
861 panic("dkwedge: unable to attach cfattach");
862
863 rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);
864
865 LIST_INIT(&dkwedge_discovery_methods);
866
867 __link_set_foreach(ddmp, dkwedge_methods) {
868 ddm = *ddmp;
869 if (ddm == &dummy_discovery_method)
870 continue;
871 if (LIST_EMPTY(&dkwedge_discovery_methods)) {
872 LIST_INSERT_HEAD(&dkwedge_discovery_methods,
873 ddm, ddm_list);
874 continue;
875 }
876 LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
877 if (ddm->ddm_priority == lddm->ddm_priority) {
878 aprint_error("dk-method-%s: method \"%s\" "
879 "already exists at priority %d\n",
880 ddm->ddm_name, lddm->ddm_name,
881 lddm->ddm_priority);
882 /* Not inserted. */
883 break;
884 }
885 if (ddm->ddm_priority < lddm->ddm_priority) {
886 /* Higher priority; insert before. */
887 LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
888 break;
889 }
890 if (LIST_NEXT(lddm, ddm_list) == NULL) {
891 /* Last one; insert after. */
892 KASSERT(lddm->ddm_priority < ddm->ddm_priority);
893 LIST_INSERT_AFTER(lddm, ddm, ddm_list);
894 break;
895 }
896 }
897 }
898
899 rw_exit(&dkwedge_discovery_methods_lock);
900 }
901
902 #ifdef DKWEDGE_AUTODISCOVER
903 int dkwedge_autodiscover = 1;
904 #else
905 int dkwedge_autodiscover = 0;
906 #endif
907
908 /*
909 * dkwedge_discover: [exported function]
910 *
911 * Discover the wedges on a newly attached disk.
912 * Remove all unused wedges on the disk first.
913 */
914 void
915 dkwedge_discover(struct disk *pdk)
916 {
917 struct dkwedge_discovery_method *ddm;
918 struct vnode *vp;
919 int error;
920 dev_t pdev;
921
922 /*
923 * Require people playing with wedges to enable this explicitly.
924 */
925 if (dkwedge_autodiscover == 0)
926 return;
927
928 rw_enter(&dkwedge_discovery_methods_lock, RW_READER);
929
930 /*
931 * Use the character device for scanning, the block device
932 * is busy if there are already wedges attached.
933 */
934 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
935 if (error) {
936 aprint_error("%s: unable to compute pdev, error = %d\n",
937 pdk->dk_name, error);
938 goto out;
939 }
940
941 error = cdevvp(pdev, &vp);
942 if (error) {
943 aprint_error("%s: unable to find vnode for pdev, error = %d\n",
944 pdk->dk_name, error);
945 goto out;
946 }
947
948 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
949 if (error) {
950 aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
951 pdk->dk_name, error);
952 vrele(vp);
953 goto out;
954 }
955
956 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
957 if (error) {
958 if (error != ENODEV)
959 aprint_error("%s: unable to open device, error = %d\n",
960 pdk->dk_name, error);
961 vput(vp);
962 goto out;
963 }
964 VOP_UNLOCK(vp);
965
966 /*
967 * Remove unused wedges
968 */
969 dkwedge_delall1(pdk, true);
970
971 /*
972 * For each supported partition map type, look to see if
973 * this map type exists. If so, parse it and add the
974 * corresponding wedges.
975 */
976 LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
977 error = (*ddm->ddm_discover)(pdk, vp);
978 if (error == 0) {
979 /* Successfully created wedges; we're done. */
980 break;
981 }
982 }
983
984 error = vn_close(vp, FREAD, NOCRED);
985 if (error) {
986 aprint_error("%s: unable to close device, error = %d\n",
987 pdk->dk_name, error);
988 /* We'll just assume the vnode has been cleaned up. */
989 }
990
991 out:
992 rw_exit(&dkwedge_discovery_methods_lock);
993 }
994
995 /*
996 * dkwedge_read:
997 *
998 * Read some data from the specified disk, used for
999 * partition discovery.
1000 */
1001 int
1002 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
1003 void *tbuf, size_t len)
1004 {
1005 buf_t *bp;
1006 int error;
1007 bool isopen;
1008 dev_t bdev;
1009 struct vnode *bdvp;
1010
1011 /*
1012 * The kernel cannot read from a character device vnode
1013 * as physio() only handles user memory.
1014 *
1015 * If the block device has already been opened by a wedge
1016 * use that vnode and temporarily bump the open counter.
1017 *
1018 * Otherwise try to open the block device.
1019 */
1020
1021 bdev = devsw_chr2blk(vp->v_rdev);
1022
1023 mutex_enter(&pdk->dk_rawlock);
1024 if (pdk->dk_rawopens != 0) {
1025 KASSERT(pdk->dk_rawvp != NULL);
1026 isopen = true;
1027 ++pdk->dk_rawopens;
1028 bdvp = pdk->dk_rawvp;
1029 error = 0;
1030 } else {
1031 isopen = false;
1032 error = dk_open_parent(bdev, FREAD, &bdvp);
1033 }
1034 mutex_exit(&pdk->dk_rawlock);
1035
1036 if (error)
1037 return error;
1038
1039 bp = getiobuf(bdvp, true);
1040 bp->b_flags = B_READ;
1041 bp->b_cflags = BC_BUSY;
1042 bp->b_dev = bdev;
1043 bp->b_data = tbuf;
1044 bp->b_bufsize = bp->b_bcount = len;
1045 bp->b_blkno = blkno;
1046 bp->b_cylinder = 0;
1047 bp->b_error = 0;
1048
1049 VOP_STRATEGY(bdvp, bp);
1050 error = biowait(bp);
1051 putiobuf(bp);
1052
1053 mutex_enter(&pdk->dk_rawlock);
1054 if (isopen) {
1055 --pdk->dk_rawopens;
1056 } else {
1057 dk_close_parent(bdvp, FREAD);
1058 }
1059 mutex_exit(&pdk->dk_rawlock);
1060
1061 return error;
1062 }
1063
1064 /*
1065 * dkwedge_lookup:
1066 *
1067 * Look up a dkwedge_softc based on the provided dev_t.
1068 */
1069 static struct dkwedge_softc *
1070 dkwedge_lookup(dev_t dev)
1071 {
1072 int unit = minor(dev);
1073
1074 if (unit >= ndkwedges)
1075 return (NULL);
1076
1077 KASSERT(dkwedges != NULL);
1078
1079 return (dkwedges[unit]);
1080 }
1081
1082 static int
1083 dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
1084 {
1085 struct vnode *vp;
1086 int error;
1087
1088 error = bdevvp(dev, &vp);
1089 if (error)
1090 return error;
1091
1092 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1093 if (error) {
1094 vrele(vp);
1095 return error;
1096 }
1097 error = VOP_OPEN(vp, mode, NOCRED);
1098 if (error) {
1099 vput(vp);
1100 return error;
1101 }
1102
1103 /* VOP_OPEN() doesn't do this for us. */
1104 if (mode & FWRITE) {
1105 mutex_enter(vp->v_interlock);
1106 vp->v_writecount++;
1107 mutex_exit(vp->v_interlock);
1108 }
1109
1110 VOP_UNLOCK(vp);
1111
1112 *vpp = vp;
1113
1114 return 0;
1115 }
1116
1117 static int
1118 dk_close_parent(struct vnode *vp, int mode)
1119 {
1120 int error;
1121
1122 error = vn_close(vp, mode, NOCRED);
1123 return error;
1124 }
1125
1126 /*
1127 * dkopen: [devsw entry point]
1128 *
1129 * Open a wedge.
1130 */
1131 static int
1132 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1133 {
1134 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1135 struct dkwedge_softc *nsc;
1136 struct vnode *vp;
1137 int error = 0;
1138 int mode;
1139
1140 if (sc == NULL)
1141 return (ENODEV);
1142 if (sc->sc_state != DKW_STATE_RUNNING)
1143 return (ENXIO);
1144
1145 /*
1146 * We go through a complicated little dance to only open the parent
1147 * vnode once per wedge, no matter how many times the wedge is
1148 * opened. The reason? We see one dkopen() per open call, but
1149 * only dkclose() on the last close.
1150 */
1151 mutex_enter(&sc->sc_dk.dk_openlock);
1152 mutex_enter(&sc->sc_parent->dk_rawlock);
1153 if (sc->sc_dk.dk_openmask == 0) {
1154 if (sc->sc_parent->dk_rawopens == 0) {
1155 KASSERT(sc->sc_parent->dk_rawvp == NULL);
1156 /*
1157 * Try open read-write. If this fails for EROFS
1158 * and wedge is read-only, retry to open read-only.
1159 */
1160 mode = FREAD | FWRITE;
1161 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1162 if (error == EROFS && (flags & FWRITE) == 0) {
1163 mode &= ~FWRITE;
1164 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1165 }
1166 if (error)
1167 goto popen_fail;
1168 sc->sc_parent->dk_rawvp = vp;
1169 } else {
1170 /*
1171 * Retrieve mode from an already opened wedge.
1172 */
1173 mode = 0;
1174 LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
1175 if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
1176 continue;
1177 mode = nsc->sc_mode;
1178 break;
1179 }
1180 }
1181 sc->sc_mode = mode;
1182 sc->sc_parent->dk_rawopens++;
1183 }
1184 KASSERT(sc->sc_mode != 0);
1185 if (flags & ~sc->sc_mode & FWRITE) {
1186 error = EROFS;
1187 goto popen_fail;
1188 }
1189 if (fmt == S_IFCHR)
1190 sc->sc_dk.dk_copenmask |= 1;
1191 else
1192 sc->sc_dk.dk_bopenmask |= 1;
1193 sc->sc_dk.dk_openmask =
1194 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1195
1196 popen_fail:
1197 mutex_exit(&sc->sc_parent->dk_rawlock);
1198 mutex_exit(&sc->sc_dk.dk_openlock);
1199 return (error);
1200 }
1201
1202 static int
1203 dklastclose(struct dkwedge_softc *sc)
1204 {
1205 struct vnode *vp;
1206 int error = 0, mode;
1207
1208 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1209 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1210
1211 mode = sc->sc_mode;
1212
1213 vp = NULL;
1214 if (sc->sc_parent->dk_rawopens > 0) {
1215 if (--sc->sc_parent->dk_rawopens == 0) {
1216 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1217 vp = sc->sc_parent->dk_rawvp;
1218 sc->sc_parent->dk_rawvp = NULL;
1219 sc->sc_mode = 0;
1220 }
1221 }
1222
1223 if (vp) {
1224 dk_close_parent(vp, mode);
1225 }
1226
1227 return error;
1228 }
1229
1230 /*
1231 * dkclose: [devsw entry point]
1232 *
1233 * Close a wedge.
1234 */
1235 static int
1236 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1237 {
1238 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1239 int error = 0;
1240
1241 if (sc == NULL)
1242 return (ENODEV);
1243 if (sc->sc_state != DKW_STATE_RUNNING)
1244 return (ENXIO);
1245
1246 KASSERT(sc->sc_dk.dk_openmask != 0);
1247
1248 mutex_enter(&sc->sc_dk.dk_openlock);
1249
1250 if (fmt == S_IFCHR)
1251 sc->sc_dk.dk_copenmask &= ~1;
1252 else
1253 sc->sc_dk.dk_bopenmask &= ~1;
1254 sc->sc_dk.dk_openmask =
1255 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1256
1257 if (sc->sc_dk.dk_openmask == 0) {
1258 mutex_enter(&sc->sc_parent->dk_rawlock);
1259 error = dklastclose(sc);
1260 mutex_exit(&sc->sc_parent->dk_rawlock);
1261 }
1262
1263 mutex_exit(&sc->sc_dk.dk_openlock);
1264
1265 return (error);
1266 }
1267
1268 /*
1269 * dkstragegy: [devsw entry point]
1270 *
1271 * Perform I/O based on the wedge I/O strategy.
1272 */
1273 static void
1274 dkstrategy(struct buf *bp)
1275 {
1276 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1277 uint64_t p_size, p_offset;
1278
1279 if (sc == NULL) {
1280 bp->b_error = ENODEV;
1281 goto done;
1282 }
1283
1284 if (sc->sc_state != DKW_STATE_RUNNING ||
1285 sc->sc_parent->dk_rawvp == NULL) {
1286 bp->b_error = ENXIO;
1287 goto done;
1288 }
1289
1290 /* If it's an empty transfer, wake up the top half now. */
1291 if (bp->b_bcount == 0)
1292 goto done;
1293
1294 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1295 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1296
1297 /* Make sure it's in-range. */
1298 if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
1299 goto done;
1300
1301 /* Translate it to the parent's raw LBA. */
1302 bp->b_rawblkno = bp->b_blkno + p_offset;
1303
1304 /* Place it in the queue and start I/O on the unit. */
1305 mutex_enter(&sc->sc_iolock);
1306 sc->sc_iopend++;
1307 disk_wait(&sc->sc_dk);
1308 bufq_put(sc->sc_bufq, bp);
1309 mutex_exit(&sc->sc_iolock);
1310
1311 dkstart(sc);
1312 return;
1313
1314 done:
1315 bp->b_resid = bp->b_bcount;
1316 biodone(bp);
1317 }
1318
1319 /*
1320 * dkstart:
1321 *
1322 * Start I/O that has been enqueued on the wedge.
1323 */
1324 static void
1325 dkstart(struct dkwedge_softc *sc)
1326 {
1327 struct vnode *vp;
1328 struct buf *bp, *nbp;
1329
1330 mutex_enter(&sc->sc_iolock);
1331
1332 /* Do as much work as has been enqueued. */
1333 while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
1334 if (sc->sc_state != DKW_STATE_RUNNING) {
1335 (void) bufq_get(sc->sc_bufq);
1336 if (--sc->sc_iopend == 0)
1337 cv_broadcast(&sc->sc_dkdrn);
1338 mutex_exit(&sc->sc_iolock);
1339 bp->b_error = ENXIO;
1340 bp->b_resid = bp->b_bcount;
1341 biodone(bp);
1342 mutex_enter(&sc->sc_iolock);
1343 continue;
1344 }
1345
1346 /* fetch an I/O buf with sc_iolock dropped */
1347 mutex_exit(&sc->sc_iolock);
1348 nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
1349 mutex_enter(&sc->sc_iolock);
1350 if (nbp == NULL) {
1351 /*
1352 * No resources to run this request; leave the
1353 * buffer queued up, and schedule a timer to
1354 * restart the queue in 1/2 a second.
1355 */
1356 callout_schedule(&sc->sc_restart_ch, hz / 2);
1357 break;
1358 }
1359
1360 /*
1361 * fetch buf, this can fail if another thread
1362 * has already processed the queue, it can also
1363 * return a completely different buf.
1364 */
1365 bp = bufq_get(sc->sc_bufq);
1366 if (bp == NULL) {
1367 mutex_exit(&sc->sc_iolock);
1368 putiobuf(nbp);
1369 mutex_enter(&sc->sc_iolock);
1370 continue;
1371 }
1372
1373 /* Instrumentation. */
1374 disk_busy(&sc->sc_dk);
1375
1376 /* release lock for VOP_STRATEGY */
1377 mutex_exit(&sc->sc_iolock);
1378
1379 nbp->b_data = bp->b_data;
1380 nbp->b_flags = bp->b_flags;
1381 nbp->b_oflags = bp->b_oflags;
1382 nbp->b_cflags = bp->b_cflags;
1383 nbp->b_iodone = dkiodone;
1384 nbp->b_proc = bp->b_proc;
1385 nbp->b_blkno = bp->b_rawblkno;
1386 nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1387 nbp->b_bcount = bp->b_bcount;
1388 nbp->b_private = bp;
1389 BIO_COPYPRIO(nbp, bp);
1390
1391 vp = nbp->b_vp;
1392 if ((nbp->b_flags & B_READ) == 0) {
1393 mutex_enter(vp->v_interlock);
1394 vp->v_numoutput++;
1395 mutex_exit(vp->v_interlock);
1396 }
1397 VOP_STRATEGY(vp, nbp);
1398
1399 mutex_enter(&sc->sc_iolock);
1400 }
1401
1402 mutex_exit(&sc->sc_iolock);
1403 }
1404
1405 /*
1406 * dkiodone:
1407 *
1408 * I/O to a wedge has completed; alert the top half.
1409 */
1410 static void
1411 dkiodone(struct buf *bp)
1412 {
1413 struct buf *obp = bp->b_private;
1414 struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1415
1416 if (bp->b_error != 0)
1417 obp->b_error = bp->b_error;
1418 obp->b_resid = bp->b_resid;
1419 putiobuf(bp);
1420
1421 mutex_enter(&sc->sc_iolock);
1422 if (--sc->sc_iopend == 0)
1423 cv_broadcast(&sc->sc_dkdrn);
1424
1425 disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1426 obp->b_flags & B_READ);
1427 mutex_exit(&sc->sc_iolock);
1428
1429 biodone(obp);
1430
1431 /* Kick the queue in case there is more work we can do. */
1432 dkstart(sc);
1433 }
1434
/*
 * dkrestart:
 *
 *	Callout handler that resumes the wedge's work queue after
 *	dkstart() stalled it for lack of resources.  The argument is
 *	the wedge's softc.
 */
static void
dkrestart(void *v)
{

	dkstart(v);
}
1448
1449 /*
1450 * dkminphys:
1451 *
1452 * Call parent's minphys function.
1453 */
1454 static void
1455 dkminphys(struct buf *bp)
1456 {
1457 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1458 dev_t dev;
1459
1460 dev = bp->b_dev;
1461 bp->b_dev = sc->sc_pdev;
1462 if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
1463 (*sc->sc_parent->dk_driver->d_minphys)(bp);
1464 else
1465 minphys(bp);
1466 bp->b_dev = dev;
1467 }
1468
1469 /*
1470 * dkread: [devsw entry point]
1471 *
1472 * Read from a wedge.
1473 */
1474 static int
1475 dkread(dev_t dev, struct uio *uio, int flags)
1476 {
1477 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1478
1479 if (sc == NULL)
1480 return (ENODEV);
1481 if (sc->sc_state != DKW_STATE_RUNNING)
1482 return (ENXIO);
1483
1484 return (physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio));
1485 }
1486
1487 /*
1488 * dkwrite: [devsw entry point]
1489 *
1490 * Write to a wedge.
1491 */
1492 static int
1493 dkwrite(dev_t dev, struct uio *uio, int flags)
1494 {
1495 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1496
1497 if (sc == NULL)
1498 return (ENODEV);
1499 if (sc->sc_state != DKW_STATE_RUNNING)
1500 return (ENXIO);
1501
1502 return (physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio));
1503 }
1504
1505 /*
1506 * dkioctl: [devsw entry point]
1507 *
1508 * Perform an ioctl request on a wedge.
1509 */
1510 static int
1511 dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1512 {
1513 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1514 int error = 0;
1515
1516 if (sc == NULL)
1517 return (ENODEV);
1518 if (sc->sc_state != DKW_STATE_RUNNING)
1519 return (ENXIO);
1520 if (sc->sc_parent->dk_rawvp == NULL)
1521 return (ENXIO);
1522
1523 /*
1524 * We pass NODEV instead of our device to indicate we don't
1525 * want to handle disklabel ioctls
1526 */
1527 error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
1528 if (error != EPASSTHROUGH)
1529 return (error);
1530
1531 error = 0;
1532
1533 switch (cmd) {
1534 case DIOCGSTRATEGY:
1535 case DIOCGCACHE:
1536 case DIOCCACHESYNC:
1537 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
1538 l != NULL ? l->l_cred : NOCRED);
1539 break;
1540 case DIOCGWEDGEINFO:
1541 {
1542 struct dkwedge_info *dkw = (void *) data;
1543
1544 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
1545 sizeof(dkw->dkw_devname));
1546 memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1547 dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1548 strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
1549 sizeof(dkw->dkw_parent));
1550 dkw->dkw_offset = sc->sc_offset;
1551 dkw->dkw_size = sc->sc_size;
1552 strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));
1553
1554 break;
1555 }
1556 case DIOCGSECTORALIGN:
1557 {
1558 struct disk_sectoralign *dsa = data;
1559 uint32_t r;
1560
1561 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
1562 l != NULL ? l->l_cred : NOCRED);
1563 if (error)
1564 break;
1565
1566 r = sc->sc_offset % dsa->dsa_alignment;
1567 if (r < dsa->dsa_firstaligned)
1568 dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
1569 else
1570 dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
1571 dsa->dsa_alignment) - r;
1572 break;
1573 }
1574 default:
1575 error = ENOTTY;
1576 }
1577
1578 return (error);
1579 }
1580
1581 /*
1582 * dkdiscard: [devsw entry point]
1583 *
1584 * Perform a discard-range request on a wedge.
1585 */
1586 static int
1587 dkdiscard(dev_t dev, off_t pos, off_t len)
1588 {
1589 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1590 unsigned shift;
1591 off_t offset, maxlen;
1592 int error;
1593
1594 if (sc == NULL)
1595 return (ENODEV);
1596 if (sc->sc_state != DKW_STATE_RUNNING)
1597 return (ENXIO);
1598 if (sc->sc_parent->dk_rawvp == NULL)
1599 return (ENXIO);
1600
1601 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
1602 KASSERT(__type_fit(off_t, sc->sc_size));
1603 KASSERT(__type_fit(off_t, sc->sc_offset));
1604 KASSERT(0 <= sc->sc_offset);
1605 KASSERT(sc->sc_size <= (__type_max(off_t) >> shift));
1606 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - sc->sc_size));
1607 offset = ((off_t)sc->sc_offset << shift);
1608 maxlen = ((off_t)sc->sc_size << shift);
1609
1610 if (len > maxlen)
1611 return (EINVAL);
1612 if (pos > (maxlen - len))
1613 return (EINVAL);
1614
1615 pos += offset;
1616
1617 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
1618 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
1619 VOP_UNLOCK(sc->sc_parent->dk_rawvp);
1620
1621 return error;
1622 }
1623
1624 /*
1625 * dksize: [devsw entry point]
1626 *
1627 * Query the size of a wedge for the purpose of performing a dump
1628 * or for swapping to.
1629 */
1630 static int
1631 dksize(dev_t dev)
1632 {
1633 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1634 uint64_t p_size;
1635 int rv = -1;
1636
1637 if (sc == NULL)
1638 return (-1);
1639 if (sc->sc_state != DKW_STATE_RUNNING)
1640 return (-1);
1641
1642 mutex_enter(&sc->sc_dk.dk_openlock);
1643 mutex_enter(&sc->sc_parent->dk_rawlock);
1644
1645 /* Our content type is static, no need to open the device. */
1646
1647 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1648 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1649 /* Saturate if we are larger than INT_MAX. */
1650 if (p_size > INT_MAX)
1651 rv = INT_MAX;
1652 else
1653 rv = (int) p_size;
1654 }
1655
1656 mutex_exit(&sc->sc_parent->dk_rawlock);
1657 mutex_exit(&sc->sc_dk.dk_openlock);
1658
1659 return (rv);
1660 }
1661
1662 /*
1663 * dkdump: [devsw entry point]
1664 *
1665 * Perform a crash dump to a wedge.
1666 */
1667 static int
1668 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1669 {
1670 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1671 const struct bdevsw *bdev;
1672 uint64_t p_size, p_offset;
1673 int rv = 0;
1674
1675 if (sc == NULL)
1676 return (ENODEV);
1677 if (sc->sc_state != DKW_STATE_RUNNING)
1678 return (ENXIO);
1679
1680 mutex_enter(&sc->sc_dk.dk_openlock);
1681 mutex_enter(&sc->sc_parent->dk_rawlock);
1682
1683 /* Our content type is static, no need to open the device. */
1684
1685 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1686 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1687 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0) {
1688 rv = ENXIO;
1689 goto out;
1690 }
1691 if (size % DEV_BSIZE != 0) {
1692 rv = EINVAL;
1693 goto out;
1694 }
1695
1696 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1697 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1698
1699 if (blkno < 0 || blkno + size / DEV_BSIZE > p_size) {
1700 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1701 "p_size (%" PRIu64 ")\n", __func__, blkno,
1702 size / DEV_BSIZE, p_size);
1703 rv = EINVAL;
1704 goto out;
1705 }
1706
1707 bdev = bdevsw_lookup(sc->sc_pdev);
1708 rv = (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1709
1710 out:
1711 mutex_exit(&sc->sc_parent->dk_rawlock);
1712 mutex_exit(&sc->sc_dk.dk_openlock);
1713
1714 return rv;
1715 }
1716
1717 /*
1718 * config glue
1719 */
1720
1721 /*
1722 * dkwedge_find_partition
1723 *
1724 * Find wedge corresponding to the specified parent name
1725 * and offset/length.
1726 */
1727 device_t
1728 dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
1729 {
1730 struct dkwedge_softc *sc;
1731 int i;
1732 device_t wedge = NULL;
1733
1734 rw_enter(&dkwedges_lock, RW_READER);
1735 for (i = 0; i < ndkwedges; i++) {
1736 if ((sc = dkwedges[i]) == NULL)
1737 continue;
1738 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1739 sc->sc_offset == startblk &&
1740 sc->sc_size == nblks) {
1741 if (wedge) {
1742 printf("WARNING: double match for boot wedge "
1743 "(%s, %s)\n",
1744 device_xname(wedge),
1745 device_xname(sc->sc_dev));
1746 continue;
1747 }
1748 wedge = sc->sc_dev;
1749 }
1750 }
1751 rw_exit(&dkwedges_lock);
1752
1753 return wedge;
1754 }
1755
1756 const char *
1757 dkwedge_get_parent_name(dev_t dev)
1758 {
1759 /* XXX: perhaps do this in lookup? */
1760 int bmaj = bdevsw_lookup_major(&dk_bdevsw);
1761 int cmaj = cdevsw_lookup_major(&dk_cdevsw);
1762 if (major(dev) != bmaj && major(dev) != cmaj)
1763 return NULL;
1764 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1765 if (sc == NULL)
1766 return NULL;
1767 return sc->sc_parent->dk_name;
1768 }
1769