/*	$NetBSD: dk.c,v 1.130 2023/04/21 18:24:47 riastradh Exp $	*/
2
3 /*-
4 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.130 2023/04/21 18:24:47 riastradh Exp $");
34
35 #ifdef _KERNEL_OPT
36 #include "opt_dkwedge.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/pool.h>
44 #include <sys/ioctl.h>
45 #include <sys/disklabel.h>
46 #include <sys/disk.h>
47 #include <sys/fcntl.h>
48 #include <sys/buf.h>
49 #include <sys/bufq.h>
50 #include <sys/vnode.h>
51 #include <sys/stat.h>
52 #include <sys/conf.h>
53 #include <sys/callout.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/device.h>
57 #include <sys/kauth.h>
58
59 #include <miscfs/specfs/specdev.h>
60
61 MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");
62
/* Life-cycle states of a wedge, kept in dkwedge_softc::sc_state. */
typedef enum {
	DKW_STATE_LARVAL	= 0,	/* allocated, not yet ready for use */
	DKW_STATE_RUNNING	= 1,	/* attached; open is permitted */
	DKW_STATE_DYING		= 2,	/* detach in progress */
	DKW_STATE_DEAD		= 666	/* about to be freed */
} dkwedge_state_t;
69
70 struct dkwedge_softc {
71 device_t sc_dev; /* pointer to our pseudo-device */
72 struct cfdata sc_cfdata; /* our cfdata structure */
73 uint8_t sc_wname[128]; /* wedge name (Unicode, UTF-8) */
74
75 dkwedge_state_t sc_state; /* state this wedge is in */
76
77 struct disk *sc_parent; /* parent disk */
78 daddr_t sc_offset; /* LBA offset of wedge in parent */
79 uint64_t sc_size; /* size of wedge in blocks */
80 char sc_ptype[32]; /* partition type */
81 dev_t sc_pdev; /* cached parent's dev_t */
82 /* link on parent's wedge list */
83 LIST_ENTRY(dkwedge_softc) sc_plink;
84
85 struct disk sc_dk; /* our own disk structure */
86 struct bufq_state *sc_bufq; /* buffer queue */
87 struct callout sc_restart_ch; /* callout to restart I/O */
88
89 kmutex_t sc_iolock;
90 kcondvar_t sc_dkdrn;
91 u_int sc_iopend; /* I/Os pending */
92 int sc_mode; /* parent open mode */
93 };
94
95 static void dkstart(struct dkwedge_softc *);
96 static void dkiodone(struct buf *);
97 static void dkrestart(void *);
98 static void dkminphys(struct buf *);
99
100 static int dkfirstopen(struct dkwedge_softc *, int);
101 static void dklastclose(struct dkwedge_softc *);
102 static int dkwedge_cleanup_parent(struct dkwedge_softc *, int);
103 static int dkwedge_detach(device_t, int);
104 static void dkwedge_delall1(struct disk *, bool);
105 static int dkwedge_del1(struct dkwedge_info *, int);
106 static int dk_open_parent(dev_t, int, struct vnode **);
107 static int dk_close_parent(struct vnode *, int);
108
109 static dev_type_open(dkopen);
110 static dev_type_close(dkclose);
111 static dev_type_read(dkread);
112 static dev_type_write(dkwrite);
113 static dev_type_ioctl(dkioctl);
114 static dev_type_strategy(dkstrategy);
115 static dev_type_dump(dkdump);
116 static dev_type_size(dksize);
117 static dev_type_discard(dkdiscard);
118
119 const struct bdevsw dk_bdevsw = {
120 .d_open = dkopen,
121 .d_close = dkclose,
122 .d_strategy = dkstrategy,
123 .d_ioctl = dkioctl,
124 .d_dump = dkdump,
125 .d_psize = dksize,
126 .d_discard = dkdiscard,
127 .d_flag = D_DISK | D_MPSAFE
128 };
129
130 const struct cdevsw dk_cdevsw = {
131 .d_open = dkopen,
132 .d_close = dkclose,
133 .d_read = dkread,
134 .d_write = dkwrite,
135 .d_ioctl = dkioctl,
136 .d_stop = nostop,
137 .d_tty = notty,
138 .d_poll = nopoll,
139 .d_mmap = nommap,
140 .d_kqfilter = nokqfilter,
141 .d_discard = dkdiscard,
142 .d_flag = D_DISK | D_MPSAFE
143 };
144
145 static struct dkwedge_softc **dkwedges;
146 static u_int ndkwedges;
147 static krwlock_t dkwedges_lock;
148
149 static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
150 static krwlock_t dkwedge_discovery_methods_lock;
151
152 /*
153 * dkwedge_match:
154 *
155 * Autoconfiguration match function for pseudo-device glue.
156 */
157 static int
158 dkwedge_match(device_t parent, cfdata_t match, void *aux)
159 {
160
161 /* Pseudo-device; always present. */
162 return 1;
163 }
164
165 /*
166 * dkwedge_attach:
167 *
168 * Autoconfiguration attach function for pseudo-device glue.
169 */
170 static void
171 dkwedge_attach(device_t parent, device_t self, void *aux)
172 {
173
174 if (!pmf_device_register(self, NULL, NULL))
175 aprint_error_dev(self, "couldn't establish power handler\n");
176 }
177
178 CFDRIVER_DECL(dk, DV_DISK, NULL);
179 CFATTACH_DECL3_NEW(dk, 0,
180 dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
181 DVF_DETACH_SHUTDOWN);
182
183 /*
184 * dkwedge_wait_drain:
185 *
186 * Wait for I/O on the wedge to drain.
187 */
188 static void
189 dkwedge_wait_drain(struct dkwedge_softc *sc)
190 {
191
192 mutex_enter(&sc->sc_iolock);
193 while (sc->sc_iopend != 0)
194 cv_wait(&sc->sc_dkdrn, &sc->sc_iolock);
195 mutex_exit(&sc->sc_iolock);
196 }
197
198 /*
199 * dkwedge_compute_pdev:
200 *
201 * Compute the parent disk's dev_t.
202 */
203 static int
204 dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
205 {
206 const char *name, *cp;
207 devmajor_t pmaj;
208 int punit;
209 char devname[16];
210
211 name = pname;
212 switch (type) {
213 case VBLK:
214 pmaj = devsw_name2blk(name, devname, sizeof(devname));
215 break;
216 case VCHR:
217 pmaj = devsw_name2chr(name, devname, sizeof(devname));
218 break;
219 default:
220 pmaj = NODEVMAJOR;
221 break;
222 }
223 if (pmaj == NODEVMAJOR)
224 return ENODEV;
225
226 name += strlen(devname);
227 for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
228 punit = (punit * 10) + (*cp - '0');
229 if (cp == name) {
230 /* Invalid parent disk name. */
231 return ENODEV;
232 }
233
234 *pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);
235
236 return 0;
237 }
238
239 /*
240 * dkwedge_array_expand:
241 *
242 * Expand the dkwedges array.
243 *
244 * Releases and reacquires dkwedges_lock as a writer.
245 */
246 static int
247 dkwedge_array_expand(void)
248 {
249
250 const unsigned incr = 16;
251 unsigned newcnt, oldcnt;
252 struct dkwedge_softc **newarray = NULL, **oldarray = NULL;
253
254 KASSERT(rw_write_held(&dkwedges_lock));
255
256 oldcnt = ndkwedges;
257 oldarray = dkwedges;
258
259 if (oldcnt >= INT_MAX - incr)
260 return ENFILE; /* XXX */
261 newcnt = oldcnt + incr;
262
263 rw_exit(&dkwedges_lock);
264 newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
265 M_WAITOK|M_ZERO);
266 rw_enter(&dkwedges_lock, RW_WRITER);
267
268 if (ndkwedges != oldcnt || dkwedges != oldarray) {
269 oldarray = NULL; /* already recycled */
270 goto out;
271 }
272
273 if (oldarray != NULL)
274 memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
275 dkwedges = newarray;
276 newarray = NULL; /* transferred to dkwedges */
277 ndkwedges = newcnt;
278
279 out: rw_exit(&dkwedges_lock);
280 if (oldarray != NULL)
281 free(oldarray, M_DKWEDGE);
282 if (newarray != NULL)
283 free(newarray, M_DKWEDGE);
284 rw_enter(&dkwedges_lock, RW_WRITER);
285 return 0;
286 }
287
288 static void
289 dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
290 {
291 struct disk *dk = &sc->sc_dk;
292 struct disk_geom *dg = &dk->dk_geom;
293
294 memset(dg, 0, sizeof(*dg));
295
296 dg->dg_secperunit = sc->sc_size;
297 dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;
298
299 /* fake numbers, 1 cylinder is 1 MB with default sector size */
300 dg->dg_nsectors = 32;
301 dg->dg_ntracks = 64;
302 dg->dg_ncylinders =
303 dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);
304
305 disk_set_info(sc->sc_dev, dk, NULL);
306 }
307
308 /*
309 * dkwedge_add: [exported function]
310 *
311 * Add a disk wedge based on the provided information.
312 *
313 * The incoming dkw_devname[] is ignored, instead being
314 * filled in and returned to the caller.
315 */
316 int
317 dkwedge_add(struct dkwedge_info *dkw)
318 {
319 struct dkwedge_softc *sc, *lsc;
320 struct disk *pdk;
321 u_int unit;
322 int error;
323 dev_t pdev;
324
325 dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
326 pdk = disk_find(dkw->dkw_parent);
327 if (pdk == NULL)
328 return ENODEV;
329
330 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
331 if (error)
332 return error;
333
334 if (dkw->dkw_offset < 0)
335 return EINVAL;
336
337 /*
338 * Check for an existing wedge at the same disk offset. Allow
339 * updating a wedge if the only change is the size, and the new
340 * size is larger than the old.
341 */
342 sc = NULL;
343 mutex_enter(&pdk->dk_openlock);
344 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
345 if (lsc->sc_offset != dkw->dkw_offset)
346 continue;
347 if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
348 break;
349 if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
350 break;
351 if (lsc->sc_size > dkw->dkw_size)
352 break;
353
354 sc = lsc;
355 sc->sc_size = dkw->dkw_size;
356 dk_set_geometry(sc, pdk);
357
358 break;
359 }
360 mutex_exit(&pdk->dk_openlock);
361
362 if (sc != NULL)
363 goto announce;
364
365 sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
366 sc->sc_state = DKW_STATE_LARVAL;
367 sc->sc_parent = pdk;
368 sc->sc_pdev = pdev;
369 sc->sc_offset = dkw->dkw_offset;
370 sc->sc_size = dkw->dkw_size;
371
372 memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
373 sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';
374
375 memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
376 sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';
377
378 bufq_alloc(&sc->sc_bufq, "fcfs", 0);
379
380 callout_init(&sc->sc_restart_ch, 0);
381 callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);
382
383 mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);
384 cv_init(&sc->sc_dkdrn, "dkdrn");
385
386 /*
387 * Wedge will be added; increment the wedge count for the parent.
388 * Only allow this to happen if RAW_PART is the only thing open.
389 */
390 mutex_enter(&pdk->dk_openlock);
391 if (pdk->dk_openmask & ~(1 << RAW_PART))
392 error = EBUSY;
393 else {
394 /* Check for wedge overlap. */
395 LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
396 daddr_t lastblk = sc->sc_offset + sc->sc_size - 1;
397 daddr_t llastblk = lsc->sc_offset + lsc->sc_size - 1;
398
399 if (sc->sc_offset >= lsc->sc_offset &&
400 sc->sc_offset <= llastblk) {
401 /* Overlaps the tail of the existing wedge. */
402 break;
403 }
404 if (lastblk >= lsc->sc_offset &&
405 lastblk <= llastblk) {
406 /* Overlaps the head of the existing wedge. */
407 break;
408 }
409 }
410 if (lsc != NULL) {
411 if (sc->sc_offset == lsc->sc_offset &&
412 sc->sc_size == lsc->sc_size &&
413 strcmp(sc->sc_wname, lsc->sc_wname) == 0)
414 error = EEXIST;
415 else
416 error = EINVAL;
417 } else {
418 pdk->dk_nwedges++;
419 LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
420 }
421 }
422 mutex_exit(&pdk->dk_openlock);
423 if (error) {
424 cv_destroy(&sc->sc_dkdrn);
425 mutex_destroy(&sc->sc_iolock);
426 bufq_free(sc->sc_bufq);
427 free(sc, M_DKWEDGE);
428 return error;
429 }
430
431 /* Fill in our cfdata for the pseudo-device glue. */
432 sc->sc_cfdata.cf_name = dk_cd.cd_name;
433 sc->sc_cfdata.cf_atname = dk_ca.ca_name;
434 /* sc->sc_cfdata.cf_unit set below */
435 sc->sc_cfdata.cf_fstate = FSTATE_STAR;
436
437 /* Insert the larval wedge into the array. */
438 rw_enter(&dkwedges_lock, RW_WRITER);
439 for (error = 0;;) {
440 struct dkwedge_softc **scpp;
441
442 /*
443 * Check for a duplicate wname while searching for
444 * a slot.
445 */
446 for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
447 if (dkwedges[unit] == NULL) {
448 if (scpp == NULL) {
449 scpp = &dkwedges[unit];
450 sc->sc_cfdata.cf_unit = unit;
451 }
452 } else {
453 /* XXX Unicode. */
454 if (strcmp(dkwedges[unit]->sc_wname,
455 sc->sc_wname) == 0) {
456 error = EEXIST;
457 break;
458 }
459 }
460 }
461 if (error)
462 break;
463 KASSERT(unit == ndkwedges);
464 if (scpp == NULL) {
465 error = dkwedge_array_expand();
466 if (error)
467 break;
468 } else {
469 KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
470 *scpp = sc;
471 break;
472 }
473 }
474 rw_exit(&dkwedges_lock);
475 if (error) {
476 mutex_enter(&pdk->dk_openlock);
477 pdk->dk_nwedges--;
478 LIST_REMOVE(sc, sc_plink);
479 mutex_exit(&pdk->dk_openlock);
480
481 cv_destroy(&sc->sc_dkdrn);
482 mutex_destroy(&sc->sc_iolock);
483 bufq_free(sc->sc_bufq);
484 free(sc, M_DKWEDGE);
485 return error;
486 }
487
488 /*
489 * Now that we know the unit #, attach a pseudo-device for
490 * this wedge instance. This will provide us with the
491 * device_t necessary for glue to other parts of the system.
492 *
493 * This should never fail, unless we're almost totally out of
494 * memory.
495 */
496 if ((sc->sc_dev = config_attach_pseudo(&sc->sc_cfdata)) == NULL) {
497 aprint_error("%s%u: unable to attach pseudo-device\n",
498 sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);
499
500 rw_enter(&dkwedges_lock, RW_WRITER);
501 dkwedges[sc->sc_cfdata.cf_unit] = NULL;
502 rw_exit(&dkwedges_lock);
503
504 mutex_enter(&pdk->dk_openlock);
505 pdk->dk_nwedges--;
506 LIST_REMOVE(sc, sc_plink);
507 mutex_exit(&pdk->dk_openlock);
508
509 cv_destroy(&sc->sc_dkdrn);
510 mutex_destroy(&sc->sc_iolock);
511 bufq_free(sc->sc_bufq);
512 free(sc, M_DKWEDGE);
513 return ENOMEM;
514 }
515
516 /*
517 * XXX Really ought to make the disk_attach() and the changing
518 * of state to RUNNING atomic.
519 */
520
521 disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
522 dk_set_geometry(sc, pdk);
523 disk_attach(&sc->sc_dk);
524
525 /* Disk wedge is ready for use! */
526 sc->sc_state = DKW_STATE_RUNNING;
527
528 announce:
529 /* Announce our arrival. */
530 aprint_normal(
531 "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
532 device_xname(sc->sc_dev), pdk->dk_name,
533 sc->sc_wname, /* XXX Unicode */
534 sc->sc_size, sc->sc_offset,
535 sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);
536
537 /* Return the devname to the caller. */
538 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
539 sizeof(dkw->dkw_devname));
540
541 return 0;
542 }
543
544 /*
545 * dkwedge_find:
546 *
547 * Lookup a disk wedge based on the provided information.
548 * NOTE: We look up the wedge based on the wedge devname,
549 * not wname.
550 *
551 * Return NULL if the wedge is not found, otherwise return
552 * the wedge's softc. Assign the wedge's unit number to unitp
553 * if unitp is not NULL.
554 */
555 static struct dkwedge_softc *
556 dkwedge_find(struct dkwedge_info *dkw, u_int *unitp)
557 {
558 struct dkwedge_softc *sc = NULL;
559 u_int unit;
560
561 /* Find our softc. */
562 dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
563 rw_enter(&dkwedges_lock, RW_READER);
564 for (unit = 0; unit < ndkwedges; unit++) {
565 if ((sc = dkwedges[unit]) != NULL &&
566 strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
567 strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
568 break;
569 }
570 }
571 rw_exit(&dkwedges_lock);
572 if (unit == ndkwedges)
573 return NULL;
574
575 if (unitp != NULL)
576 *unitp = unit;
577
578 return sc;
579 }
580
/*
 * dkwedge_del:		[exported function]
 *
 *	Delete the wedge identified by dkw.  The wedge is located by
 *	its device name (dkw_devname), not by its wedge name.  No
 *	detach flags are passed, so this is a non-forced removal.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{
	const int noflags = 0;

	return dkwedge_del1(dkw, noflags);
}
594
595 int
596 dkwedge_del1(struct dkwedge_info *dkw, int flags)
597 {
598 struct dkwedge_softc *sc = NULL;
599
600 /* Find our softc. */
601 if ((sc = dkwedge_find(dkw, NULL)) == NULL)
602 return ESRCH;
603
604 return config_detach(sc->sc_dev, flags);
605 }
606
607 static int
608 dkwedge_cleanup_parent(struct dkwedge_softc *sc, int flags)
609 {
610 struct disk *dk = &sc->sc_dk;
611 int rc;
612
613 rc = 0;
614 mutex_enter(&dk->dk_openlock);
615 if (dk->dk_openmask == 0) {
616 /* nothing to do */
617 } else if ((flags & DETACH_FORCE) == 0) {
618 rc = EBUSY;
619 } else {
620 mutex_enter(&sc->sc_parent->dk_rawlock);
621 dklastclose(sc);
622 mutex_exit(&sc->sc_parent->dk_rawlock);
623 }
624 mutex_exit(&sc->sc_dk.dk_openlock);
625
626 return rc;
627 }
628
629 /*
630 * dkwedge_detach:
631 *
632 * Autoconfiguration detach function for pseudo-device glue.
633 */
634 static int
635 dkwedge_detach(device_t self, int flags)
636 {
637 struct dkwedge_softc *sc = NULL;
638 u_int unit;
639 int bmaj, cmaj, rc;
640
641 rw_enter(&dkwedges_lock, RW_WRITER);
642 for (unit = 0; unit < ndkwedges; unit++) {
643 if ((sc = dkwedges[unit]) != NULL && sc->sc_dev == self)
644 break;
645 }
646 if (unit == ndkwedges)
647 rc = ENXIO;
648 else if ((rc = dkwedge_cleanup_parent(sc, flags)) == 0) {
649 /* Mark the wedge as dying. */
650 sc->sc_state = DKW_STATE_DYING;
651 }
652 rw_exit(&dkwedges_lock);
653
654 if (rc != 0)
655 return rc;
656
657 pmf_device_deregister(self);
658
659 /* Locate the wedge major numbers. */
660 bmaj = bdevsw_lookup_major(&dk_bdevsw);
661 cmaj = cdevsw_lookup_major(&dk_cdevsw);
662
663 /* Kill any pending restart. */
664 callout_stop(&sc->sc_restart_ch);
665
666 /*
667 * dkstart() will kill any queued buffers now that the
668 * state of the wedge is not RUNNING. Once we've done
669 * that, wait for any other pending I/O to complete.
670 */
671 dkstart(sc);
672 dkwedge_wait_drain(sc);
673
674 /* Nuke the vnodes for any open instances. */
675 vdevgone(bmaj, unit, unit, VBLK);
676 vdevgone(cmaj, unit, unit, VCHR);
677
678 /* Clean up the parent. */
679 dkwedge_cleanup_parent(sc, flags | DETACH_FORCE);
680
681 /* Announce our departure. */
682 aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
683 sc->sc_parent->dk_name,
684 sc->sc_wname); /* XXX Unicode */
685
686 mutex_enter(&sc->sc_parent->dk_openlock);
687 sc->sc_parent->dk_nwedges--;
688 LIST_REMOVE(sc, sc_plink);
689 mutex_exit(&sc->sc_parent->dk_openlock);
690
691 /* Delete our buffer queue. */
692 bufq_free(sc->sc_bufq);
693
694 /* Detach from the disk list. */
695 disk_detach(&sc->sc_dk);
696 disk_destroy(&sc->sc_dk);
697
698 /* Poof. */
699 rw_enter(&dkwedges_lock, RW_WRITER);
700 dkwedges[unit] = NULL;
701 sc->sc_state = DKW_STATE_DEAD;
702 rw_exit(&dkwedges_lock);
703
704 mutex_destroy(&sc->sc_iolock);
705 cv_destroy(&sc->sc_dkdrn);
706
707 free(sc, M_DKWEDGE);
708
709 return 0;
710 }
711
/*
 * dkwedge_delall:	[exported function]
 *
 *	Remove every wedge configured on the given disk, whether idle
 *	or not.  Used when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{
	const bool idleonly = false;

	dkwedge_delall1(pdk, idleonly);
}
724
725 static void
726 dkwedge_delall1(struct disk *pdk, bool idleonly)
727 {
728 struct dkwedge_info dkw;
729 struct dkwedge_softc *sc;
730 int flags;
731
732 flags = DETACH_QUIET;
733 if (!idleonly)
734 flags |= DETACH_FORCE;
735
736 for (;;) {
737 mutex_enter(&pdk->dk_openlock);
738 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
739 if (!idleonly || sc->sc_dk.dk_openmask == 0)
740 break;
741 }
742 if (sc == NULL) {
743 KASSERT(idleonly || pdk->dk_nwedges == 0);
744 mutex_exit(&pdk->dk_openlock);
745 return;
746 }
747 strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
748 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
749 sizeof(dkw.dkw_devname));
750 mutex_exit(&pdk->dk_openlock);
751 (void) dkwedge_del1(&dkw, flags);
752 }
753 }
754
755 /*
756 * dkwedge_list: [exported function]
757 *
758 * List all of the wedges on a particular disk.
759 */
760 int
761 dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
762 {
763 struct uio uio;
764 struct iovec iov;
765 struct dkwedge_softc *sc;
766 struct dkwedge_info dkw;
767 int error = 0;
768
769 iov.iov_base = dkwl->dkwl_buf;
770 iov.iov_len = dkwl->dkwl_bufsize;
771
772 uio.uio_iov = &iov;
773 uio.uio_iovcnt = 1;
774 uio.uio_offset = 0;
775 uio.uio_resid = dkwl->dkwl_bufsize;
776 uio.uio_rw = UIO_READ;
777 KASSERT(l == curlwp);
778 uio.uio_vmspace = l->l_proc->p_vmspace;
779
780 dkwl->dkwl_ncopied = 0;
781
782 mutex_enter(&pdk->dk_openlock);
783 LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
784 if (uio.uio_resid < sizeof(dkw))
785 break;
786
787 if (sc->sc_state != DKW_STATE_RUNNING)
788 continue;
789
790 strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
791 sizeof(dkw.dkw_devname));
792 memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
793 dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
794 strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
795 sizeof(dkw.dkw_parent));
796 dkw.dkw_offset = sc->sc_offset;
797 dkw.dkw_size = sc->sc_size;
798 strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));
799
800 error = uiomove(&dkw, sizeof(dkw), &uio);
801 if (error)
802 break;
803 dkwl->dkwl_ncopied++;
804 }
805 dkwl->dkwl_nwedges = pdk->dk_nwedges;
806 mutex_exit(&pdk->dk_openlock);
807
808 return error;
809 }
810
811 device_t
812 dkwedge_find_by_wname(const char *wname)
813 {
814 device_t dv = NULL;
815 struct dkwedge_softc *sc;
816 int i;
817
818 rw_enter(&dkwedges_lock, RW_WRITER);
819 for (i = 0; i < ndkwedges; i++) {
820 if ((sc = dkwedges[i]) == NULL)
821 continue;
822 if (strcmp(sc->sc_wname, wname) == 0) {
823 if (dv != NULL) {
824 printf(
825 "WARNING: double match for wedge name %s "
826 "(%s, %s)\n", wname, device_xname(dv),
827 device_xname(sc->sc_dev));
828 continue;
829 }
830 dv = sc->sc_dev;
831 }
832 }
833 rw_exit(&dkwedges_lock);
834 return dv;
835 }
836
837 device_t
838 dkwedge_find_by_parent(const char *name, size_t *i)
839 {
840
841 rw_enter(&dkwedges_lock, RW_WRITER);
842 for (; *i < (size_t)ndkwedges; (*i)++) {
843 struct dkwedge_softc *sc;
844 if ((sc = dkwedges[*i]) == NULL)
845 continue;
846 if (strcmp(sc->sc_parent->dk_name, name) != 0)
847 continue;
848 rw_exit(&dkwedges_lock);
849 return sc->sc_dev;
850 }
851 rw_exit(&dkwedges_lock);
852 return NULL;
853 }
854
855 void
856 dkwedge_print_wnames(void)
857 {
858 struct dkwedge_softc *sc;
859 int i;
860
861 rw_enter(&dkwedges_lock, RW_WRITER);
862 for (i = 0; i < ndkwedges; i++) {
863 if ((sc = dkwedges[i]) == NULL)
864 continue;
865 printf(" wedge:%s", sc->sc_wname);
866 }
867 rw_exit(&dkwedges_lock);
868 }
869
870 /*
871 * We need a dummy object to stuff into the dkwedge discovery method link
872 * set to ensure that there is always at least one object in the set.
873 */
874 static struct dkwedge_discovery_method dummy_discovery_method;
875 __link_set_add_bss(dkwedge_methods, dummy_discovery_method);
876
877 /*
878 * dkwedge_init:
879 *
880 * Initialize the disk wedge subsystem.
881 */
882 void
883 dkwedge_init(void)
884 {
885 __link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
886 struct dkwedge_discovery_method * const *ddmp;
887 struct dkwedge_discovery_method *lddm, *ddm;
888
889 rw_init(&dkwedges_lock);
890 rw_init(&dkwedge_discovery_methods_lock);
891
892 if (config_cfdriver_attach(&dk_cd) != 0)
893 panic("dkwedge: unable to attach cfdriver");
894 if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
895 panic("dkwedge: unable to attach cfattach");
896
897 rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);
898
899 LIST_INIT(&dkwedge_discovery_methods);
900
901 __link_set_foreach(ddmp, dkwedge_methods) {
902 ddm = *ddmp;
903 if (ddm == &dummy_discovery_method)
904 continue;
905 if (LIST_EMPTY(&dkwedge_discovery_methods)) {
906 LIST_INSERT_HEAD(&dkwedge_discovery_methods,
907 ddm, ddm_list);
908 continue;
909 }
910 LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
911 if (ddm->ddm_priority == lddm->ddm_priority) {
912 aprint_error("dk-method-%s: method \"%s\" "
913 "already exists at priority %d\n",
914 ddm->ddm_name, lddm->ddm_name,
915 lddm->ddm_priority);
916 /* Not inserted. */
917 break;
918 }
919 if (ddm->ddm_priority < lddm->ddm_priority) {
920 /* Higher priority; insert before. */
921 LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
922 break;
923 }
924 if (LIST_NEXT(lddm, ddm_list) == NULL) {
925 /* Last one; insert after. */
926 KASSERT(lddm->ddm_priority < ddm->ddm_priority);
927 LIST_INSERT_AFTER(lddm, ddm, ddm_list);
928 break;
929 }
930 }
931 }
932
933 rw_exit(&dkwedge_discovery_methods_lock);
934 }
935
/*
 * Whether dkwedge_discover() does anything.  Defaults to on only in
 * kernels built with the DKWEDGE_AUTODISCOVER option.
 */
int dkwedge_autodiscover =
#ifdef DKWEDGE_AUTODISCOVER
    1;
#else
    0;
#endif
941
942 /*
943 * dkwedge_discover: [exported function]
944 *
945 * Discover the wedges on a newly attached disk.
946 * Remove all unused wedges on the disk first.
947 */
948 void
949 dkwedge_discover(struct disk *pdk)
950 {
951 struct dkwedge_discovery_method *ddm;
952 struct vnode *vp;
953 int error;
954 dev_t pdev;
955
956 /*
957 * Require people playing with wedges to enable this explicitly.
958 */
959 if (dkwedge_autodiscover == 0)
960 return;
961
962 rw_enter(&dkwedge_discovery_methods_lock, RW_READER);
963
964 /*
965 * Use the character device for scanning, the block device
966 * is busy if there are already wedges attached.
967 */
968 error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
969 if (error) {
970 aprint_error("%s: unable to compute pdev, error = %d\n",
971 pdk->dk_name, error);
972 goto out;
973 }
974
975 error = cdevvp(pdev, &vp);
976 if (error) {
977 aprint_error("%s: unable to find vnode for pdev, error = %d\n",
978 pdk->dk_name, error);
979 goto out;
980 }
981
982 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
983 if (error) {
984 aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
985 pdk->dk_name, error);
986 vrele(vp);
987 goto out;
988 }
989
990 error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
991 if (error) {
992 if (error != ENODEV)
993 aprint_error("%s: unable to open device, error = %d\n",
994 pdk->dk_name, error);
995 vput(vp);
996 goto out;
997 }
998 VOP_UNLOCK(vp);
999
1000 /*
1001 * Remove unused wedges
1002 */
1003 dkwedge_delall1(pdk, true);
1004
1005 /*
1006 * For each supported partition map type, look to see if
1007 * this map type exists. If so, parse it and add the
1008 * corresponding wedges.
1009 */
1010 LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
1011 error = (*ddm->ddm_discover)(pdk, vp);
1012 if (error == 0) {
1013 /* Successfully created wedges; we're done. */
1014 break;
1015 }
1016 }
1017
1018 error = vn_close(vp, FREAD, NOCRED);
1019 if (error) {
1020 aprint_error("%s: unable to close device, error = %d\n",
1021 pdk->dk_name, error);
1022 /* We'll just assume the vnode has been cleaned up. */
1023 }
1024
1025 out:
1026 rw_exit(&dkwedge_discovery_methods_lock);
1027 }
1028
1029 /*
1030 * dkwedge_read:
1031 *
1032 * Read some data from the specified disk, used for
1033 * partition discovery.
1034 */
1035 int
1036 dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
1037 void *tbuf, size_t len)
1038 {
1039 buf_t *bp;
1040 int error;
1041 bool isopen;
1042 dev_t bdev;
1043 struct vnode *bdvp;
1044
1045 /*
1046 * The kernel cannot read from a character device vnode
1047 * as physio() only handles user memory.
1048 *
1049 * If the block device has already been opened by a wedge
1050 * use that vnode and temporarily bump the open counter.
1051 *
1052 * Otherwise try to open the block device.
1053 */
1054
1055 bdev = devsw_chr2blk(vp->v_rdev);
1056
1057 mutex_enter(&pdk->dk_rawlock);
1058 if (pdk->dk_rawopens != 0) {
1059 KASSERT(pdk->dk_rawvp != NULL);
1060 isopen = true;
1061 ++pdk->dk_rawopens;
1062 bdvp = pdk->dk_rawvp;
1063 error = 0;
1064 } else {
1065 isopen = false;
1066 error = dk_open_parent(bdev, FREAD, &bdvp);
1067 }
1068 mutex_exit(&pdk->dk_rawlock);
1069
1070 if (error)
1071 return error;
1072
1073 bp = getiobuf(bdvp, true);
1074 bp->b_flags = B_READ;
1075 bp->b_cflags = BC_BUSY;
1076 bp->b_dev = bdev;
1077 bp->b_data = tbuf;
1078 bp->b_bufsize = bp->b_bcount = len;
1079 bp->b_blkno = blkno;
1080 bp->b_cylinder = 0;
1081 bp->b_error = 0;
1082
1083 VOP_STRATEGY(bdvp, bp);
1084 error = biowait(bp);
1085 putiobuf(bp);
1086
1087 mutex_enter(&pdk->dk_rawlock);
1088 if (isopen) {
1089 --pdk->dk_rawopens;
1090 } else {
1091 dk_close_parent(bdvp, FREAD);
1092 }
1093 mutex_exit(&pdk->dk_rawlock);
1094
1095 return error;
1096 }
1097
1098 /*
1099 * dkwedge_lookup:
1100 *
1101 * Look up a dkwedge_softc based on the provided dev_t.
1102 */
1103 static struct dkwedge_softc *
1104 dkwedge_lookup(dev_t dev)
1105 {
1106 int unit = minor(dev);
1107
1108 if (unit >= ndkwedges)
1109 return NULL;
1110
1111 KASSERT(dkwedges != NULL);
1112
1113 return dkwedges[unit];
1114 }
1115
1116 static int
1117 dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
1118 {
1119 struct vnode *vp;
1120 int error;
1121
1122 error = bdevvp(dev, &vp);
1123 if (error)
1124 return error;
1125
1126 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1127 if (error) {
1128 vrele(vp);
1129 return error;
1130 }
1131 error = VOP_OPEN(vp, mode, NOCRED);
1132 if (error) {
1133 vput(vp);
1134 return error;
1135 }
1136
1137 /* VOP_OPEN() doesn't do this for us. */
1138 if (mode & FWRITE) {
1139 mutex_enter(vp->v_interlock);
1140 vp->v_writecount++;
1141 mutex_exit(vp->v_interlock);
1142 }
1143
1144 VOP_UNLOCK(vp);
1145
1146 *vpp = vp;
1147
1148 return 0;
1149 }
1150
1151 static int
1152 dk_close_parent(struct vnode *vp, int mode)
1153 {
1154 int error;
1155
1156 error = vn_close(vp, mode, NOCRED);
1157 return error;
1158 }
1159
1160 /*
1161 * dkopen: [devsw entry point]
1162 *
1163 * Open a wedge.
1164 */
1165 static int
1166 dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
1167 {
1168 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1169 int error = 0;
1170
1171 if (sc == NULL)
1172 return ENODEV;
1173 if (sc->sc_state != DKW_STATE_RUNNING)
1174 return ENXIO;
1175
1176 /*
1177 * We go through a complicated little dance to only open the parent
1178 * vnode once per wedge, no matter how many times the wedge is
1179 * opened. The reason? We see one dkopen() per open call, but
1180 * only dkclose() on the last close.
1181 */
1182 mutex_enter(&sc->sc_dk.dk_openlock);
1183 mutex_enter(&sc->sc_parent->dk_rawlock);
1184 if (sc->sc_dk.dk_openmask == 0) {
1185 error = dkfirstopen(sc, flags);
1186 if (error)
1187 goto popen_fail;
1188 }
1189 KASSERT(sc->sc_mode != 0);
1190 if (flags & ~sc->sc_mode & FWRITE) {
1191 error = EROFS;
1192 goto popen_fail;
1193 }
1194 if (fmt == S_IFCHR)
1195 sc->sc_dk.dk_copenmask |= 1;
1196 else
1197 sc->sc_dk.dk_bopenmask |= 1;
1198 sc->sc_dk.dk_openmask =
1199 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1200
1201 popen_fail:
1202 mutex_exit(&sc->sc_parent->dk_rawlock);
1203 mutex_exit(&sc->sc_dk.dk_openlock);
1204 return error;
1205 }
1206
1207 static int
1208 dkfirstopen(struct dkwedge_softc *sc, int flags)
1209 {
1210 struct dkwedge_softc *nsc;
1211 struct vnode *vp;
1212 int mode;
1213 int error;
1214
1215 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1216 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1217
1218 if (sc->sc_parent->dk_rawopens == 0) {
1219 KASSERT(sc->sc_parent->dk_rawvp == NULL);
1220 /*
1221 * Try open read-write. If this fails for EROFS
1222 * and wedge is read-only, retry to open read-only.
1223 */
1224 mode = FREAD | FWRITE;
1225 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1226 if (error == EROFS && (flags & FWRITE) == 0) {
1227 mode &= ~FWRITE;
1228 error = dk_open_parent(sc->sc_pdev, mode, &vp);
1229 }
1230 if (error)
1231 return error;
1232 sc->sc_parent->dk_rawvp = vp;
1233 } else {
1234 /*
1235 * Retrieve mode from an already opened wedge.
1236 *
1237 * At this point, dk_rawopens is bounded by the number
1238 * of dkwedge devices in the system, which is limited
1239 * by autoconf device numbering to INT_MAX. Since
1240 * dk_rawopens is unsigned, this can't overflow.
1241 */
1242 KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
1243 mode = 0;
1244 LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
1245 if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
1246 continue;
1247 mode = nsc->sc_mode;
1248 break;
1249 }
1250 }
1251 sc->sc_mode = mode;
1252 sc->sc_parent->dk_rawopens++;
1253
1254 return 0;
1255 }
1256
1257 static void
1258 dklastclose(struct dkwedge_softc *sc)
1259 {
1260
1261 KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
1262 KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
1263 KASSERT(sc->sc_parent->dk_rawopens > 0);
1264 KASSERT(sc->sc_parent->dk_rawvp != NULL);
1265
1266 if (--sc->sc_parent->dk_rawopens == 0) {
1267 struct vnode *const vp = sc->sc_parent->dk_rawvp;
1268 const int mode = sc->sc_mode;
1269
1270 sc->sc_parent->dk_rawvp = NULL;
1271 sc->sc_mode = 0;
1272
1273 dk_close_parent(vp, mode);
1274 }
1275 }
1276
1277 /*
1278 * dkclose: [devsw entry point]
1279 *
1280 * Close a wedge.
1281 */
1282 static int
1283 dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
1284 {
1285 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1286
1287 if (sc == NULL)
1288 return ENODEV;
1289 if (sc->sc_state != DKW_STATE_RUNNING)
1290 return ENXIO;
1291
1292 mutex_enter(&sc->sc_dk.dk_openlock);
1293 mutex_enter(&sc->sc_parent->dk_rawlock);
1294
1295 KASSERT(sc->sc_dk.dk_openmask != 0);
1296
1297 if (fmt == S_IFCHR)
1298 sc->sc_dk.dk_copenmask &= ~1;
1299 else
1300 sc->sc_dk.dk_bopenmask &= ~1;
1301 sc->sc_dk.dk_openmask =
1302 sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;
1303
1304 if (sc->sc_dk.dk_openmask == 0) {
1305 dklastclose(sc);
1306 }
1307
1308 mutex_exit(&sc->sc_parent->dk_rawlock);
1309 mutex_exit(&sc->sc_dk.dk_openlock);
1310
1311 return 0;
1312 }
1313
1314 /*
1315 * dkstragegy: [devsw entry point]
1316 *
1317 * Perform I/O based on the wedge I/O strategy.
1318 */
1319 static void
1320 dkstrategy(struct buf *bp)
1321 {
1322 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1323 uint64_t p_size, p_offset;
1324
1325 if (sc == NULL) {
1326 bp->b_error = ENODEV;
1327 goto done;
1328 }
1329
1330 if (sc->sc_state != DKW_STATE_RUNNING ||
1331 sc->sc_parent->dk_rawvp == NULL) {
1332 bp->b_error = ENXIO;
1333 goto done;
1334 }
1335
1336 /* If it's an empty transfer, wake up the top half now. */
1337 if (bp->b_bcount == 0)
1338 goto done;
1339
1340 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1341 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1342
1343 /* Make sure it's in-range. */
1344 if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
1345 goto done;
1346
1347 /* Translate it to the parent's raw LBA. */
1348 bp->b_rawblkno = bp->b_blkno + p_offset;
1349
1350 /* Place it in the queue and start I/O on the unit. */
1351 mutex_enter(&sc->sc_iolock);
1352 sc->sc_iopend++;
1353 disk_wait(&sc->sc_dk);
1354 bufq_put(sc->sc_bufq, bp);
1355 mutex_exit(&sc->sc_iolock);
1356
1357 dkstart(sc);
1358 return;
1359
1360 done:
1361 bp->b_resid = bp->b_bcount;
1362 biodone(bp);
1363 }
1364
1365 /*
1366 * dkstart:
1367 *
1368 * Start I/O that has been enqueued on the wedge.
1369 */
1370 static void
1371 dkstart(struct dkwedge_softc *sc)
1372 {
1373 struct vnode *vp;
1374 struct buf *bp, *nbp;
1375
1376 mutex_enter(&sc->sc_iolock);
1377
1378 /* Do as much work as has been enqueued. */
1379 while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
1380 if (sc->sc_state != DKW_STATE_RUNNING) {
1381 (void) bufq_get(sc->sc_bufq);
1382 if (--sc->sc_iopend == 0)
1383 cv_broadcast(&sc->sc_dkdrn);
1384 mutex_exit(&sc->sc_iolock);
1385 bp->b_error = ENXIO;
1386 bp->b_resid = bp->b_bcount;
1387 biodone(bp);
1388 mutex_enter(&sc->sc_iolock);
1389 continue;
1390 }
1391
1392 /* fetch an I/O buf with sc_iolock dropped */
1393 mutex_exit(&sc->sc_iolock);
1394 nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
1395 mutex_enter(&sc->sc_iolock);
1396 if (nbp == NULL) {
1397 /*
1398 * No resources to run this request; leave the
1399 * buffer queued up, and schedule a timer to
1400 * restart the queue in 1/2 a second.
1401 */
1402 callout_schedule(&sc->sc_restart_ch, hz/2);
1403 break;
1404 }
1405
1406 /*
1407 * fetch buf, this can fail if another thread
1408 * has already processed the queue, it can also
1409 * return a completely different buf.
1410 */
1411 bp = bufq_get(sc->sc_bufq);
1412 if (bp == NULL) {
1413 mutex_exit(&sc->sc_iolock);
1414 putiobuf(nbp);
1415 mutex_enter(&sc->sc_iolock);
1416 continue;
1417 }
1418
1419 /* Instrumentation. */
1420 disk_busy(&sc->sc_dk);
1421
1422 /* release lock for VOP_STRATEGY */
1423 mutex_exit(&sc->sc_iolock);
1424
1425 nbp->b_data = bp->b_data;
1426 nbp->b_flags = bp->b_flags;
1427 nbp->b_oflags = bp->b_oflags;
1428 nbp->b_cflags = bp->b_cflags;
1429 nbp->b_iodone = dkiodone;
1430 nbp->b_proc = bp->b_proc;
1431 nbp->b_blkno = bp->b_rawblkno;
1432 nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
1433 nbp->b_bcount = bp->b_bcount;
1434 nbp->b_private = bp;
1435 BIO_COPYPRIO(nbp, bp);
1436
1437 vp = nbp->b_vp;
1438 if ((nbp->b_flags & B_READ) == 0) {
1439 mutex_enter(vp->v_interlock);
1440 vp->v_numoutput++;
1441 mutex_exit(vp->v_interlock);
1442 }
1443 VOP_STRATEGY(vp, nbp);
1444
1445 mutex_enter(&sc->sc_iolock);
1446 }
1447
1448 mutex_exit(&sc->sc_iolock);
1449 }
1450
1451 /*
1452 * dkiodone:
1453 *
1454 * I/O to a wedge has completed; alert the top half.
1455 */
1456 static void
1457 dkiodone(struct buf *bp)
1458 {
1459 struct buf *obp = bp->b_private;
1460 struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);
1461
1462 if (bp->b_error != 0)
1463 obp->b_error = bp->b_error;
1464 obp->b_resid = bp->b_resid;
1465 putiobuf(bp);
1466
1467 mutex_enter(&sc->sc_iolock);
1468 if (--sc->sc_iopend == 0)
1469 cv_broadcast(&sc->sc_dkdrn);
1470
1471 disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
1472 obp->b_flags & B_READ);
1473 mutex_exit(&sc->sc_iolock);
1474
1475 biodone(obp);
1476
1477 /* Kick the queue in case there is more work we can do. */
1478 dkstart(sc);
1479 }
1480
/*
 * dkrestart:
 *
 *	Callout handler that resumes the work queue after it stalled
 *	on a resource shortage.  The argument v is the wedge's softc.
 */
static void
dkrestart(void *v)
{

	dkstart((struct dkwedge_softc *)v);
}
1494
1495 /*
1496 * dkminphys:
1497 *
1498 * Call parent's minphys function.
1499 */
1500 static void
1501 dkminphys(struct buf *bp)
1502 {
1503 struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
1504 dev_t dev;
1505
1506 dev = bp->b_dev;
1507 bp->b_dev = sc->sc_pdev;
1508 if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
1509 (*sc->sc_parent->dk_driver->d_minphys)(bp);
1510 else
1511 minphys(bp);
1512 bp->b_dev = dev;
1513 }
1514
1515 /*
1516 * dkread: [devsw entry point]
1517 *
1518 * Read from a wedge.
1519 */
1520 static int
1521 dkread(dev_t dev, struct uio *uio, int flags)
1522 {
1523 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1524
1525 if (sc == NULL)
1526 return ENODEV;
1527 if (sc->sc_state != DKW_STATE_RUNNING)
1528 return ENXIO;
1529
1530 return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
1531 }
1532
1533 /*
1534 * dkwrite: [devsw entry point]
1535 *
1536 * Write to a wedge.
1537 */
1538 static int
1539 dkwrite(dev_t dev, struct uio *uio, int flags)
1540 {
1541 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1542
1543 if (sc == NULL)
1544 return ENODEV;
1545 if (sc->sc_state != DKW_STATE_RUNNING)
1546 return ENXIO;
1547
1548 return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
1549 }
1550
1551 /*
1552 * dkioctl: [devsw entry point]
1553 *
1554 * Perform an ioctl request on a wedge.
1555 */
1556 static int
1557 dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1558 {
1559 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1560 int error = 0;
1561
1562 if (sc == NULL)
1563 return ENODEV;
1564 if (sc->sc_state != DKW_STATE_RUNNING)
1565 return ENXIO;
1566 if (sc->sc_parent->dk_rawvp == NULL)
1567 return ENXIO;
1568
1569 /*
1570 * We pass NODEV instead of our device to indicate we don't
1571 * want to handle disklabel ioctls
1572 */
1573 error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
1574 if (error != EPASSTHROUGH)
1575 return error;
1576
1577 error = 0;
1578
1579 switch (cmd) {
1580 case DIOCGSTRATEGY:
1581 case DIOCGCACHE:
1582 case DIOCCACHESYNC:
1583 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
1584 l != NULL ? l->l_cred : NOCRED);
1585 break;
1586 case DIOCGWEDGEINFO: {
1587 struct dkwedge_info *dkw = data;
1588
1589 strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
1590 sizeof(dkw->dkw_devname));
1591 memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
1592 dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
1593 strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
1594 sizeof(dkw->dkw_parent));
1595 dkw->dkw_offset = sc->sc_offset;
1596 dkw->dkw_size = sc->sc_size;
1597 strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));
1598
1599 break;
1600 }
1601 case DIOCGSECTORALIGN: {
1602 struct disk_sectoralign *dsa = data;
1603 uint32_t r;
1604
1605 error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
1606 l != NULL ? l->l_cred : NOCRED);
1607 if (error)
1608 break;
1609
1610 r = sc->sc_offset % dsa->dsa_alignment;
1611 if (r < dsa->dsa_firstaligned)
1612 dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
1613 else
1614 dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
1615 dsa->dsa_alignment) - r;
1616 break;
1617 }
1618 default:
1619 error = ENOTTY;
1620 }
1621
1622 return error;
1623 }
1624
1625 /*
1626 * dkdiscard: [devsw entry point]
1627 *
1628 * Perform a discard-range request on a wedge.
1629 */
1630 static int
1631 dkdiscard(dev_t dev, off_t pos, off_t len)
1632 {
1633 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1634 unsigned shift;
1635 off_t offset, maxlen;
1636 int error;
1637
1638 if (sc == NULL)
1639 return ENODEV;
1640 if (sc->sc_state != DKW_STATE_RUNNING)
1641 return ENXIO;
1642 if (sc->sc_parent->dk_rawvp == NULL)
1643 return ENXIO;
1644
1645 shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
1646 KASSERT(__type_fit(off_t, sc->sc_size));
1647 KASSERT(__type_fit(off_t, sc->sc_offset));
1648 KASSERT(0 <= sc->sc_offset);
1649 KASSERT(sc->sc_size <= (__type_max(off_t) >> shift));
1650 KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - sc->sc_size));
1651 offset = ((off_t)sc->sc_offset << shift);
1652 maxlen = ((off_t)sc->sc_size << shift);
1653
1654 if (len > maxlen)
1655 return EINVAL;
1656 if (pos > (maxlen - len))
1657 return EINVAL;
1658
1659 pos += offset;
1660
1661 vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
1662 error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
1663 VOP_UNLOCK(sc->sc_parent->dk_rawvp);
1664
1665 return error;
1666 }
1667
1668 /*
1669 * dksize: [devsw entry point]
1670 *
1671 * Query the size of a wedge for the purpose of performing a dump
1672 * or for swapping to.
1673 */
1674 static int
1675 dksize(dev_t dev)
1676 {
1677 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1678 uint64_t p_size;
1679 int rv = -1;
1680
1681 if (sc == NULL)
1682 return -1;
1683 if (sc->sc_state != DKW_STATE_RUNNING)
1684 return -1;
1685
1686 mutex_enter(&sc->sc_dk.dk_openlock);
1687 mutex_enter(&sc->sc_parent->dk_rawlock);
1688
1689 /* Our content type is static, no need to open the device. */
1690
1691 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1692 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
1693 /* Saturate if we are larger than INT_MAX. */
1694 if (p_size > INT_MAX)
1695 rv = INT_MAX;
1696 else
1697 rv = (int)p_size;
1698 }
1699
1700 mutex_exit(&sc->sc_parent->dk_rawlock);
1701 mutex_exit(&sc->sc_dk.dk_openlock);
1702
1703 return rv;
1704 }
1705
1706 /*
1707 * dkdump: [devsw entry point]
1708 *
1709 * Perform a crash dump to a wedge.
1710 */
1711 static int
1712 dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
1713 {
1714 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1715 const struct bdevsw *bdev;
1716 uint64_t p_size, p_offset;
1717 int rv = 0;
1718
1719 if (sc == NULL)
1720 return ENODEV;
1721 if (sc->sc_state != DKW_STATE_RUNNING)
1722 return ENXIO;
1723
1724 mutex_enter(&sc->sc_dk.dk_openlock);
1725 mutex_enter(&sc->sc_parent->dk_rawlock);
1726
1727 /* Our content type is static, no need to open the device. */
1728
1729 if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
1730 strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
1731 strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0) {
1732 rv = ENXIO;
1733 goto out;
1734 }
1735 if (size % DEV_BSIZE != 0) {
1736 rv = EINVAL;
1737 goto out;
1738 }
1739
1740 p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
1741 p_size = sc->sc_size << sc->sc_parent->dk_blkshift;
1742
1743 if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
1744 printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
1745 "p_size (%" PRIu64 ")\n", __func__, blkno,
1746 size/DEV_BSIZE, p_size);
1747 rv = EINVAL;
1748 goto out;
1749 }
1750
1751 bdev = bdevsw_lookup(sc->sc_pdev);
1752 rv = (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
1753
1754 out:
1755 mutex_exit(&sc->sc_parent->dk_rawlock);
1756 mutex_exit(&sc->sc_dk.dk_openlock);
1757
1758 return rv;
1759 }
1760
1761 /*
1762 * config glue
1763 */
1764
1765 /*
1766 * dkwedge_find_partition
1767 *
1768 * Find wedge corresponding to the specified parent name
1769 * and offset/length.
1770 */
1771 device_t
1772 dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
1773 {
1774 struct dkwedge_softc *sc;
1775 int i;
1776 device_t wedge = NULL;
1777
1778 rw_enter(&dkwedges_lock, RW_READER);
1779 for (i = 0; i < ndkwedges; i++) {
1780 if ((sc = dkwedges[i]) == NULL)
1781 continue;
1782 if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
1783 sc->sc_offset == startblk &&
1784 sc->sc_size == nblks) {
1785 if (wedge) {
1786 printf("WARNING: double match for boot wedge "
1787 "(%s, %s)\n",
1788 device_xname(wedge),
1789 device_xname(sc->sc_dev));
1790 continue;
1791 }
1792 wedge = sc->sc_dev;
1793 }
1794 }
1795 rw_exit(&dkwedges_lock);
1796
1797 return wedge;
1798 }
1799
1800 const char *
1801 dkwedge_get_parent_name(dev_t dev)
1802 {
1803 /* XXX: perhaps do this in lookup? */
1804 int bmaj = bdevsw_lookup_major(&dk_bdevsw);
1805 int cmaj = cdevsw_lookup_major(&dk_cdevsw);
1806
1807 if (major(dev) != bmaj && major(dev) != cmaj)
1808 return NULL;
1809 struct dkwedge_softc *sc = dkwedge_lookup(dev);
1810 if (sc == NULL)
1811 return NULL;
1812 return sc->sc_parent->dk_name;
1813 }
1814