/* $NetBSD: dk.c,v 1.159 2023/05/22 14:58:32 riastradh Exp $ */

/*-
 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.159 2023/05/22 14:58:32 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_dkwedge.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");

typedef enum {
	DKW_STATE_LARVAL	= 0,
	DKW_STATE_RUNNING	= 1,
	DKW_STATE_DYING		= 2,
	DKW_STATE_DEAD		= 666
} dkwedge_state_t;

struct dkwedge_softc {
	device_t	sc_dev;		/* pointer to our pseudo-device */
	struct cfdata	sc_cfdata;	/* our cfdata structure */
	uint8_t		sc_wname[128];	/* wedge name (Unicode, UTF-8) */

	dkwedge_state_t	sc_state;	/* state this wedge is in */

	struct disk	*sc_parent;	/* parent disk */
	daddr_t		sc_offset;	/* LBA offset of wedge in parent */
	krwlock_t	sc_sizelock;
	uint64_t	sc_size;	/* size of wedge in blocks */
	char		sc_ptype[32];	/* partition type */
	dev_t		sc_pdev;	/* cached parent's dev_t */
					/* link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk	sc_dk;		/* our own disk structure */
	struct bufq_state *sc_bufq;	/* buffer queue */
	struct callout	sc_restart_ch;	/* callout to restart I/O */

	kmutex_t	sc_iolock;
	bool		sc_iostop;	/* don't schedule restart */
	int		sc_mode;	/* parent open mode */
};

static int	dkwedge_match(device_t, cfdata_t, void *);
static void	dkwedge_attach(device_t, device_t, void *);
static int	dkwedge_detach(device_t, int);

static void	dk_set_geometry(struct dkwedge_softc *, struct disk *);

static void	dkstart(struct dkwedge_softc *);
static void	dkiodone(struct buf *);
static void	dkrestart(void *);
static void	dkminphys(struct buf *);

static int	dkfirstopen(struct dkwedge_softc *, int);
static void	dklastclose(struct dkwedge_softc *);
static void	dkwedge_delall1(struct disk *, bool);
static int	dkwedge_del1(struct dkwedge_info *, int);
static int	dk_open_parent(dev_t, int, struct vnode **);
static int	dk_close_parent(struct vnode *, int);

static int	dkunit(dev_t);

static dev_type_open(dkopen);
static dev_type_close(dkclose);
static dev_type_cancel(dkcancel);
static dev_type_read(dkread);
static dev_type_write(dkwrite);
static dev_type_ioctl(dkioctl);
static dev_type_strategy(dkstrategy);
static dev_type_dump(dkdump);
static dev_type_size(dksize);
static dev_type_discard(dkdiscard);

CFDRIVER_DECL(dk, DV_DISK, NULL);
CFATTACH_DECL3_NEW(dk, 0,
    dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dkunit,
	.d_flag = D_DISK | D_MPSAFE
};

const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dkunit,
	.d_flag = D_DISK | D_MPSAFE
};

static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;
static krwlock_t dkwedges_lock;

static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;

/*
 * dkwedge_match:
 *
 *	Autoconfiguration match function for pseudo-device glue.
 */
static int
dkwedge_match(device_t parent, cfdata_t match, void *aux)
{

	/* Pseudo-device; always present. */
	return 1;
}

/*
 * dkwedge_attach:
 *
 *	Autoconfiguration attach function for pseudo-device glue.
 */
static void
dkwedge_attach(device_t parent, device_t self, void *aux)
{
	struct dkwedge_softc *sc = aux;
	struct disk *pdk = sc->sc_parent;
	int unit = device_unit(self);

	KASSERTMSG(unit >= 0, "unit=%d", unit);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	mutex_enter(&pdk->dk_openlock);
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERTMSG(unit < ndkwedges, "unit=%d ndkwedges=%u", unit, ndkwedges);
	KASSERTMSG(sc == dkwedges[unit], "sc=%p dkwedges[%d]=%p",
	    sc, unit, dkwedges[unit]);
	KASSERTMSG(sc->sc_dev == NULL, "sc=%p sc->sc_dev=%p", sc, sc->sc_dev);
	sc->sc_dev = self;
	rw_exit(&dkwedges_lock);
	mutex_exit(&pdk->dk_openlock);

	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	mutex_enter(&pdk->dk_openlock);
	dk_set_geometry(sc, pdk);
	mutex_exit(&pdk->dk_openlock);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	device_set_private(self, sc);
	sc->sc_state = DKW_STATE_RUNNING;
}

/*
 * dkwedge_compute_pdev:
 *
 *	Compute the parent disk's dev_t.
 */
static int
dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
{
	const char *name, *cp;
	devmajor_t pmaj;
	int punit;
	char devname[16];

	name = pname;
	switch (type) {
	case VBLK:
		pmaj = devsw_name2blk(name, devname, sizeof(devname));
		break;
	case VCHR:
		pmaj = devsw_name2chr(name, devname, sizeof(devname));
		break;
	default:
		pmaj = NODEVMAJOR;
		break;
	}
	if (pmaj == NODEVMAJOR)
		return ENXIO;

	name += strlen(devname);
	for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
		punit = (punit * 10) + (*cp - '0');
	if (cp == name) {
		/* Invalid parent disk name. */
		return ENXIO;
	}

	*pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);

	return 0;
}

/*
 * dkwedge_array_expand:
 *
 *	Expand the dkwedges array.
 *
 *	Releases and reacquires dkwedges_lock as a writer.
 */
static int
dkwedge_array_expand(void)
{

	const unsigned incr = 16;
	unsigned newcnt, oldcnt;
	struct dkwedge_softc **newarray = NULL, **oldarray = NULL;

	KASSERT(rw_write_held(&dkwedges_lock));

	oldcnt = ndkwedges;
	oldarray = dkwedges;

	if (oldcnt >= INT_MAX - incr)
		return ENFILE;	/* XXX */
	newcnt = oldcnt + incr;

	rw_exit(&dkwedges_lock);
	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
	    M_WAITOK|M_ZERO);
	rw_enter(&dkwedges_lock, RW_WRITER);

	if (ndkwedges != oldcnt || dkwedges != oldarray) {
		oldarray = NULL;	/* already recycled */
		goto out;
	}

	if (oldarray != NULL)
		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
	dkwedges = newarray;
	newarray = NULL;	/* transferred to dkwedges */
	ndkwedges = newcnt;

out:	rw_exit(&dkwedges_lock);
	if (oldarray != NULL)
		free(oldarray, M_DKWEDGE);
	if (newarray != NULL)
		free(newarray, M_DKWEDGE);
	rw_enter(&dkwedges_lock, RW_WRITER);
	return 0;
}

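/*
 * dkwedge_size_init:
 *
 *	Initialize the size of a wedge, and the lock that guards it,
 *	when the wedge is created.
 */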
static void
dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
{

	rw_init(&sc->sc_sizelock);
	sc->sc_size = size;
}

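/*
 * dkwedge_size_fini:
 *
 *	Destroy the size lock when a wedge is torn down.
 */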
static void
dkwedge_size_fini(struct dkwedge_softc *sc)
{

	rw_destroy(&sc->sc_sizelock);
}

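/*
 * dkwedge_size:
 *
 *	Return the current size of the wedge, in blocks, taking the
 *	size lock as a reader.
 */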
static uint64_t
dkwedge_size(struct dkwedge_softc *sc)
{
	uint64_t size;

	rw_enter(&sc->sc_sizelock, RW_READER);
	size = sc->sc_size;
	rw_exit(&sc->sc_sizelock);

	return size;
}

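/*
 * dkwedge_size_increase:
 *
 *	Grow the wedge to the given size, in blocks.  The size may
 *	only increase, never shrink.  Called with the parent's
 *	dk_openlock held.
 */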
static void
dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
{

	KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));

	rw_enter(&sc->sc_sizelock, RW_WRITER);
	KASSERTMSG(size >= sc->sc_size,
	    "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
	    sc->sc_size, size);
	sc->sc_size = size;
	rw_exit(&sc->sc_sizelock);
}

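/*
 * dk_set_geometry:
 *
 *	Fill in the wedge's disk geometry from its size and the
 *	parent's sector size, using fabricated CHS values, and publish
 *	it with disk_set_info().  Called with the parent's dk_openlock
 *	held.
 */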
static void
dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
{
	struct disk *dk = &sc->sc_dk;
	struct disk_geom *dg = &dk->dk_geom;

	KASSERT(mutex_owned(&pdk->dk_openlock));

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = dkwedge_size(sc);
	dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;

	/* fake numbers, 1 cylinder is 1 MB with default sector size */
	dg->dg_nsectors = 32;
	dg->dg_ntracks = 64;
	dg->dg_ncylinders =
	    dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);

	disk_set_info(sc->sc_dev, dk, NULL);
}

/*
 * dkwedge_add:		[exported function]
 *
 *	Add a disk wedge based on the provided information.
 *
 *	The incoming dkw_devname[] is ignored, instead being
 *	filled in and returned to the caller.
 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;
	device_t dev __diagused;

	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return ENXIO;

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return error;

	if (dkw->dkw_offset < 0)
		return EINVAL;

	/*
	 * Check for an existing wedge at the same disk offset. Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (dkwedge_size(lsc) > dkw->dkw_size)
			break;
		if (lsc->sc_dev == NULL)
			break;

		sc = lsc;
		device_acquire(sc->sc_dev);
		dkwedge_size_increase(sc, dkw->dkw_size);
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	if (sc != NULL)
		goto announce;

	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	dkwedge_size_init(sc, dkw->dkw_size);

	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			/* XXX arithmetic overflow */
			uint64_t size = dkwedge_size(sc);
			uint64_t lsize = dkwedge_size(lsc);
			daddr_t lastblk = sc->sc_offset + size - 1;
			daddr_t llastblk = lsc->sc_offset + lsize - 1;

			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			if (sc->sc_offset == lsc->sc_offset &&
			    dkwedge_size(sc) == dkwedge_size(lsc) &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_NOTFOUND;	/* use chosen cf_unit */

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
					sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL) {
			error = dkwedge_array_expand();
			if (error)
				break;
		} else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance. This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((dev = config_attach_pseudo_acquire(&sc->sc_cfdata, sc)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		rw_enter(&dkwedges_lock, RW_WRITER);
		KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return ENOMEM;
	}

	KASSERT(dev == sc->sc_dev);

announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    dkwedge_size(sc), sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller. */
	strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
	    sizeof(dkw->dkw_devname));

	device_release(sc->sc_dev);
	return 0;
}

/*
 * dkwedge_find_acquire:
 *
 *	Lookup a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 *
 *	Return NULL if the wedge is not found, otherwise return
 *	the wedge's softc. Assign the wedge's unit number to unitp
 *	if unitp is not NULL. The wedge's sc_dev is referenced and
 *	must be released by device_release or equivalent.
 */
static struct dkwedge_softc *
dkwedge_find_acquire(struct dkwedge_info *dkw, u_int *unitp)
{
	struct dkwedge_softc *sc = NULL;
	u_int unit;

	/* Find our softc. */
	dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
	rw_enter(&dkwedges_lock, RW_READER);
	for (unit = 0; unit < ndkwedges; unit++) {
		if ((sc = dkwedges[unit]) != NULL &&
		    sc->sc_dev != NULL &&
		    strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
		    strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
			device_acquire(sc->sc_dev);
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (sc == NULL)
		return NULL;

	if (unitp != NULL)
		*unitp = unit;

	return sc;
}

/*
 * dkwedge_del:		[exported function]
 *
 *	Delete a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{

	return dkwedge_del1(dkw, 0);
}

int
dkwedge_del1(struct dkwedge_info *dkw, int flags)
{
	struct dkwedge_softc *sc = NULL;

	/* Find our softc. */
	if ((sc = dkwedge_find_acquire(dkw, NULL)) == NULL)
		return ESRCH;

	return config_detach_release(sc->sc_dev, flags);
}

/*
 * dkwedge_detach:
 *
 *	Autoconfiguration detach function for pseudo-device glue.
 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *const sc = device_private(self);
	const u_int unit = device_unit(self);
	int bmaj, cmaj, error;

	error = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self, flags);
	if (error)
		return error;

	/* Mark the wedge as dying. */
	sc->sc_state = DKW_STATE_DYING;

	pmf_device_deregister(self);

	/* Kill any pending restart. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iostop = true;
	mutex_exit(&sc->sc_iolock);
	callout_halt(&sc->sc_restart_ch, NULL);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * At this point, all block device opens have been closed,
	 * synchronously flushing any buffered writes; and all
	 * character device I/O operations have completed
	 * synchronously, and character device opens have been closed.
	 *
	 * So there can be no more opens or queued buffers by now.
	 */
	KASSERT(sc->sc_dk.dk_openmask == 0);
	KASSERT(bufq_peek(sc->sc_bufq) == NULL);
	bufq_drain(sc->sc_bufq);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERT(dkwedges[unit] == sc);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	dkwedge_size_fini(sc);

	free(sc, M_DKWEDGE);

	return 0;
}

/*
 * dkwedge_delall:	[exported function]
 *
 *	Forcibly delete all of the wedges on the specified disk. Used
 *	when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/false);
}

/*
 * dkwedge_delidle:	[exported function]
 *
 *	Delete all of the wedges on the specified disk if idle. Used
 *	by ioctl(DIOCRMWEDGES).
 */
void
dkwedge_delidle(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/true);
}

static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_info dkw;
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly)
		flags |= DETACH_FORCE;

	for (;;) {
		mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			if (!idleonly || sc->sc_dk.dk_openmask == 0)
				break;
		}
		if (sc == NULL) {
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			mutex_exit(&pdk->dk_rawlock);
			return;
		}
		strlcpy(dkw.dkw_parent, pdk->dk_name, sizeof(dkw.dkw_parent));
		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		mutex_exit(&pdk->dk_openlock);
		mutex_exit(&pdk->dk_rawlock);
		(void) dkwedge_del1(&dkw, flags);
	}
}

/*
 * dkwedge_list:	[exported function]
 *
 *	List all of the wedges on a particular disk.
 */
int
dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
{
	struct uio uio;
	struct iovec iov;
	struct dkwedge_softc *sc;
	struct dkwedge_info dkw;
	int error = 0;

	iov.iov_base = dkwl->dkwl_buf;
	iov.iov_len = dkwl->dkwl_bufsize;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = dkwl->dkwl_bufsize;
	uio.uio_rw = UIO_READ;
	KASSERT(l == curlwp);
	uio.uio_vmspace = l->l_proc->p_vmspace;

	dkwl->dkwl_ncopied = 0;

	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
		if (uio.uio_resid < sizeof(dkw))
			break;

		if (sc->sc_state != DKW_STATE_RUNNING)
			continue;

		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
		strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw.dkw_parent));
		dkw.dkw_offset = sc->sc_offset;
		dkw.dkw_size = dkwedge_size(sc);
		strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));

		error = uiomove(&dkw, sizeof(dkw), &uio);
		if (error)
			break;
		dkwl->dkwl_ncopied++;
	}
	dkwl->dkwl_nwedges = pdk->dk_nwedges;
	mutex_exit(&pdk->dk_openlock);

	return error;
}

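/*
 * dkwedge_find_by_wname:
 *
 *	Find the device_t of a wedge by its wedge name.  If more than
 *	one wedge carries the same name, warn and keep the first match.
 */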
device_t
dkwedge_find_by_wname(const char *wname)
{
	device_t dv = NULL;
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL)
			continue;
		if (strcmp(sc->sc_wname, wname) == 0) {
			if (dv != NULL) {
				printf(
				    "WARNING: double match for wedge name %s "
				    "(%s, %s)\n", wname, device_xname(dv),
				    device_xname(sc->sc_dev));
				continue;
			}
			dv = sc->sc_dev;
		}
	}
	rw_exit(&dkwedges_lock);
	return dv;
}

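/*
 * dkwedge_find_by_parent:
 *
 *	Scan for wedges on the named parent disk, starting at index *i.
 *	Return the next matching wedge's device_t with *i left at its
 *	index, or NULL when there are no more; the caller advances *i
 *	between calls.
 */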
device_t
dkwedge_find_by_parent(const char *name, size_t *i)
{

	rw_enter(&dkwedges_lock, RW_READER);
	for (; *i < (size_t)ndkwedges; (*i)++) {
		struct dkwedge_softc *sc;
		if ((sc = dkwedges[*i]) == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, name) != 0)
			continue;
		rw_exit(&dkwedges_lock);
		return sc->sc_dev;
	}
	rw_exit(&dkwedges_lock);
	return NULL;
}

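/*
 * dkwedge_print_wnames:
 *
 *	Print the names of all wedges in the system, each in the form
 *	" wedge:<name>".
 */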
void
dkwedge_print_wnames(void)
{
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL)
			continue;
		printf(" wedge:%s", sc->sc_wname);
	}
	rw_exit(&dkwedges_lock);
}

/*
 * We need a dummy object to stuff into the dkwedge discovery method link
 * set to ensure that there is always at least one object in the set.
 */
static struct dkwedge_discovery_method dummy_discovery_method;
__link_set_add_bss(dkwedge_methods, dummy_discovery_method);

/*
 * dkwedge_init:
 *
 *	Initialize the disk wedge subsystem.
 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}

#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif

/*
 * dkwedge_discover:	[exported function]
 *
 *	Discover the wedges on a newly attached disk.
 *	Remove all unused wedges on the disk first.
 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning; the block device
	 * is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENXIO)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges
	 */
	dkwedge_delidle(pdk);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists. If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

out:
	rw_exit(&dkwedge_discovery_methods_lock);
}

/*
 * dkwedge_read:
 *
 *	Read some data from the specified disk, used for
 *	partition discovery.
 */
int
dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
    void *tbuf, size_t len)
{
	buf_t *bp;
	int error;
	bool isopen;
	dev_t bdev;
	struct vnode *bdvp;

	/*
	 * The kernel cannot read from a character device vnode
	 * as physio() only handles user memory.
	 *
	 * If the block device has already been opened by a wedge
	 * use that vnode and temporarily bump the open counter.
	 *
	 * Otherwise try to open the block device.
	 */

	bdev = devsw_chr2blk(vp->v_rdev);

	mutex_enter(&pdk->dk_rawlock);
	if (pdk->dk_rawopens != 0) {
		KASSERT(pdk->dk_rawvp != NULL);
		isopen = true;
		++pdk->dk_rawopens;
		bdvp = pdk->dk_rawvp;
		error = 0;
	} else {
		isopen = false;
		error = dk_open_parent(bdev, FREAD, &bdvp);
	}
	mutex_exit(&pdk->dk_rawlock);

	if (error)
		return error;

	bp = getiobuf(bdvp, true);
	bp->b_flags = B_READ;
	bp->b_cflags = BC_BUSY;
	bp->b_dev = bdev;
	bp->b_data = tbuf;
	bp->b_bufsize = bp->b_bcount = len;
	bp->b_blkno = blkno;
	bp->b_cylinder = 0;
	bp->b_error = 0;

	VOP_STRATEGY(bdvp, bp);
	error = biowait(bp);
	putiobuf(bp);

	mutex_enter(&pdk->dk_rawlock);
	if (isopen) {
		--pdk->dk_rawopens;
	} else {
		dk_close_parent(bdvp, FREAD);
	}
	mutex_exit(&pdk->dk_rawlock);

	return error;
}

/*
 * dkwedge_lookup:
 *
 *	Look up a dkwedge_softc based on the provided dev_t.
 *
 *	Caller must guarantee the wedge is referenced.
 */
static struct dkwedge_softc *
dkwedge_lookup(dev_t dev)
{
	const int unit = minor(dev);
	struct dkwedge_softc *sc;

	rw_enter(&dkwedges_lock, RW_READER);
	if (unit < 0 || unit >= ndkwedges)
		sc = NULL;
	else
		sc = dkwedges[unit];
	rw_exit(&dkwedges_lock);

	return sc;
}

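/*
 * dk_open_parent:
 *
 *	Open the parent disk's block device by dev_t with the given
 *	mode and return its vnode, unlocked but referenced, in *vpp.
 */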
static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}

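/*
 * dk_close_parent:
 *
 *	Close a parent vnode that was opened with dk_open_parent().
 */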
static int
dk_close_parent(struct vnode *vp, int mode)
{
	int error;

	error = vn_close(vp, mode, NOCRED);
	return error;
}

/*
 * dkunit:		[devsw entry point]
 *
 *	Return the autoconf device_t unit number of a wedge by its
 *	devsw dev_t number, or -1 if there is none.
 *
 *	XXX This is a temporary hack until dkwedge numbering is made to
 *	correspond 1:1 to autoconf device numbering.
 */
static int
dkunit(dev_t dev)
{
	int mn = minor(dev);
	struct dkwedge_softc *sc;
	device_t dv;
	int unit = -1;

	if (mn < 0)
		return -1;

	rw_enter(&dkwedges_lock, RW_READER);
	if (mn < ndkwedges &&
	    (sc = dkwedges[mn]) != NULL &&
	    (dv = sc->sc_dev) != NULL)
		unit = device_unit(dv);
	rw_exit(&dkwedges_lock);

	return unit;
}

/*
 * dkopen:		[devsw entry point]
 *
 *	Open a wedge.
 */
static int
dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	if (sc == NULL)
		return ENXIO;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return ENXIO;

	/*
	 * We go through a complicated little dance to only open the parent
	 * vnode once per wedge, no matter how many times the wedge is
	 * opened. The reason? We see one dkopen() per open call, but
	 * only dkclose() on the last close.
	 */
	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);
	if (sc->sc_dk.dk_openmask == 0) {
		error = dkfirstopen(sc, flags);
		if (error)
			goto out;
	} else if (flags & ~sc->sc_mode & FWRITE) {
		/*
		 * The parent is already open, but the previous attempt
		 * to open it read/write failed and fell back to
		 * read-only. In that case, we assume the medium is
		 * read-only and fail to open the wedge read/write.
		 */
		error = EROFS;
		goto out;
	}
	KASSERT(sc->sc_mode != 0);
	KASSERTMSG(sc->sc_mode & FREAD, "%s: sc_mode=%x",
	    device_xname(sc->sc_dev), sc->sc_mode);
	KASSERTMSG((flags & FWRITE) ? (sc->sc_mode & FWRITE) : 1,
	    "%s: flags=%x sc_mode=%x",
	    device_xname(sc->sc_dev), flags, sc->sc_mode);
	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask |= 1;
	else
		sc->sc_dk.dk_bopenmask |= 1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

out:	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);
	return error;
}

static int
dkfirstopen(struct dkwedge_softc *sc, int flags)
{
	struct dkwedge_softc *nsc;
	struct vnode *vp;
	int mode;
	int error;

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));

	if (sc->sc_parent->dk_rawopens == 0) {
		KASSERT(sc->sc_parent->dk_rawvp == NULL);
		/*
		 * Try to open the parent read/write. If this fails
		 * with EROFS and the wedge is being opened read-only,
		 * retry opening it read-only.
		 */
		mode = FREAD | FWRITE;
		error = dk_open_parent(sc->sc_pdev, mode, &vp);
		if (error == EROFS && (flags & FWRITE) == 0) {
			mode &= ~FWRITE;
			error = dk_open_parent(sc->sc_pdev, mode, &vp);
		}
		if (error)
			return error;
		KASSERT(vp != NULL);
		sc->sc_parent->dk_rawvp = vp;
	} else {
		/*
		 * Retrieve mode from an already opened wedge.
		 *
		 * At this point, dk_rawopens is bounded by the number
		 * of dkwedge devices in the system, which is limited
		 * by autoconf device numbering to INT_MAX. Since
		 * dk_rawopens is unsigned, this can't overflow.
		 */
		KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
		KASSERT(sc->sc_parent->dk_rawvp != NULL);
		mode = 0;
		mutex_enter(&sc->sc_parent->dk_openlock);
		LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
			if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
				continue;
			mode = nsc->sc_mode;
			break;
		}
		mutex_exit(&sc->sc_parent->dk_openlock);
	}
	sc->sc_mode = mode;
	sc->sc_parent->dk_rawopens++;

	return 0;
}

static void
dklastclose(struct dkwedge_softc *sc)
{

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
	KASSERT(sc->sc_parent->dk_rawopens > 0);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	if (--sc->sc_parent->dk_rawopens == 0) {
		struct vnode *const vp = sc->sc_parent->dk_rawvp;
		const int mode = sc->sc_mode;

		sc->sc_parent->dk_rawvp = NULL;
		sc->sc_mode = 0;

		dk_close_parent(vp, mode);
	}
}

/*
 * dkclose:		[devsw entry point]
 *
 *	Close a wedge.
 */
static int
dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	if (sc == NULL)
		return ENXIO;
	if (sc->sc_state != DKW_STATE_RUNNING &&
	    sc->sc_state != DKW_STATE_DYING)
		return ENXIO;

	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);

	KASSERT(sc->sc_dk.dk_openmask != 0);

	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask &= ~1;
	else
		sc->sc_dk.dk_bopenmask &= ~1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	if (sc->sc_dk.dk_openmask == 0) {
		dklastclose(sc);
	}

	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);

	return 0;
}

/*
 * dkcancel:		[devsw entry point]
 *
 *	Cancel any pending I/O operations waiting on a wedge.
 */
static int
dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	/*
	 * Disk I/O is expected to complete or fail within a reasonable
	 * timeframe -- it's storage, not communication. Further, the
	 * character and block device interface guarantees that prior
	 * reads and writes have completed or failed by the time close
	 * returns -- we are not to cancel them here. If the parent
	 * device's hardware is gone, the parent driver can make them
	 * fail. Nothing for dk(4) itself to do.
	 */

	return 0;
}

/*
 * dkstrategy:		[devsw entry point]
 *
 *	Perform I/O based on the wedge I/O strategy.
 */
static void
dkstrategy(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	uint64_t p_size, p_offset;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* If it's an empty transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	/* Make sure it's in-range. */
	if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
		goto done;

	/* Translate it to the parent's raw LBA. */
	bp->b_rawblkno = bp->b_blkno + p_offset;

	/* Place it in the queue and start I/O on the unit. */
	mutex_enter(&sc->sc_iolock);
	disk_wait(&sc->sc_dk);
	bufq_put(sc->sc_bufq, bp);
	mutex_exit(&sc->sc_iolock);

	dkstart(sc);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * dkstart:
 *
 *	Start I/O that has been enqueued on the wedge.
 */
static void
dkstart(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	struct buf *bp, *nbp;

	mutex_enter(&sc->sc_iolock);

	/* Do as much work as has been enqueued. */
	while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
		if (sc->sc_iostop) {
			(void) bufq_get(sc->sc_bufq);
			mutex_exit(&sc->sc_iolock);
			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* fetch an I/O buf with sc_iolock dropped */
		mutex_exit(&sc->sc_iolock);
		nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
		mutex_enter(&sc->sc_iolock);
		if (nbp == NULL) {
			/*
			 * No resources to run this request; leave the
			 * buffer queued up, and schedule a timer to
			 * restart the queue in 1/2 a second.
			 */
			if (!sc->sc_iostop)
				callout_schedule(&sc->sc_restart_ch, hz/2);
			break;
		}

		/*
		 * Fetch a buf. This can fail if another thread has
		 * already processed the queue; it can also return a
		 * completely different buf.
		 */
		bp = bufq_get(sc->sc_bufq);
		if (bp == NULL) {
			mutex_exit(&sc->sc_iolock);
			putiobuf(nbp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* Instrumentation. */
		disk_busy(&sc->sc_dk);

		/* release lock for VOP_STRATEGY */
		mutex_exit(&sc->sc_iolock);

		nbp->b_data = bp->b_data;
		nbp->b_flags = bp->b_flags;
		nbp->b_oflags = bp->b_oflags;
		nbp->b_cflags = bp->b_cflags;
		nbp->b_iodone = dkiodone;
		nbp->b_proc = bp->b_proc;
		nbp->b_blkno = bp->b_rawblkno;
		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
		nbp->b_bcount = bp->b_bcount;
		nbp->b_private = bp;
		BIO_COPYPRIO(nbp, bp);

		vp = nbp->b_vp;
		if ((nbp->b_flags & B_READ) == 0) {
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		VOP_STRATEGY(vp, nbp);

		mutex_enter(&sc->sc_iolock);
	}

	mutex_exit(&sc->sc_iolock);
}

/*
 * dkiodone:
 *
 *	I/O to a wedge has completed; alert the top half.
 */
static void
dkiodone(struct buf *bp)
{
	struct buf *obp = bp->b_private;
	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);

	if (bp->b_error != 0)
		obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	putiobuf(bp);

	mutex_enter(&sc->sc_iolock);
	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
	    obp->b_flags & B_READ);
	mutex_exit(&sc->sc_iolock);

	biodone(obp);

	/* Kick the queue in case there is more work we can do. */
	dkstart(sc);
}

/*
 * dkrestart:
 *
 *	Restart the work queue after it was stalled due to
 *	a resource shortage. Invoked via a callout.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *sc = v;

	dkstart(sc);
}

/*
 * dkminphys:
 *
 *	Call parent's minphys function.
 */
static void
dkminphys(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	dev_t dev;

	dev = bp->b_dev;
	bp->b_dev = sc->sc_pdev;
	if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
		(*sc->sc_parent->dk_driver->d_minphys)(bp);
	else
		minphys(bp);
	bp->b_dev = dev;
}

/*
 * dkread:		[devsw entry point]
 *
 *	Read from a wedge.
 */
static int
dkread(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
}

/*
 * dkwrite:		[devsw entry point]
 *
 *	Write to a wedge.
 */
static int
dkwrite(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
}

/*
 * dkioctl:		[devsw entry point]
 *
 *	Perform an ioctl request on a wedge.
 */
static int
dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/*
	 * We pass NODEV instead of our device to indicate that we
	 * don't want to handle disklabel ioctls.
	 */
	error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = 0;

	switch (cmd) {
	case DIOCGSTRATEGY:
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
		    l != NULL ? l->l_cred : NOCRED);
		break;
	case DIOCGWEDGEINFO: {
		struct dkwedge_info *dkw = data;

		strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw->dkw_devname));
		memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
		strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw->dkw_parent));
		dkw->dkw_offset = sc->sc_offset;
		dkw->dkw_size = dkwedge_size(sc);
		strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));

		break;
	}
	case DIOCGSECTORALIGN: {
		struct disk_sectoralign *dsa = data;
		uint32_t r;

		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
		    l != NULL ? l->l_cred : NOCRED);
		if (error)
			break;

		r = sc->sc_offset % dsa->dsa_alignment;
		if (r < dsa->dsa_firstaligned)
			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
		else
			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
			    dsa->dsa_alignment) - r;
		break;
	}
	default:
		error = ENOTTY;
	}

	return error;
}

/*
 * dkdiscard:		[devsw entry point]
 *
 *	Perform a discard-range request on a wedge.
 */
static int
dkdiscard(dev_t dev, off_t pos, off_t len)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t size = dkwedge_size(sc);
	unsigned shift;
	off_t offset, maxlen;
	int error;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* XXX check bounds on size/offset up front */
	shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
	KASSERT(__type_fit(off_t, size));
	KASSERT(__type_fit(off_t, sc->sc_offset));
	KASSERT(0 <= sc->sc_offset);
	KASSERT(size <= (__type_max(off_t) >> shift));
	KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size));
	offset = ((off_t)sc->sc_offset << shift);
	maxlen = ((off_t)size << shift);

	if (len > maxlen)
		return EINVAL;
	if (pos > (maxlen - len))
		return EINVAL;

	pos += offset;

	vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
	VOP_UNLOCK(sc->sc_parent->dk_rawvp);

	return error;
}

/*
 * dksize:		[devsw entry point]
 *
 *	Query the size of a wedge for the purpose of performing a dump
 *	or for swapping to.
 */
static int
dksize(dev_t dev)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t p_size;
	int rv = -1;

	if (sc == NULL)
		return -1;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return -1;

	/* Our content type is static, no need to open the device. */

	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
		/* Saturate if we are larger than INT_MAX. */
		if (p_size > INT_MAX)
			rv = INT_MAX;
		else
			rv = (int)p_size;
	}

	return rv;
}

/*
 * dkdump:		[devsw entry point]
 *
 *	Perform a crash dump to a wedge.
 */
static int
dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	const struct bdevsw *bdev;
	uint64_t p_size, p_offset;

	if (sc == NULL)
		return ENXIO;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return ENXIO;

	/* Our content type is static, no need to open the device. */

	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0)
		return ENXIO;
	if (size % DEV_BSIZE != 0)
		return EINVAL;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "p_size (%" PRIu64 ")\n", __func__, blkno,
		    size/DEV_BSIZE, p_size);
		return EINVAL;
	}

	bdev = bdevsw_lookup(sc->sc_pdev);
	return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
}

/*
 * config glue
 */

/*
 * dkwedge_find_partition
 *
 *	Find wedge corresponding to the specified parent name
 *	and offset/length.
 */
device_t
dkwedge_find_partition(device_t parent, daddr_t startblk, uint64_t nblks)
{
	struct dkwedge_softc *sc;
	int i;
	device_t wedge = NULL;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
		    sc->sc_offset == startblk &&
		    dkwedge_size(sc) == nblks) {
			if (wedge) {
				printf("WARNING: double match for boot wedge "
				    "(%s, %s)\n",
				    device_xname(wedge),
				    device_xname(sc->sc_dev));
				continue;
			}
			wedge = sc->sc_dev;
		}
	}
	rw_exit(&dkwedges_lock);

	return wedge;
}

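/*
 * dkwedge_get_parent_name:
 *
 *	Return the name of the parent disk for the given wedge dev_t,
 *	or NULL if dev is not a wedge.
 */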
const char *
dkwedge_get_parent_name(dev_t dev)
{
	/* XXX: perhaps do this in lookup? */
	int bmaj = bdevsw_lookup_major(&dk_bdevsw);
	int cmaj = cdevsw_lookup_major(&dk_cdevsw);

	if (major(dev) != bmaj && major(dev) != cmaj)
		return NULL;
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	if (sc == NULL)
		return NULL;
	return sc->sc_parent->dk_name;
}
