/*	$NetBSD: dk.c,v 1.168 2023/05/22 14:59:50 riastradh Exp $	*/

/*-
 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.168 2023/05/22 14:59:50 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_dkwedge.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");

typedef enum {
	DKW_STATE_LARVAL = 0,
	DKW_STATE_RUNNING = 1,
	DKW_STATE_DYING = 2,
	DKW_STATE_DEAD = 666
} dkwedge_state_t;

struct dkwedge_softc {
	device_t	sc_dev;		/* pointer to our pseudo-device */
	struct cfdata	sc_cfdata;	/* our cfdata structure */
	uint8_t		sc_wname[128];	/* wedge name (Unicode, UTF-8) */

	dkwedge_state_t	sc_state;	/* state this wedge is in */

	struct disk	*sc_parent;	/* parent disk */
	daddr_t		sc_offset;	/* LBA offset of wedge in parent */
	krwlock_t	sc_sizelock;
	uint64_t	sc_size;	/* size of wedge in blocks */
	char		sc_ptype[32];	/* partition type */
	dev_t		sc_pdev;	/* cached parent's dev_t */
					/* link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk	sc_dk;		/* our own disk structure */
	struct bufq_state *sc_bufq;	/* buffer queue */
	struct callout	sc_restart_ch;	/* callout to restart I/O */

	kmutex_t	sc_iolock;
	bool		sc_iostop;	/* don't schedule restart */
	int		sc_mode;	/* parent open mode */
};

static int	dkwedge_match(device_t, cfdata_t, void *);
static void	dkwedge_attach(device_t, device_t, void *);
static int	dkwedge_detach(device_t, int);

static void	dk_set_geometry(struct dkwedge_softc *, struct disk *);

static void	dkstart(struct dkwedge_softc *);
static void	dkiodone(struct buf *);
static void	dkrestart(void *);
static void	dkminphys(struct buf *);

static int	dkfirstopen(struct dkwedge_softc *, int);
static void	dklastclose(struct dkwedge_softc *);
static void	dkwedge_delall1(struct disk *, bool);
static int	dkwedge_del1(struct dkwedge_info *, int);
static int	dk_open_parent(dev_t, int, struct vnode **);
static int	dk_close_parent(struct vnode *, int);

static dev_type_open(dkopen);
static dev_type_close(dkclose);
static dev_type_cancel(dkcancel);
static dev_type_read(dkread);
static dev_type_write(dkwrite);
static dev_type_ioctl(dkioctl);
static dev_type_strategy(dkstrategy);
static dev_type_dump(dkdump);
static dev_type_size(dksize);
static dev_type_discard(dkdiscard);

CFDRIVER_DECL(dk, DV_DISK, NULL);
CFATTACH_DECL3_NEW(dk, 0,
    dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;
static krwlock_t dkwedges_lock;

static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;

/*
 * dkwedge_match:
 *
 *	Autoconfiguration match function for pseudo-device glue.
 */
static int
dkwedge_match(device_t parent, cfdata_t match, void *aux)
{

	/* Pseudo-device; always present. */
	return 1;
}

/*
 * dkwedge_attach:
 *
 *	Autoconfiguration attach function for pseudo-device glue.
 */
static void
dkwedge_attach(device_t parent, device_t self, void *aux)
{
	struct dkwedge_softc *sc = aux;
	struct disk *pdk = sc->sc_parent;
	int unit = device_unit(self);

	KASSERTMSG(unit >= 0, "unit=%d", unit);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	mutex_enter(&pdk->dk_openlock);
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERTMSG(unit < ndkwedges, "unit=%d ndkwedges=%u", unit, ndkwedges);
	KASSERTMSG(sc == dkwedges[unit], "sc=%p dkwedges[%d]=%p",
	    sc, unit, dkwedges[unit]);
	KASSERTMSG(sc->sc_dev == NULL, "sc=%p sc->sc_dev=%p", sc, sc->sc_dev);
	sc->sc_dev = self;
	rw_exit(&dkwedges_lock);
	mutex_exit(&pdk->dk_openlock);

	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	mutex_enter(&pdk->dk_openlock);
	dk_set_geometry(sc, pdk);
	mutex_exit(&pdk->dk_openlock);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	device_set_private(self, sc);
	sc->sc_state = DKW_STATE_RUNNING;
}

/*
 * dkwedge_compute_pdev:
 *
 *	Compute the parent disk's dev_t.
 */
static int
dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
{
	const char *name, *cp;
	devmajor_t pmaj;
	int punit;
	char devname[16];

	name = pname;
	switch (type) {
	case VBLK:
		pmaj = devsw_name2blk(name, devname, sizeof(devname));
		break;
	case VCHR:
		pmaj = devsw_name2chr(name, devname, sizeof(devname));
		break;
	default:
		pmaj = NODEVMAJOR;
		break;
	}
	if (pmaj == NODEVMAJOR)
		return ENXIO;

	name += strlen(devname);
	for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
		punit = (punit * 10) + (*cp - '0');
	if (cp == name) {
		/* Invalid parent disk name. */
		return ENXIO;
	}

	*pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);

	return 0;
}
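
/*
 * For example (a sketch; actual major numbers depend on the kernel
 * configuration): given pname "wd0" and type VBLK, devsw_name2blk()
 * matches the driver prefix, copies "wd" into devname, and returns
 * the block major for wd(4).  The remaining "0" parses to punit = 0,
 * so the result is
 *
 *	*pdevp = MAKEDISKDEV(pmaj, 0, RAW_PART);
 *
 * i.e. the dev_t of the parent disk's raw partition.
 */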

/*
 * dkwedge_array_expand:
 *
 *	Expand the dkwedges array.
 *
 *	Releases and reacquires dkwedges_lock as a writer.
 */
static int
dkwedge_array_expand(void)
{

	const unsigned incr = 16;
	unsigned newcnt, oldcnt;
	struct dkwedge_softc **newarray = NULL, **oldarray = NULL;

	KASSERT(rw_write_held(&dkwedges_lock));

	oldcnt = ndkwedges;
	oldarray = dkwedges;

	if (oldcnt >= INT_MAX - incr)
		return ENFILE;	/* XXX */
	newcnt = oldcnt + incr;

	rw_exit(&dkwedges_lock);
	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
	    M_WAITOK|M_ZERO);
	rw_enter(&dkwedges_lock, RW_WRITER);

	if (ndkwedges != oldcnt || dkwedges != oldarray) {
		oldarray = NULL;	/* already recycled */
		goto out;
	}

	if (oldarray != NULL)
		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
	dkwedges = newarray;
	newarray = NULL;	/* transferred to dkwedges */
	ndkwedges = newcnt;

out:	rw_exit(&dkwedges_lock);
	if (oldarray != NULL)
		free(oldarray, M_DKWEDGE);
	if (newarray != NULL)
		free(newarray, M_DKWEDGE);
	rw_enter(&dkwedges_lock, RW_WRITER);
	return 0;
}
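
/*
 * Because dkwedge_array_expand() drops dkwedges_lock around malloc(),
 * the array may have been expanded or replaced by another thread by
 * the time it returns.  Callers therefore rescan the array after each
 * expansion, as the slot-search loop in dkwedge_add() below does.
 */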

static void
dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
{

	rw_init(&sc->sc_sizelock);
	sc->sc_size = size;
}

static void
dkwedge_size_fini(struct dkwedge_softc *sc)
{

	rw_destroy(&sc->sc_sizelock);
}

static uint64_t
dkwedge_size(struct dkwedge_softc *sc)
{
	uint64_t size;

	rw_enter(&sc->sc_sizelock, RW_READER);
	size = sc->sc_size;
	rw_exit(&sc->sc_sizelock);

	return size;
}

static void
dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
{

	KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));

	rw_enter(&sc->sc_sizelock, RW_WRITER);
	KASSERTMSG(size >= sc->sc_size,
	    "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
	    sc->sc_size, size);
	sc->sc_size = size;
	rw_exit(&sc->sc_sizelock);
}

static void
dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
{
	struct disk *dk = &sc->sc_dk;
	struct disk_geom *dg = &dk->dk_geom;

	KASSERT(mutex_owned(&pdk->dk_openlock));

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = dkwedge_size(sc);
	dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;

	/*
	 * Fake geometry: 1 cylinder is 1 MiB with the default
	 * 512-byte sector size (32 sectors * 64 tracks * 512 bytes).
	 */
	dg->dg_nsectors = 32;
	dg->dg_ntracks = 64;
	dg->dg_ncylinders =
	    dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);

	disk_set_info(sc->sc_dev, dk, NULL);
}

/*
 * dkwedge_add:		[exported function]
 *
 *	Add a disk wedge based on the provided information.
 *
 *	The incoming dkw_devname[] is ignored, instead being
 *	filled in and returned to the caller.
 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;
	device_t dev __diagused;

	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return ENXIO;

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return error;

	if (dkw->dkw_offset < 0)
		return EINVAL;

	/*
	 * Check for an existing wedge at the same disk offset.  Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (dkwedge_size(lsc) > dkw->dkw_size)
			break;
		if (lsc->sc_dev == NULL)
			break;

		sc = lsc;
		device_acquire(sc->sc_dev);
		dkwedge_size_increase(sc, dkw->dkw_size);
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	if (sc != NULL)
		goto announce;

	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	dkwedge_size_init(sc, dkw->dkw_size);

	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			/* XXX arithmetic overflow */
			uint64_t size = dkwedge_size(sc);
			uint64_t lsize = dkwedge_size(lsc);
			daddr_t lastblk = sc->sc_offset + size - 1;
			daddr_t llastblk = lsc->sc_offset + lsize - 1;

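			/*
			 * For example, if an existing wedge covers
			 * blocks [100, 199]: a new wedge starting at
			 * block 150 trips the first check below (its
			 * head lands inside the existing wedge), and
			 * a new wedge covering [50, 149] trips the
			 * second (its tail does).
			 */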
			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			if (sc->sc_offset == lsc->sc_offset &&
			    dkwedge_size(sc) == dkwedge_size(lsc) &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_NOTFOUND; /* use chosen cf_unit */

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
					   sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL) {
			error = dkwedge_array_expand();
			if (error)
				break;
		} else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance.  This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((dev = config_attach_pseudo_acquire(&sc->sc_cfdata, sc)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		rw_enter(&dkwedges_lock, RW_WRITER);
		KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return ENOMEM;
	}

	KASSERT(dev == sc->sc_dev);

announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    dkwedge_size(sc), sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller. */
	strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
	    sizeof(dkw->dkw_devname));

	device_release(sc->sc_dev);
	return 0;
}
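
/*
 * From userland, a wedge is normally created with the DIOCAWEDGE
 * ioctl on the parent disk's raw device.  A sketch (error handling
 * omitted; needs <sys/disk.h>, <sys/ioctl.h>, and <fcntl.h>; the
 * device path and wedge geometry are only examples):
 *
 *	struct dkwedge_info dkw;
 *	int fd = open("/dev/rwd0d", O_RDWR);
 *
 *	memset(&dkw, 0, sizeof(dkw));
 *	strlcpy((char *)dkw.dkw_wname, "mywedge", sizeof(dkw.dkw_wname));
 *	strlcpy(dkw.dkw_parent, "wd0", sizeof(dkw.dkw_parent));
 *	strlcpy(dkw.dkw_ptype, DKW_PTYPE_FFS, sizeof(dkw.dkw_ptype));
 *	dkw.dkw_offset = 63;
 *	dkw.dkw_size = 1048576;
 *	ioctl(fd, DIOCAWEDGE, &dkw);
 *
 * On success the kernel fills in dkw.dkw_devname ("dk0", "dk1", ...)
 * as done at the end of dkwedge_add() above.
 */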

/*
 * dkwedge_find_acquire:
 *
 *	Lookup a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 *
 *	Return NULL if the wedge is not found, otherwise return
 *	the wedge's softc.  Assign the wedge's unit number to unitp
 *	if unitp is not NULL.  The wedge's sc_dev is referenced and
 *	must be released by device_release or equivalent.
 */
static struct dkwedge_softc *
dkwedge_find_acquire(struct dkwedge_info *dkw, u_int *unitp)
{
	struct dkwedge_softc *sc = NULL;
	u_int unit;

	/* Find our softc. */
	dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
	rw_enter(&dkwedges_lock, RW_READER);
	for (unit = 0; unit < ndkwedges; unit++) {
		if ((sc = dkwedges[unit]) != NULL &&
		    sc->sc_dev != NULL &&
		    strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
		    strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
			device_acquire(sc->sc_dev);
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (sc == NULL)
		return NULL;

	if (unitp != NULL)
		*unitp = unit;

	return sc;
}

/*
 * dkwedge_del:		[exported function]
 *
 *	Delete a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{

	return dkwedge_del1(dkw, 0);
}

int
dkwedge_del1(struct dkwedge_info *dkw, int flags)
{
	struct dkwedge_softc *sc = NULL;

	/* Find our softc. */
	if ((sc = dkwedge_find_acquire(dkw, NULL)) == NULL)
		return ESRCH;

	return config_detach_release(sc->sc_dev, flags);
}

/*
 * dkwedge_detach:
 *
 *	Autoconfiguration detach function for pseudo-device glue.
 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *const sc = device_private(self);
	const u_int unit = device_unit(self);
	int bmaj, cmaj, error;

	error = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self, flags);
	if (error)
		return error;

	/* Mark the wedge as dying. */
	sc->sc_state = DKW_STATE_DYING;

	pmf_device_deregister(self);

	/* Kill any pending restart. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iostop = true;
	mutex_exit(&sc->sc_iolock);
	callout_halt(&sc->sc_restart_ch, NULL);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * At this point, all block device opens have been closed,
	 * synchronously flushing any buffered writes; and all
	 * character device I/O operations have completed
	 * synchronously, and character device opens have been closed.
	 *
	 * So there can be no more opens or queued buffers by now.
	 */
	KASSERT(sc->sc_dk.dk_openmask == 0);
	KASSERT(bufq_peek(sc->sc_bufq) == NULL);
	bufq_drain(sc->sc_bufq);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERT(dkwedges[unit] == sc);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	dkwedge_size_fini(sc);

	free(sc, M_DKWEDGE);

	return 0;
}

/*
 * dkwedge_delall:	[exported function]
 *
 *	Forcibly delete all of the wedges on the specified disk.  Used
 *	when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/false);
}

/*
 * dkwedge_delidle:	[exported function]
 *
 *	Delete all of the wedges on the specified disk if idle.  Used
 *	by ioctl(DIOCRMWEDGES).
 */
void
dkwedge_delidle(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/true);
}

static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly)
		flags |= DETACH_FORCE;

	for (;;) {
		mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			/*
			 * Wedge is not yet created.  This is a race --
			 * it may as well have been added just after we
			 * deleted all the wedges, so pretend it's not
			 * here yet.
			 */
			if (sc->sc_dev == NULL)
				continue;
			if (!idleonly || sc->sc_dk.dk_openmask == 0) {
				device_acquire(sc->sc_dev);
				break;
			}
		}
		if (sc == NULL) {
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			mutex_exit(&pdk->dk_rawlock);
			return;
		}
		mutex_exit(&pdk->dk_openlock);
		mutex_exit(&pdk->dk_rawlock);
		(void)config_detach_release(sc->sc_dev, flags);
	}
}

/*
 * dkwedge_list:	[exported function]
 *
 *	List all of the wedges on a particular disk.
 */
int
dkwedge_list(struct disk *pdk, struct dkwedge_list *dkwl, struct lwp *l)
{
	struct uio uio;
	struct iovec iov;
	struct dkwedge_softc *sc;
	struct dkwedge_info dkw;
	int error = 0;

	iov.iov_base = dkwl->dkwl_buf;
	iov.iov_len = dkwl->dkwl_bufsize;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = dkwl->dkwl_bufsize;
	uio.uio_rw = UIO_READ;
	KASSERT(l == curlwp);
	uio.uio_vmspace = l->l_proc->p_vmspace;

	dkwl->dkwl_ncopied = 0;

	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
		if (uio.uio_resid < sizeof(dkw))
			break;

		if (sc->sc_dev == NULL)
			continue;

		strlcpy(dkw.dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw.dkw_devname));
		memcpy(dkw.dkw_wname, sc->sc_wname, sizeof(dkw.dkw_wname));
		dkw.dkw_wname[sizeof(dkw.dkw_wname) - 1] = '\0';
		strlcpy(dkw.dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw.dkw_parent));
		dkw.dkw_offset = sc->sc_offset;
		dkw.dkw_size = dkwedge_size(sc);
		strlcpy(dkw.dkw_ptype, sc->sc_ptype, sizeof(dkw.dkw_ptype));

		/*
		 * Acquire a device reference so this wedge doesn't go
		 * away before our next iteration in LIST_FOREACH, and
		 * then release the lock for uiomove.
		 */
		device_acquire(sc->sc_dev);
		mutex_exit(&pdk->dk_openlock);
		error = uiomove(&dkw, sizeof(dkw), &uio);
		mutex_enter(&pdk->dk_openlock);
		device_release(sc->sc_dev);
		if (error)
			break;

		dkwl->dkwl_ncopied++;
	}
	dkwl->dkwl_nwedges = pdk->dk_nwedges;
	mutex_exit(&pdk->dk_openlock);

	return error;
}

static device_t
dkwedge_find_by_wname_acquire(const char *wname)
{
	device_t dv = NULL;
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_wname, wname) == 0) {
			if (dv != NULL) {
				printf(
				    "WARNING: double match for wedge name %s "
				    "(%s, %s)\n", wname, device_xname(dv),
				    device_xname(sc->sc_dev));
				continue;
			}
			device_acquire(sc->sc_dev);
			dv = sc->sc_dev;
		}
	}
	rw_exit(&dkwedges_lock);
	return dv;
}

static device_t
dkwedge_find_by_parent_acquire(const char *name, size_t *i)
{

	rw_enter(&dkwedges_lock, RW_READER);
	for (; *i < (size_t)ndkwedges; (*i)++) {
		struct dkwedge_softc *sc;
		if ((sc = dkwedges[*i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, name) != 0)
			continue;
		device_acquire(sc->sc_dev);
		rw_exit(&dkwedges_lock);
		return sc->sc_dev;
	}
	rw_exit(&dkwedges_lock);
	return NULL;
}

/* XXX unsafe */
device_t
dkwedge_find_by_wname(const char *wname)
{
	device_t dv;

	if ((dv = dkwedge_find_by_wname_acquire(wname)) == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

/* XXX unsafe */
device_t
dkwedge_find_by_parent(const char *name, size_t *i)
{
	device_t dv;

	if ((dv = dkwedge_find_by_parent_acquire(name, i)) == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

void
dkwedge_print_wnames(void)
{
	struct dkwedge_softc *sc;
	int i;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		printf(" wedge:%s", sc->sc_wname);
	}
	rw_exit(&dkwedges_lock);
}

/*
 * We need a dummy object to stuff into the dkwedge discovery method link
 * set to ensure that there is always at least one object in the set.
 */
static struct dkwedge_discovery_method dummy_discovery_method;
__link_set_add_bss(dkwedge_methods, dummy_discovery_method);
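
/*
 * Concrete discovery methods (GPT, MBR, BSD disklabel, ...) live under
 * sys/dev/dkwedge/ and add themselves to this link set, typically via
 * the DKWEDGE_DISCOVERY_METHOD_DECL() macro from <sys/disk.h>, along
 * the lines of (a sketch; "FOO" is a placeholder):
 *
 *	static int
 *	dkwedge_discover_foo(struct disk *pdk, struct vnode *vp)
 *	{
 *		... read the on-disk map with dkwedge_read(), then
 *		call dkwedge_add() for each partition found; return
 *		0 on success, an error such as ESRCH otherwise ...
 *	}
 *
 *	DKWEDGE_DISCOVERY_METHOD_DECL(FOO, 10, dkwedge_discover_foo);
 */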

/*
 * dkwedge_init:
 *
 *	Initialize the disk wedge subsystem.
 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}

#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif

/*
 * dkwedge_discover:	[exported function]
 *
 *	Discover the wedges on a newly attached disk.
 *	Remove all unused wedges on the disk first.
 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning, since the block
	 * device is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENXIO)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges.
	 */
	dkwedge_delidle(pdk);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists.  If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

out:
	rw_exit(&dkwedge_discovery_methods_lock);
}

/*
 * dkwedge_read:
 *
 *	Read some data from the specified disk, used for
 *	partition discovery.
 */
int
dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
    void *tbuf, size_t len)
{
	buf_t *bp;
	int error;
	bool isopen;
	dev_t bdev;
	struct vnode *bdvp;

	/*
	 * The kernel cannot read from a character device vnode
	 * as physio() only handles user memory.
	 *
	 * If the block device has already been opened by a wedge
	 * use that vnode and temporarily bump the open counter.
	 *
	 * Otherwise try to open the block device.
	 */

	bdev = devsw_chr2blk(vp->v_rdev);

	mutex_enter(&pdk->dk_rawlock);
	if (pdk->dk_rawopens != 0) {
		KASSERT(pdk->dk_rawvp != NULL);
		isopen = true;
		++pdk->dk_rawopens;
		bdvp = pdk->dk_rawvp;
		error = 0;
	} else {
		isopen = false;
		error = dk_open_parent(bdev, FREAD, &bdvp);
	}
	mutex_exit(&pdk->dk_rawlock);

	if (error)
		return error;

	bp = getiobuf(bdvp, true);
	bp->b_flags = B_READ;
	bp->b_cflags = BC_BUSY;
	bp->b_dev = bdev;
	bp->b_data = tbuf;
	bp->b_bufsize = bp->b_bcount = len;
	bp->b_blkno = blkno;
	bp->b_cylinder = 0;
	bp->b_error = 0;

	VOP_STRATEGY(bdvp, bp);
	error = biowait(bp);
	putiobuf(bp);

	mutex_enter(&pdk->dk_rawlock);
	if (isopen) {
		--pdk->dk_rawopens;
	} else {
		dk_close_parent(bdvp, FREAD);
	}
	mutex_exit(&pdk->dk_rawlock);

	return error;
}

/*
 * dkwedge_lookup:
 *
 *	Look up a dkwedge_softc based on the provided dev_t.
 *
 *	Caller must guarantee the wedge is referenced.
 */
static struct dkwedge_softc *
dkwedge_lookup(dev_t dev)
{

	return device_lookup_private(&dk_cd, minor(dev));
}

static struct dkwedge_softc *
dkwedge_lookup_acquire(dev_t dev)
{
	device_t dv = device_lookup_acquire(&dk_cd, minor(dev));

	if (dv == NULL)
		return NULL;
	return device_private(dv);
}

static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}

static int
dk_close_parent(struct vnode *vp, int mode)
{
	int error;

	error = vn_close(vp, mode, NOCRED);
	return error;
}

/*
 * dkopen:		[devsw entry point]
 *
 *	Open a wedge.
 */
static int
dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state == DKW_STATE_RUNNING);

	/*
	 * We go through a complicated little dance to only open the parent
	 * vnode once per wedge, no matter how many times the wedge is
	 * opened.  The reason?  We see one dkopen() per open call, but
	 * only dkclose() on the last close.
	 */
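	/*
	 * For example: the first open of any wedge on a disk opens
	 * the parent device once (dkfirstopen -> dk_open_parent);
	 * first opens of sibling wedges merely bump dk_rawopens;
	 * repeated opens of the same wedge only set bits in its open
	 * masks.  The parent vnode is closed again only when the last
	 * close of the last open wedge drops dk_rawopens to zero
	 * (dklastclose).
	 */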
	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);
	if (sc->sc_dk.dk_openmask == 0) {
		error = dkfirstopen(sc, flags);
		if (error)
			goto out;
	} else if (flags & ~sc->sc_mode & FWRITE) {
		/*
		 * The parent is already open, but the previous attempt
		 * to open it read/write failed and fell back to
		 * read-only.  In that case, we assume the medium is
		 * read-only and fail to open the wedge read/write.
		 */
		error = EROFS;
		goto out;
	}
	KASSERT(sc->sc_mode != 0);
	KASSERTMSG(sc->sc_mode & FREAD, "%s: sc_mode=%x",
	    device_xname(sc->sc_dev), sc->sc_mode);
	KASSERTMSG((flags & FWRITE) ? (sc->sc_mode & FWRITE) : 1,
	    "%s: flags=%x sc_mode=%x",
	    device_xname(sc->sc_dev), flags, sc->sc_mode);
	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask |= 1;
	else
		sc->sc_dk.dk_bopenmask |= 1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

out:	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);
	return error;
}

static int
dkfirstopen(struct dkwedge_softc *sc, int flags)
{
	struct dkwedge_softc *nsc;
	struct vnode *vp;
	int mode;
	int error;

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));

	if (sc->sc_parent->dk_rawopens == 0) {
		KASSERT(sc->sc_parent->dk_rawvp == NULL);
		/*
		 * Try to open the parent read/write.  If this fails
		 * with EROFS and the wedge itself is being opened
		 * read-only, retry opening the parent read-only.
		 */
		mode = FREAD | FWRITE;
		error = dk_open_parent(sc->sc_pdev, mode, &vp);
		if (error == EROFS && (flags & FWRITE) == 0) {
			mode &= ~FWRITE;
			error = dk_open_parent(sc->sc_pdev, mode, &vp);
		}
		if (error)
			return error;
		KASSERT(vp != NULL);
		sc->sc_parent->dk_rawvp = vp;
	} else {
		/*
		 * Retrieve mode from an already opened wedge.
		 *
		 * At this point, dk_rawopens is bounded by the number
		 * of dkwedge devices in the system, which is limited
		 * by autoconf device numbering to INT_MAX.  Since
		 * dk_rawopens is unsigned, this can't overflow.
		 */
		KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
		KASSERT(sc->sc_parent->dk_rawvp != NULL);
		mode = 0;
		mutex_enter(&sc->sc_parent->dk_openlock);
		LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
			if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
				continue;
			mode = nsc->sc_mode;
			break;
		}
		mutex_exit(&sc->sc_parent->dk_openlock);
	}
	sc->sc_mode = mode;
	sc->sc_parent->dk_rawopens++;

	return 0;
}

static void
dklastclose(struct dkwedge_softc *sc)
{

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
	KASSERT(sc->sc_parent->dk_rawopens > 0);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	if (--sc->sc_parent->dk_rawopens == 0) {
		struct vnode *const vp = sc->sc_parent->dk_rawvp;
		const int mode = sc->sc_mode;

		sc->sc_parent->dk_rawvp = NULL;
		sc->sc_mode = 0;

		dk_close_parent(vp, mode);
	}
}

/*
 * dkclose:		[devsw entry point]
 *
 *	Close a wedge.
 */
static int
dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	/*
	 * dkclose can be called even if dkopen didn't succeed, so we
	 * have to handle the possibility that the wedge may not
	 * exist, just as dkopen does.
	 */
	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);

	KASSERT(sc->sc_dk.dk_openmask != 0);

	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask &= ~1;
	else
		sc->sc_dk.dk_bopenmask &= ~1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	if (sc->sc_dk.dk_openmask == 0) {
		dklastclose(sc);
	}

	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);

	return 0;
}

/*
 * dkcancel:		[devsw entry point]
 *
 *	Cancel any pending I/O operations waiting on a wedge.
 */
static int
dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	/*
	 * Disk I/O is expected to complete or fail within a reasonable
	 * timeframe -- it's storage, not communication.  Further, the
	 * character and block device interface guarantees that prior
	 * reads and writes have completed or failed by the time close
	 * returns -- we are not to cancel them here.  If the parent
	 * device's hardware is gone, the parent driver can make them
	 * fail.  Nothing for dk(4) itself to do.
	 */

	return 0;
}

/*
 * dkstrategy:		[devsw entry point]
 *
 *	Perform I/O based on the wedge I/O strategy.
 */
static void
dkstrategy(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	uint64_t p_size, p_offset;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* If it's an empty transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	/* Make sure it's in-range. */
	if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
		goto done;

	/* Translate it to the parent's raw LBA. */
	bp->b_rawblkno = bp->b_blkno + p_offset;

	/* Place it in the queue and start I/O on the unit. */
	mutex_enter(&sc->sc_iolock);
	disk_wait(&sc->sc_dk);
	bufq_put(sc->sc_bufq, bp);
	mutex_exit(&sc->sc_iolock);

	dkstart(sc);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * dkstart:
 *
 *	Start I/O that has been enqueued on the wedge.
 */
static void
dkstart(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	struct buf *bp, *nbp;

	mutex_enter(&sc->sc_iolock);

	/* Do as much work as has been enqueued. */
	while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
		if (sc->sc_iostop) {
			(void) bufq_get(sc->sc_bufq);
			mutex_exit(&sc->sc_iolock);
			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* fetch an I/O buf with sc_iolock dropped */
		mutex_exit(&sc->sc_iolock);
		nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
		mutex_enter(&sc->sc_iolock);
		if (nbp == NULL) {
			/*
			 * No resources to run this request; leave the
			 * buffer queued up, and schedule a timer to
			 * restart the queue in 1/2 a second.
			 */
			if (!sc->sc_iostop)
				callout_schedule(&sc->sc_restart_ch, hz/2);
			break;
		}

		/*
		 * Fetch a buf.  This can fail if another thread has
		 * already processed the queue; it can also return a
		 * completely different buf.
		 */
		bp = bufq_get(sc->sc_bufq);
		if (bp == NULL) {
			mutex_exit(&sc->sc_iolock);
			putiobuf(nbp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* Instrumentation. */
		disk_busy(&sc->sc_dk);

		/* release lock for VOP_STRATEGY */
		mutex_exit(&sc->sc_iolock);

		nbp->b_data = bp->b_data;
		nbp->b_flags = bp->b_flags;
		nbp->b_oflags = bp->b_oflags;
		nbp->b_cflags = bp->b_cflags;
		nbp->b_iodone = dkiodone;
		nbp->b_proc = bp->b_proc;
		nbp->b_blkno = bp->b_rawblkno;
		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
		nbp->b_bcount = bp->b_bcount;
		nbp->b_private = bp;
		BIO_COPYPRIO(nbp, bp);

		vp = nbp->b_vp;
		if ((nbp->b_flags & B_READ) == 0) {
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		VOP_STRATEGY(vp, nbp);

		mutex_enter(&sc->sc_iolock);
	}

	mutex_exit(&sc->sc_iolock);
}

/*
 * dkiodone:
 *
 *	I/O to a wedge has completed; alert the top half.
 */
static void
dkiodone(struct buf *bp)
{
	struct buf *obp = bp->b_private;
	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);

	if (bp->b_error != 0)
		obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	putiobuf(bp);

	mutex_enter(&sc->sc_iolock);
	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
	    obp->b_flags & B_READ);
	mutex_exit(&sc->sc_iolock);

	biodone(obp);

	/* Kick the queue in case there is more work we can do. */
	dkstart(sc);
}

/*
 * dkrestart:
 *
 *	Restart the work queue after it was stalled due to
 *	a resource shortage.  Invoked via a callout.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *sc = v;

	dkstart(sc);
}

/*
 * dkminphys:
 *
 *	Call parent's minphys function.
 */
static void
dkminphys(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	dev_t dev;

	dev = bp->b_dev;
	bp->b_dev = sc->sc_pdev;
	if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
		(*sc->sc_parent->dk_driver->d_minphys)(bp);
	else
		minphys(bp);
	bp->b_dev = dev;
}

/*
 * dkread:		[devsw entry point]
 *
 *	Read from a wedge.
 */
static int
dkread(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
}

/*
 * dkwrite:		[devsw entry point]
 *
 *	Write to a wedge.
 */
static int
dkwrite(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
}

/*
 * dkioctl:		[devsw entry point]
 *
 *	Perform an ioctl request on a wedge.
 */
static int
dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/*
	 * We pass NODEV instead of our device to indicate that we
	 * don't want to handle disklabel ioctls.
	 */
	error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = 0;

	switch (cmd) {
	case DIOCGSTRATEGY:
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
		    l != NULL ? l->l_cred : NOCRED);
		break;
	case DIOCGWEDGEINFO: {
		struct dkwedge_info *dkw = data;

		strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw->dkw_devname));
		memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
		strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw->dkw_parent));
		dkw->dkw_offset = sc->sc_offset;
		dkw->dkw_size = dkwedge_size(sc);
		strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));

		break;
	}
	case DIOCGSECTORALIGN: {
		struct disk_sectoralign *dsa = data;
		uint32_t r;

		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
		    l != NULL ? l->l_cred : NOCRED);
		if (error)
			break;

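		/*
		 * Make the parent's first aligned sector relative to
		 * the wedge.  E.g. with dsa_alignment == 8, parent
		 * dsa_firstaligned == 0, and sc_offset == 3: r == 3,
		 * so dsa_firstaligned becomes (0 + 8) - 3 == 5, and
		 * wedge-relative sector 5 is parent sector 8, which
		 * is aligned.
		 */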
		r = sc->sc_offset % dsa->dsa_alignment;
		if (r < dsa->dsa_firstaligned)
			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
		else
			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
			    dsa->dsa_alignment) - r;
		break;
	}
	default:
		error = ENOTTY;
	}

	return error;
}

/*
 * dkdiscard:		[devsw entry point]
 *
 *	Perform a discard-range request on a wedge.
 */
static int
dkdiscard(dev_t dev, off_t pos, off_t len)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t size = dkwedge_size(sc);
	unsigned shift;
	off_t offset, maxlen;
	int error;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* XXX check bounds on size/offset up front */
	shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
	KASSERT(__type_fit(off_t, size));
	KASSERT(__type_fit(off_t, sc->sc_offset));
	KASSERT(0 <= sc->sc_offset);
	KASSERT(size <= (__type_max(off_t) >> shift));
	KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size));
	offset = ((off_t)sc->sc_offset << shift);
	maxlen = ((off_t)size << shift);

	if (len > maxlen)
		return EINVAL;
	if (pos > (maxlen - len))
		return EINVAL;

	pos += offset;

	vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
	VOP_UNLOCK(sc->sc_parent->dk_rawvp);

	return error;
}

/*
 * dksize:		[devsw entry point]
 *
 *	Query the size of a wedge for the purpose of performing a dump
 *	or for swapping to.
 */
static int
dksize(dev_t dev)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t p_size;
	int rv = -1;

	if (sc == NULL)
		return -1;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return -1;

	/* Our content type is static, no need to open the device. */

	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
		/* Saturate if we are larger than INT_MAX. */
		if (p_size > INT_MAX)
			rv = INT_MAX;
		else
			rv = (int)p_size;
	}

	return rv;
}

/*
 * dkdump:		[devsw entry point]
 *
 *	Perform a crash dump to a wedge.
 */
static int
dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	const struct bdevsw *bdev;
	uint64_t p_size, p_offset;

	if (sc == NULL)
		return ENXIO;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return ENXIO;

	/* Our content type is static, no need to open the device. */

	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0)
		return ENXIO;
	if (size % DEV_BSIZE != 0)
		return EINVAL;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "p_size (%" PRIu64 ")\n", __func__, blkno,
		    size/DEV_BSIZE, p_size);
		return EINVAL;
	}

	bdev = bdevsw_lookup(sc->sc_pdev);
	return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
}

/*
 * config glue
 */

/*
 * dkwedge_find_partition:
 *
 *	Find wedge corresponding to the specified parent name
 *	and offset/length.
 */
static device_t
dkwedge_find_partition_acquire(device_t parent, daddr_t startblk,
    uint64_t nblks)
{
	struct dkwedge_softc *sc;
	int i;
	device_t wedge = NULL;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
		    sc->sc_offset == startblk &&
		    dkwedge_size(sc) == nblks) {
			if (wedge) {
				printf("WARNING: double match for boot wedge "
				    "(%s, %s)\n",
				    device_xname(wedge),
				    device_xname(sc->sc_dev));
				continue;
			}
			wedge = sc->sc_dev;
			device_acquire(wedge);
		}
	}
	rw_exit(&dkwedges_lock);

	return wedge;
}

/* XXX unsafe */
device_t
dkwedge_find_partition(device_t parent, daddr_t startblk,
    uint64_t nblks)
{
	device_t dv;

	if ((dv = dkwedge_find_partition_acquire(parent, startblk, nblks))
	    == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

const char *
dkwedge_get_parent_name(dev_t dev)
{
	/* XXX: perhaps do this in lookup? */
	int bmaj = bdevsw_lookup_major(&dk_bdevsw);
	int cmaj = cdevsw_lookup_major(&dk_cdevsw);

	if (major(dev) != bmaj && major(dev) != cmaj)
		return NULL;

	struct dkwedge_softc *const sc = dkwedge_lookup_acquire(dev);
	if (sc == NULL)
		return NULL;
	const char *const name = sc->sc_parent->dk_name;
	device_release(sc->sc_dev);
	return name;
}