/*	$NetBSD: dk.c,v 1.173 2025/04/13 14:01:00 jakllsch Exp $	*/

/*-
 * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.173 2025/04/13 14:01:00 jakllsch Exp $");

#ifdef _KERNEL_OPT
#include "opt_dkwedge.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/buf.h>
#include <sys/bufq.h>
#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/ioctl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

MALLOC_DEFINE(M_DKWEDGE, "dkwedge", "Disk wedge structures");

typedef enum {
	DKW_STATE_LARVAL	= 0,
	DKW_STATE_RUNNING	= 1,
	DKW_STATE_DYING		= 2,
	DKW_STATE_DEAD		= 666
} dkwedge_state_t;

/*
 * Lock order:
 *
 *	sc->sc_dk.dk_openlock
 *	=> sc->sc_parent->dk_rawlock
 *	=> sc->sc_parent->dk_openlock
 *	=> dkwedges_lock
 *	=> sc->sc_sizelock
 *
 * Locking notes:
 *
 *	W	dkwedges_lock
 *	D	device reference
 *	O	sc->sc_dk.dk_openlock
 *	P	sc->sc_parent->dk_openlock
 *	R	sc->sc_parent->dk_rawlock
 *	S	sc->sc_sizelock
 *	I	sc->sc_iolock
 *	$	stable after initialization
 *	1	used only by a single thread
 *
 * x&y means both x and y must be held to write (with a write lock if
 * one is rwlock), and either x or y must be held to read.
 */
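/*
 * For example, sc_dk.dk_openmask below is tagged O&R: dkopen() and
 * dkclose() modify it while holding both sc_dk.dk_openlock and
 * sc_parent->dk_rawlock, so a reader may inspect it while holding
 * either one of the two.
 */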

struct dkwedge_softc {
	device_t	sc_dev;	/* P&W: pointer to our pseudo-device */
		/* sc_dev is also stable while device is referenced */
	struct cfdata	sc_cfdata;	/* 1: our cfdata structure */
	uint8_t		sc_wname[128];	/* $: wedge name (Unicode, UTF-8) */

	dkwedge_state_t sc_state;	/* state this wedge is in */
		/* stable while device is referenced */
		/* used only in assertions when stable, and in dump in ddb */

	struct disk	*sc_parent;	/* $: parent disk */
		/* P: sc_parent->dk_openmask */
		/* P: sc_parent->dk_nwedges */
		/* P: sc_parent->dk_wedges */
		/* R: sc_parent->dk_rawopens */
		/* R: sc_parent->dk_rawvp (also stable while wedge is open) */
	daddr_t		sc_offset;	/* $: LBA offset of wedge in parent */
	krwlock_t	sc_sizelock;
	uint64_t	sc_size;	/* S: size of wedge in blocks */
	char		sc_ptype[32];	/* $: partition type */
	dev_t		sc_pdev;	/* $: cached parent's dev_t */
					/* P: link on parent's wedge list */
	LIST_ENTRY(dkwedge_softc) sc_plink;

	struct disk	sc_dk;		/* our own disk structure */
		/* O&R: sc_dk.dk_bopenmask */
		/* O&R: sc_dk.dk_copenmask */
		/* O&R: sc_dk.dk_openmask */
	struct bufq_state *sc_bufq;	/* $: buffer queue */
	struct callout	sc_restart_ch;	/* I: callout to restart I/O */

	kmutex_t	sc_iolock;
	bool		sc_iostop;	/* I: don't schedule restart */
	int		sc_mode;	/* O&R: parent open mode */
};

static int	dkwedge_match(device_t, cfdata_t, void *);
static void	dkwedge_attach(device_t, device_t, void *);
static int	dkwedge_detach(device_t, int);

static void	dk_set_geometry(struct dkwedge_softc *, struct disk *);

static void	dkstart(struct dkwedge_softc *);
static void	dkiodone(struct buf *);
static void	dkrestart(void *);
static void	dkminphys(struct buf *);

static int	dkfirstopen(struct dkwedge_softc *, int);
static void	dklastclose(struct dkwedge_softc *);
static void	dkwedge_delall1(struct disk *, bool);
static int	dkwedge_del1(struct dkwedge_info *, int);
static int	dk_open_parent(dev_t, int, struct vnode **);
static int	dk_close_parent(struct vnode *, int);

static dev_type_open(dkopen);
static dev_type_close(dkclose);
static dev_type_cancel(dkcancel);
static dev_type_read(dkread);
static dev_type_write(dkwrite);
static dev_type_ioctl(dkioctl);
static dev_type_strategy(dkstrategy);
static dev_type_dump(dkdump);
static dev_type_size(dksize);
static dev_type_discard(dkdiscard);

CFDRIVER_DECL(dk, DV_DISK, NULL);
CFATTACH_DECL3_NEW(dk, 0,
    dkwedge_match, dkwedge_attach, dkwedge_detach, NULL, NULL, NULL,
    DVF_DETACH_SHUTDOWN);

const struct bdevsw dk_bdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_strategy = dkstrategy,
	.d_ioctl = dkioctl,
	.d_dump = dkdump,
	.d_psize = dksize,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};

const struct cdevsw dk_cdevsw = {
	.d_open = dkopen,
	.d_close = dkclose,
	.d_cancel = dkcancel,
	.d_read = dkread,
	.d_write = dkwrite,
	.d_ioctl = dkioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = dkdiscard,
	.d_cfdriver = &dk_cd,
	.d_devtounit = dev_minor_unit,
	.d_flag = D_DISK | D_MPSAFE
};
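
/*
 * Both switches resolve a dev_t to its autoconf unit with
 * dev_minor_unit(), so a wedge's unit number is simply the minor
 * number of its block or character device.
 */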

static struct dkwedge_softc **dkwedges;
static u_int ndkwedges;
static krwlock_t dkwedges_lock;

static LIST_HEAD(, dkwedge_discovery_method) dkwedge_discovery_methods;
static krwlock_t dkwedge_discovery_methods_lock;

/*
 * dkwedge_match:
 *
 *	Autoconfiguration match function for pseudo-device glue.
 */
static int
dkwedge_match(device_t parent, cfdata_t match, void *aux)
{

	/* Pseudo-device; always present. */
	return 1;
}

/*
 * dkwedge_attach:
 *
 *	Autoconfiguration attach function for pseudo-device glue.
 */
static void
dkwedge_attach(device_t parent, device_t self, void *aux)
{
	struct dkwedge_softc *sc = aux;
	struct disk *pdk = sc->sc_parent;
	int unit = device_unit(self);

	KASSERTMSG(unit >= 0, "unit=%d", unit);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	mutex_enter(&pdk->dk_openlock);
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERTMSG(unit < ndkwedges, "unit=%d ndkwedges=%u", unit, ndkwedges);
	KASSERTMSG(sc == dkwedges[unit], "sc=%p dkwedges[%d]=%p",
	    sc, unit, dkwedges[unit]);
	KASSERTMSG(sc->sc_dev == NULL, "sc=%p sc->sc_dev=%p", sc, sc->sc_dev);
	sc->sc_dev = self;
	rw_exit(&dkwedges_lock);
	mutex_exit(&pdk->dk_openlock);

	disk_init(&sc->sc_dk, device_xname(sc->sc_dev), NULL);
	mutex_enter(&pdk->dk_openlock);
	dk_set_geometry(sc, pdk);
	mutex_exit(&pdk->dk_openlock);
	disk_attach(&sc->sc_dk);

	/* Disk wedge is ready for use! */
	device_set_private(self, sc);
	sc->sc_state = DKW_STATE_RUNNING;
}

/*
 * dkwedge_compute_pdev:
 *
 *	Compute the parent disk's dev_t.
 */
static int
dkwedge_compute_pdev(const char *pname, dev_t *pdevp, enum vtype type)
{
	const char *name, *cp;
	devmajor_t pmaj;
	int punit;
	char devname[16];

	name = pname;
	switch (type) {
	case VBLK:
		pmaj = devsw_name2blk(name, devname, sizeof(devname));
		break;
	case VCHR:
		pmaj = devsw_name2chr(name, devname, sizeof(devname));
		break;
	default:
		pmaj = NODEVMAJOR;
		break;
	}
	if (pmaj == NODEVMAJOR)
		return ENXIO;

	name += strlen(devname);
	for (cp = name, punit = 0; *cp >= '0' && *cp <= '9'; cp++)
		punit = (punit * 10) + (*cp - '0');
	if (cp == name) {
		/* Invalid parent disk name. */
		return ENXIO;
	}

	*pdevp = MAKEDISKDEV(pmaj, punit, RAW_PART);

	return 0;
}
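
/*
 * For example, pname "wd0" with type VBLK resolves through
 * devsw_name2blk() to the "wd" block major, the trailing "0" parses
 * as the unit, and *pdevp becomes the dev_t of wd0's raw partition.
 */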

/*
 * dkwedge_array_expand:
 *
 *	Expand the dkwedges array.
 *
 *	Releases and reacquires dkwedges_lock as a writer.
 */
static int
dkwedge_array_expand(void)
{

	const unsigned incr = 16;
	unsigned newcnt, oldcnt;
	struct dkwedge_softc **newarray = NULL, **oldarray = NULL;

	KASSERT(rw_write_held(&dkwedges_lock));

	oldcnt = ndkwedges;
	oldarray = dkwedges;

	if (oldcnt >= INT_MAX - incr)
		return ENFILE;	/* XXX */
	newcnt = oldcnt + incr;

	rw_exit(&dkwedges_lock);
	newarray = malloc(newcnt * sizeof(*newarray), M_DKWEDGE,
	    M_WAITOK|M_ZERO);
	rw_enter(&dkwedges_lock, RW_WRITER);

	if (ndkwedges != oldcnt || dkwedges != oldarray) {
		oldarray = NULL;	/* already recycled */
		goto out;
	}

	if (oldarray != NULL)
		memcpy(newarray, dkwedges, ndkwedges * sizeof(*newarray));
	dkwedges = newarray;
	newarray = NULL;	/* transferred to dkwedges */
	ndkwedges = newcnt;

out:	rw_exit(&dkwedges_lock);
	if (oldarray != NULL)
		free(oldarray, M_DKWEDGE);
	if (newarray != NULL)
		free(newarray, M_DKWEDGE);
	rw_enter(&dkwedges_lock, RW_WRITER);
	return 0;
}

static void
dkwedge_size_init(struct dkwedge_softc *sc, uint64_t size)
{

	rw_init(&sc->sc_sizelock);
	sc->sc_size = size;
}

static void
dkwedge_size_fini(struct dkwedge_softc *sc)
{

	rw_destroy(&sc->sc_sizelock);
}

static uint64_t
dkwedge_size(struct dkwedge_softc *sc)
{
	uint64_t size;

	rw_enter(&sc->sc_sizelock, RW_READER);
	size = sc->sc_size;
	rw_exit(&sc->sc_sizelock);

	return size;
}

static void
dkwedge_size_increase(struct dkwedge_softc *sc, uint64_t size)
{

	KASSERT(mutex_owned(&sc->sc_parent->dk_openlock));

	rw_enter(&sc->sc_sizelock, RW_WRITER);
	KASSERTMSG(size >= sc->sc_size,
	    "decreasing dkwedge size from %"PRIu64" to %"PRIu64,
	    sc->sc_size, size);
	sc->sc_size = size;
	rw_exit(&sc->sc_sizelock);
}

static void
dk_set_geometry(struct dkwedge_softc *sc, struct disk *pdk)
{
	struct disk *dk = &sc->sc_dk;
	struct disk_geom *dg = &dk->dk_geom;
	uint32_t r, lspps;

	KASSERT(mutex_owned(&pdk->dk_openlock));

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = dkwedge_size(sc);
	dg->dg_secsize = DEV_BSIZE << pdk->dk_blkshift;

	/* fake numbers, 1 cylinder is 1 MB with default sector size */
	dg->dg_nsectors = 32;
	dg->dg_ntracks = 64;
	dg->dg_ncylinders =
	    dg->dg_secperunit / (dg->dg_nsectors * dg->dg_ntracks);

	dg->dg_physsecsize = pdk->dk_geom.dg_physsecsize;
	dg->dg_alignedsec = pdk->dk_geom.dg_alignedsec;
	lspps = MAX(1u, dg->dg_physsecsize / dg->dg_secsize);
	r = sc->sc_offset % lspps;
	if (r > dg->dg_alignedsec)
		dg->dg_alignedsec += lspps;
	dg->dg_alignedsec -= r;
	dg->dg_alignedsec %= lspps;

	disk_set_info(sc->sc_dev, dk, NULL);
}
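
/*
 * With the default 512-byte sector size, the fake geometry above works
 * out to 32 * 64 = 2048 sectors, i.e. 1 MB, per cylinder.  dg_alignedsec
 * is the parent's first aligned sector rebased to the wedge: e.g. a
 * wedge at offset 3 on a disk with 8 logical sectors per physical
 * sector (lspps = 8) and parent dg_alignedsec = 0 yields
 * (0 + 8 - 3) % 8 = 5.
 */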

/*
 * dkwedge_add:		[exported function]
 *
 *	Add a disk wedge based on the provided information.
 *
 *	The incoming dkw_devname[] is ignored, instead being
 *	filled in and returned to the caller.
 */
int
dkwedge_add(struct dkwedge_info *dkw)
{
	struct dkwedge_softc *sc, *lsc;
	struct disk *pdk;
	u_int unit;
	int error;
	dev_t pdev;
	device_t dev __diagused;

	dkw->dkw_parent[sizeof(dkw->dkw_parent) - 1] = '\0';
	pdk = disk_find(dkw->dkw_parent);
	if (pdk == NULL)
		return ENXIO;

	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VBLK);
	if (error)
		return error;

	if (dkw->dkw_offset < 0)
		return EINVAL;

	/*
	 * Check for an existing wedge at the same disk offset.  Allow
	 * updating a wedge if the only change is the size, and the new
	 * size is larger than the old.
	 */
	sc = NULL;
	mutex_enter(&pdk->dk_openlock);
	LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
		if (lsc->sc_offset != dkw->dkw_offset)
			continue;
		if (strcmp(lsc->sc_wname, dkw->dkw_wname) != 0)
			break;
		if (strcmp(lsc->sc_ptype, dkw->dkw_ptype) != 0)
			break;
		if (dkwedge_size(lsc) > dkw->dkw_size)
			break;
		if (lsc->sc_dev == NULL)
			break;

		sc = lsc;
		device_acquire(sc->sc_dev);
		dkwedge_size_increase(sc, dkw->dkw_size);
		dk_set_geometry(sc, pdk);

		break;
	}
	mutex_exit(&pdk->dk_openlock);

	if (sc != NULL)
		goto announce;

	sc = malloc(sizeof(*sc), M_DKWEDGE, M_WAITOK|M_ZERO);
	sc->sc_state = DKW_STATE_LARVAL;
	sc->sc_parent = pdk;
	sc->sc_pdev = pdev;
	sc->sc_offset = dkw->dkw_offset;
	dkwedge_size_init(sc, dkw->dkw_size);

	memcpy(sc->sc_wname, dkw->dkw_wname, sizeof(sc->sc_wname));
	sc->sc_wname[sizeof(sc->sc_wname) - 1] = '\0';

	memcpy(sc->sc_ptype, dkw->dkw_ptype, sizeof(sc->sc_ptype));
	sc->sc_ptype[sizeof(sc->sc_ptype) - 1] = '\0';

	bufq_alloc(&sc->sc_bufq, "fcfs", 0);

	callout_init(&sc->sc_restart_ch, 0);
	callout_setfunc(&sc->sc_restart_ch, dkrestart, sc);

	mutex_init(&sc->sc_iolock, MUTEX_DEFAULT, IPL_BIO);

	/*
	 * Wedge will be added; increment the wedge count for the parent.
	 * Only allow this to happen if RAW_PART is the only thing open.
	 */
	mutex_enter(&pdk->dk_openlock);
	if (pdk->dk_openmask & ~(1 << RAW_PART))
		error = EBUSY;
	else {
		/* Check for wedge overlap. */
		LIST_FOREACH(lsc, &pdk->dk_wedges, sc_plink) {
			/* XXX arithmetic overflow */
			uint64_t size = dkwedge_size(sc);
			uint64_t lsize = dkwedge_size(lsc);
			daddr_t lastblk = sc->sc_offset + size - 1;
			daddr_t llastblk = lsc->sc_offset + lsize - 1;

			if (sc->sc_offset >= lsc->sc_offset &&
			    sc->sc_offset <= llastblk) {
				/* Overlaps the tail of the existing wedge. */
				break;
			}
			if (lastblk >= lsc->sc_offset &&
			    lastblk <= llastblk) {
				/* Overlaps the head of the existing wedge. */
				break;
			}
		}
		if (lsc != NULL) {
			if (sc->sc_offset == lsc->sc_offset &&
			    dkwedge_size(sc) == dkwedge_size(lsc) &&
			    strcmp(sc->sc_wname, lsc->sc_wname) == 0)
				error = EEXIST;
			else
				error = EINVAL;
		} else {
			pdk->dk_nwedges++;
			LIST_INSERT_HEAD(&pdk->dk_wedges, sc, sc_plink);
		}
	}
	mutex_exit(&pdk->dk_openlock);
	if (error) {
		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/* Fill in our cfdata for the pseudo-device glue. */
	sc->sc_cfdata.cf_name = dk_cd.cd_name;
	sc->sc_cfdata.cf_atname = dk_ca.ca_name;
	/* sc->sc_cfdata.cf_unit set below */
	sc->sc_cfdata.cf_fstate = FSTATE_NOTFOUND; /* use chosen cf_unit */

	/* Insert the larval wedge into the array. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	for (error = 0;;) {
		struct dkwedge_softc **scpp;

		/*
		 * Check for a duplicate wname while searching for
		 * a slot.
		 */
		for (scpp = NULL, unit = 0; unit < ndkwedges; unit++) {
			if (dkwedges[unit] == NULL) {
				if (scpp == NULL) {
					scpp = &dkwedges[unit];
					sc->sc_cfdata.cf_unit = unit;
				}
			} else {
				/* XXX Unicode. */
				if (strcmp(dkwedges[unit]->sc_wname,
					sc->sc_wname) == 0) {
					error = EEXIST;
					break;
				}
			}
		}
		if (error)
			break;
		KASSERT(unit == ndkwedges);
		if (scpp == NULL) {
			error = dkwedge_array_expand();
			if (error)
				break;
		} else {
			KASSERT(scpp == &dkwedges[sc->sc_cfdata.cf_unit]);
			*scpp = sc;
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (error) {
		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return error;
	}

	/*
	 * Now that we know the unit #, attach a pseudo-device for
	 * this wedge instance.  This will provide us with the
	 * device_t necessary for glue to other parts of the system.
	 *
	 * This should never fail, unless we're almost totally out of
	 * memory.
	 */
	if ((dev = config_attach_pseudo_acquire(&sc->sc_cfdata, sc)) == NULL) {
		aprint_error("%s%u: unable to attach pseudo-device\n",
		    sc->sc_cfdata.cf_name, sc->sc_cfdata.cf_unit);

		rw_enter(&dkwedges_lock, RW_WRITER);
		KASSERT(dkwedges[sc->sc_cfdata.cf_unit] == sc);
		dkwedges[sc->sc_cfdata.cf_unit] = NULL;
		rw_exit(&dkwedges_lock);

		mutex_enter(&pdk->dk_openlock);
		pdk->dk_nwedges--;
		LIST_REMOVE(sc, sc_plink);
		mutex_exit(&pdk->dk_openlock);

		mutex_destroy(&sc->sc_iolock);
		bufq_free(sc->sc_bufq);
		dkwedge_size_fini(sc);
		free(sc, M_DKWEDGE);
		return ENOMEM;
	}

	KASSERT(dev == sc->sc_dev);

announce:
	/* Announce our arrival. */
	aprint_normal(
	    "%s at %s: \"%s\", %"PRIu64" blocks at %"PRId64", type: %s\n",
	    device_xname(sc->sc_dev), pdk->dk_name,
	    sc->sc_wname,	/* XXX Unicode */
	    dkwedge_size(sc), sc->sc_offset,
	    sc->sc_ptype[0] == '\0' ? "<unknown>" : sc->sc_ptype);

	/* Return the devname to the caller. */
	strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
	    sizeof(dkw->dkw_devname));

	device_release(sc->sc_dev);
	return 0;
}
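
/*
 * dkwedge_add() is reached from userland via ioctl(DIOCAWEDGE), e.g.
 * dkctl(8) addwedge, and from the discovery methods that run when a
 * disk attaches; both paths supply a struct dkwedge_info naming the
 * parent, offset, size, wedge name, and partition type.
 */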

/*
 * dkwedge_find_acquire:
 *
 *	Lookup a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 *
 *	Return NULL if the wedge is not found, otherwise return
 *	the wedge's softc.  Assign the wedge's unit number to unitp
 *	if unitp is not NULL.  The wedge's sc_dev is referenced and
 *	must be released by device_release or equivalent.
 */
static struct dkwedge_softc *
dkwedge_find_acquire(struct dkwedge_info *dkw, u_int *unitp)
{
	struct dkwedge_softc *sc = NULL;
	u_int unit;

	/* Find our softc. */
	dkw->dkw_devname[sizeof(dkw->dkw_devname) - 1] = '\0';
	rw_enter(&dkwedges_lock, RW_READER);
	for (unit = 0; unit < ndkwedges; unit++) {
		if ((sc = dkwedges[unit]) != NULL &&
		    sc->sc_dev != NULL &&
		    strcmp(device_xname(sc->sc_dev), dkw->dkw_devname) == 0 &&
		    strcmp(sc->sc_parent->dk_name, dkw->dkw_parent) == 0) {
			device_acquire(sc->sc_dev);
			break;
		}
	}
	rw_exit(&dkwedges_lock);
	if (unit == ndkwedges)	/* loop exhausted; sc may be stale */
		sc = NULL;
	if (sc == NULL)
		return NULL;

	if (unitp != NULL)
		*unitp = unit;

	return sc;
}

/*
 * dkwedge_del:		[exported function]
 *
 *	Delete a disk wedge based on the provided information.
 *	NOTE: We look up the wedge based on the wedge devname,
 *	not wname.
 */
int
dkwedge_del(struct dkwedge_info *dkw)
{

	return dkwedge_del1(dkw, 0);
}

int
dkwedge_del1(struct dkwedge_info *dkw, int flags)
{
	struct dkwedge_softc *sc = NULL;

	/* Find our softc. */
	if ((sc = dkwedge_find_acquire(dkw, NULL)) == NULL)
		return ESRCH;

	return config_detach_release(sc->sc_dev, flags);
}
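
/*
 * dkwedge_del() is the ioctl(DIOCDWEDGE) path, e.g. dkctl(8)
 * delwedge; forced deletion when the parent disk goes away happens
 * through dkwedge_delall() below.
 */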

/*
 * dkwedge_detach:
 *
 *	Autoconfiguration detach function for pseudo-device glue.
 */
static int
dkwedge_detach(device_t self, int flags)
{
	struct dkwedge_softc *const sc = device_private(self);
	const u_int unit = device_unit(self);
	int bmaj, cmaj, error;

	error = disk_begindetach(&sc->sc_dk, /*lastclose*/NULL, self, flags);
	if (error)
		return error;

	/* Mark the wedge as dying. */
	sc->sc_state = DKW_STATE_DYING;

	pmf_device_deregister(self);

	/* Kill any pending restart. */
	mutex_enter(&sc->sc_iolock);
	sc->sc_iostop = true;
	mutex_exit(&sc->sc_iolock);
	callout_halt(&sc->sc_restart_ch, NULL);

	/* Locate the wedge major numbers. */
	bmaj = bdevsw_lookup_major(&dk_bdevsw);
	cmaj = cdevsw_lookup_major(&dk_cdevsw);

	/* Nuke the vnodes for any open instances. */
	vdevgone(bmaj, unit, unit, VBLK);
	vdevgone(cmaj, unit, unit, VCHR);

	/*
	 * At this point, all block device opens have been closed,
	 * synchronously flushing any buffered writes; and all
	 * character device I/O operations have completed
	 * synchronously, and character device opens have been closed.
	 *
	 * So there can be no more opens or queued buffers by now.
	 */
	KASSERT(sc->sc_dk.dk_openmask == 0);
	KASSERT(bufq_peek(sc->sc_bufq) == NULL);
	bufq_drain(sc->sc_bufq);

	/* Announce our departure. */
	aprint_normal("%s at %s (%s) deleted\n", device_xname(sc->sc_dev),
	    sc->sc_parent->dk_name,
	    sc->sc_wname);	/* XXX Unicode */

	mutex_enter(&sc->sc_parent->dk_openlock);
	sc->sc_parent->dk_nwedges--;
	LIST_REMOVE(sc, sc_plink);
	mutex_exit(&sc->sc_parent->dk_openlock);

	/* Delete our buffer queue. */
	bufq_free(sc->sc_bufq);

	/* Detach from the disk list. */
	disk_detach(&sc->sc_dk);
	disk_destroy(&sc->sc_dk);

	/* Poof. */
	rw_enter(&dkwedges_lock, RW_WRITER);
	KASSERT(dkwedges[unit] == sc);
	dkwedges[unit] = NULL;
	sc->sc_state = DKW_STATE_DEAD;
	rw_exit(&dkwedges_lock);

	mutex_destroy(&sc->sc_iolock);
	dkwedge_size_fini(sc);

	free(sc, M_DKWEDGE);

	return 0;
}

/*
 * dkwedge_delall:	[exported function]
 *
 *	Forcibly delete all of the wedges on the specified disk.  Used
 *	when a disk is being detached.
 */
void
dkwedge_delall(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/false);
}

/*
 * dkwedge_delidle:	[exported function]
 *
 *	Delete all of the wedges on the specified disk if idle.  Used
 *	by ioctl(DIOCRMWEDGES).
 */
void
dkwedge_delidle(struct disk *pdk)
{

	dkwedge_delall1(pdk, /*idleonly*/true);
}

static void
dkwedge_delall1(struct disk *pdk, bool idleonly)
{
	struct dkwedge_softc *sc;
	int flags;

	flags = DETACH_QUIET;
	if (!idleonly)
		flags |= DETACH_FORCE;

	for (;;) {
		mutex_enter(&pdk->dk_rawlock); /* for sc->sc_dk.dk_openmask */
		mutex_enter(&pdk->dk_openlock);
		LIST_FOREACH(sc, &pdk->dk_wedges, sc_plink) {
			/*
			 * Wedge is not yet created.  This is a race --
			 * it may as well have been added just after we
			 * deleted all the wedges, so pretend it's not
			 * here yet.
			 */
			if (sc->sc_dev == NULL)
				continue;
			if (!idleonly || sc->sc_dk.dk_openmask == 0) {
				device_acquire(sc->sc_dev);
				break;
			}
		}
		if (sc == NULL) {
			KASSERT(idleonly || pdk->dk_nwedges == 0);
			mutex_exit(&pdk->dk_openlock);
			mutex_exit(&pdk->dk_rawlock);
			return;
		}
		mutex_exit(&pdk->dk_openlock);
		mutex_exit(&pdk->dk_rawlock);
		(void)config_detach_release(sc->sc_dev, flags);
	}
}
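
/*
 * One wedge is detached per iteration because config_detach_release()
 * must run with both locks dropped; the loop then rescans the parent's
 * list from the head until no wedge (or, for the idle case, no idle
 * wedge) remains.
 */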
915 */ 916 device_acquire(sc->sc_dev); 917 mutex_exit(&pdk->dk_openlock); 918 error = uiomove(&dkw, sizeof(dkw), &uio); 919 mutex_enter(&pdk->dk_openlock); 920 device_release(sc->sc_dev); 921 if (error) 922 break; 923 924 dkwl->dkwl_ncopied++; 925 } 926 dkwl->dkwl_nwedges = pdk->dk_nwedges; 927 mutex_exit(&pdk->dk_openlock); 928 929 return error; 930 } 931 932 static device_t 933 dkwedge_find_by_wname_acquire(const char *wname) 934 { 935 device_t dv = NULL; 936 struct dkwedge_softc *sc; 937 int i; 938 939 rw_enter(&dkwedges_lock, RW_READER); 940 for (i = 0; i < ndkwedges; i++) { 941 if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL) 942 continue; 943 if (strcmp(sc->sc_wname, wname) == 0) { 944 if (dv != NULL) { 945 printf( 946 "WARNING: double match for wedge name %s " 947 "(%s, %s)\n", wname, device_xname(dv), 948 device_xname(sc->sc_dev)); 949 continue; 950 } 951 device_acquire(sc->sc_dev); 952 dv = sc->sc_dev; 953 } 954 } 955 rw_exit(&dkwedges_lock); 956 return dv; 957 } 958 959 static device_t 960 dkwedge_find_by_parent_acquire(const char *name, size_t *i) 961 { 962 963 rw_enter(&dkwedges_lock, RW_READER); 964 for (; *i < (size_t)ndkwedges; (*i)++) { 965 struct dkwedge_softc *sc; 966 if ((sc = dkwedges[*i]) == NULL || sc->sc_dev == NULL) 967 continue; 968 if (strcmp(sc->sc_parent->dk_name, name) != 0) 969 continue; 970 device_acquire(sc->sc_dev); 971 rw_exit(&dkwedges_lock); 972 return sc->sc_dev; 973 } 974 rw_exit(&dkwedges_lock); 975 return NULL; 976 } 977 978 /* XXX unsafe */ 979 device_t 980 dkwedge_find_by_wname(const char *wname) 981 { 982 device_t dv; 983 984 if ((dv = dkwedge_find_by_wname_acquire(wname)) == NULL) 985 return NULL; 986 device_release(dv); 987 return dv; 988 } 989 990 /* XXX unsafe */ 991 device_t 992 dkwedge_find_by_parent(const char *name, size_t *i) 993 { 994 device_t dv; 995 996 if ((dv = dkwedge_find_by_parent_acquire(name, i)) == NULL) 997 return NULL; 998 device_release(dv); 999 return dv; 1000 } 1001 1002 void 1003 dkwedge_print_wnames(void) 1004 { 1005 struct dkwedge_softc *sc; 1006 int i; 1007 1008 rw_enter(&dkwedges_lock, RW_READER); 1009 for (i = 0; i < ndkwedges; i++) { 1010 if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL) 1011 continue; 1012 printf(" wedge:%s", sc->sc_wname); 1013 } 1014 rw_exit(&dkwedges_lock); 1015 } 1016 1017 /* 1018 * We need a dummy object to stuff into the dkwedge discovery method link 1019 * set to ensure that there is always at least one object in the set. 1020 */ 1021 static struct dkwedge_discovery_method dummy_discovery_method; 1022 __link_set_add_bss(dkwedge_methods, dummy_discovery_method); 1023 1024 /* 1025 * dkwedge_init: 1026 * 1027 * Initialize the disk wedge subsystem. 

/*
 * dkwedge_init:
 *
 *	Initialize the disk wedge subsystem.
 */
void
dkwedge_init(void)
{
	__link_set_decl(dkwedge_methods, struct dkwedge_discovery_method);
	struct dkwedge_discovery_method * const *ddmp;
	struct dkwedge_discovery_method *lddm, *ddm;

	rw_init(&dkwedges_lock);
	rw_init(&dkwedge_discovery_methods_lock);

	if (config_cfdriver_attach(&dk_cd) != 0)
		panic("dkwedge: unable to attach cfdriver");
	if (config_cfattach_attach(dk_cd.cd_name, &dk_ca) != 0)
		panic("dkwedge: unable to attach cfattach");

	rw_enter(&dkwedge_discovery_methods_lock, RW_WRITER);

	LIST_INIT(&dkwedge_discovery_methods);

	__link_set_foreach(ddmp, dkwedge_methods) {
		ddm = *ddmp;
		if (ddm == &dummy_discovery_method)
			continue;
		if (LIST_EMPTY(&dkwedge_discovery_methods)) {
			LIST_INSERT_HEAD(&dkwedge_discovery_methods,
			    ddm, ddm_list);
			continue;
		}
		LIST_FOREACH(lddm, &dkwedge_discovery_methods, ddm_list) {
			if (ddm->ddm_priority == lddm->ddm_priority) {
				aprint_error("dk-method-%s: method \"%s\" "
				    "already exists at priority %d\n",
				    ddm->ddm_name, lddm->ddm_name,
				    lddm->ddm_priority);
				/* Not inserted. */
				break;
			}
			if (ddm->ddm_priority < lddm->ddm_priority) {
				/* Higher priority; insert before. */
				LIST_INSERT_BEFORE(lddm, ddm, ddm_list);
				break;
			}
			if (LIST_NEXT(lddm, ddm_list) == NULL) {
				/* Last one; insert after. */
				KASSERT(lddm->ddm_priority < ddm->ddm_priority);
				LIST_INSERT_AFTER(lddm, ddm, ddm_list);
				break;
			}
		}
	}

	rw_exit(&dkwedge_discovery_methods_lock);
}

#ifdef DKWEDGE_AUTODISCOVER
int	dkwedge_autodiscover = 1;
#else
int	dkwedge_autodiscover = 0;
#endif
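
/*
 * Individual methods are brought into the kernel with options such as
 * DKWEDGE_METHOD_GPT, DKWEDGE_METHOD_MBR, or DKWEDGE_METHOD_BSDLABEL,
 * and options DKWEDGE_AUTODISCOVER makes discovery run by default.
 */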

/*
 * dkwedge_discover:	[exported function]
 *
 *	Discover the wedges on a newly attached disk.
 *	Remove all unused wedges on the disk first.
 */
void
dkwedge_discover(struct disk *pdk)
{
	struct dkwedge_discovery_method *ddm;
	struct vnode *vp;
	int error;
	dev_t pdev;

	/*
	 * Require people playing with wedges to enable this explicitly.
	 */
	if (dkwedge_autodiscover == 0)
		return;

	rw_enter(&dkwedge_discovery_methods_lock, RW_READER);

	/*
	 * Use the character device for scanning; the block device
	 * is busy if there are already wedges attached.
	 */
	error = dkwedge_compute_pdev(pdk->dk_name, &pdev, VCHR);
	if (error) {
		aprint_error("%s: unable to compute pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = cdevvp(pdev, &vp);
	if (error) {
		aprint_error("%s: unable to find vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		goto out;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		aprint_error("%s: unable to lock vnode for pdev, error = %d\n",
		    pdk->dk_name, error);
		vrele(vp);
		goto out;
	}

	error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
	if (error) {
		if (error != ENXIO)
			aprint_error("%s: unable to open device, error = %d\n",
			    pdk->dk_name, error);
		vput(vp);
		goto out;
	}
	VOP_UNLOCK(vp);

	/*
	 * Remove unused wedges.
	 */
	dkwedge_delidle(pdk);

	/*
	 * For each supported partition map type, look to see if
	 * this map type exists.  If so, parse it and add the
	 * corresponding wedges.
	 */
	LIST_FOREACH(ddm, &dkwedge_discovery_methods, ddm_list) {
		error = (*ddm->ddm_discover)(pdk, vp);
		if (error == 0) {
			/* Successfully created wedges; we're done. */
			break;
		}
	}

	error = vn_close(vp, FREAD, NOCRED);
	if (error) {
		aprint_error("%s: unable to close device, error = %d\n",
		    pdk->dk_name, error);
		/* We'll just assume the vnode has been cleaned up. */
	}

out:
	rw_exit(&dkwedge_discovery_methods_lock);
}

/*
 * dkwedge_read:
 *
 *	Read some data from the specified disk, used for
 *	partition discovery.
 */
int
dkwedge_read(struct disk *pdk, struct vnode *vp, daddr_t blkno,
    void *tbuf, size_t len)
{
	buf_t *bp;
	int error;
	bool isopen;
	dev_t bdev;
	struct vnode *bdvp;

	/*
	 * The kernel cannot read from a character device vnode
	 * as physio() only handles user memory.
	 *
	 * If the block device has already been opened by a wedge
	 * use that vnode and temporarily bump the open counter.
	 *
	 * Otherwise try to open the block device.
	 */

	bdev = devsw_chr2blk(vp->v_rdev);

	mutex_enter(&pdk->dk_rawlock);
	if (pdk->dk_rawopens != 0) {
		KASSERT(pdk->dk_rawvp != NULL);
		isopen = true;
		++pdk->dk_rawopens;
		bdvp = pdk->dk_rawvp;
		error = 0;
	} else {
		isopen = false;
		error = dk_open_parent(bdev, FREAD, &bdvp);
	}
	mutex_exit(&pdk->dk_rawlock);

	if (error)
		return error;

	bp = getiobuf(bdvp, true);
	bp->b_flags = B_READ;
	bp->b_cflags = BC_BUSY;
	bp->b_dev = bdev;
	bp->b_data = tbuf;
	bp->b_bufsize = bp->b_bcount = len;
	bp->b_blkno = blkno;
	bp->b_cylinder = 0;
	bp->b_error = 0;

	VOP_STRATEGY(bdvp, bp);
	error = biowait(bp);
	putiobuf(bp);

	mutex_enter(&pdk->dk_rawlock);
	if (isopen) {
		--pdk->dk_rawopens;
	} else {
		dk_close_parent(bdvp, FREAD);
	}
	mutex_exit(&pdk->dk_rawlock);

	return error;
}
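
/*
 * Discovery methods call dkwedge_read() to fetch candidate partition
 * maps (a GPT header, an MBR, a disklabel) before any wedge exists,
 * which is why it must borrow or open the parent's block device
 * instead of going through a wedge.
 */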

/*
 * dkwedge_lookup:
 *
 *	Look up a dkwedge_softc based on the provided dev_t.
 *
 *	Caller must guarantee the wedge is referenced.
 */
static struct dkwedge_softc *
dkwedge_lookup(dev_t dev)
{

	return device_lookup_private(&dk_cd, minor(dev));
}

static struct dkwedge_softc *
dkwedge_lookup_acquire(dev_t dev)
{
	device_t dv = device_lookup_acquire(&dk_cd, minor(dev));

	if (dv == NULL)
		return NULL;
	return device_private(dv);
}

static int
dk_open_parent(dev_t dev, int mode, struct vnode **vpp)
{
	struct vnode *vp;
	int error;

	error = bdevvp(dev, &vp);
	if (error)
		return error;

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error) {
		vrele(vp);
		return error;
	}
	error = VOP_OPEN(vp, mode, NOCRED);
	if (error) {
		vput(vp);
		return error;
	}

	/* VOP_OPEN() doesn't do this for us. */
	if (mode & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	VOP_UNLOCK(vp);

	*vpp = vp;

	return 0;
}

static int
dk_close_parent(struct vnode *vp, int mode)
{
	int error;

	error = vn_close(vp, mode, NOCRED);
	return error;
}

/*
 * dkopen:		[devsw entry point]
 *
 *	Open a wedge.
 */
static int
dkopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state == DKW_STATE_RUNNING);

	/*
	 * We go through a complicated little dance to only open the parent
	 * vnode once per wedge, no matter how many times the wedge is
	 * opened.  The reason?  We see one dkopen() per open call, but
	 * only one dkclose() on the last close.
	 */
	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);
	if (sc->sc_dk.dk_openmask == 0) {
		error = dkfirstopen(sc, flags);
		if (error)
			goto out;
	} else if (flags & ~sc->sc_mode & FWRITE) {
		/*
		 * The parent is already open, but the previous attempt
		 * to open it read/write failed and fell back to
		 * read-only.  In that case, we assume the medium is
		 * read-only and fail to open the wedge read/write.
		 */
		error = EROFS;
		goto out;
	}
	KASSERT(sc->sc_mode != 0);
	KASSERTMSG(sc->sc_mode & FREAD, "%s: sc_mode=%x",
	    device_xname(sc->sc_dev), sc->sc_mode);
	KASSERTMSG((flags & FWRITE) ? (sc->sc_mode & FWRITE) : 1,
	    "%s: flags=%x sc_mode=%x",
	    device_xname(sc->sc_dev), flags, sc->sc_mode);
	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask |= 1;
	else
		sc->sc_dk.dk_bopenmask |= 1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

out:	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);
	return error;
}

static int
dkfirstopen(struct dkwedge_softc *sc, int flags)
{
	struct dkwedge_softc *nsc;
	struct vnode *vp;
	int mode;
	int error;

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));

	if (sc->sc_parent->dk_rawopens == 0) {
		KASSERT(sc->sc_parent->dk_rawvp == NULL);
		/*
		 * Try open read-write.  If this fails for EROFS
		 * and wedge is read-only, retry to open read-only.
		 */
		mode = FREAD | FWRITE;
		error = dk_open_parent(sc->sc_pdev, mode, &vp);
		if (error == EROFS && (flags & FWRITE) == 0) {
			mode &= ~FWRITE;
			error = dk_open_parent(sc->sc_pdev, mode, &vp);
		}
		if (error)
			return error;
		KASSERT(vp != NULL);
		sc->sc_parent->dk_rawvp = vp;
	} else {
		/*
		 * Retrieve mode from an already opened wedge.
		 *
		 * At this point, dk_rawopens is bounded by the number
		 * of dkwedge devices in the system, which is limited
		 * by autoconf device numbering to INT_MAX.  Since
		 * dk_rawopens is unsigned, this can't overflow.
		 */
		KASSERT(sc->sc_parent->dk_rawopens < UINT_MAX);
		KASSERT(sc->sc_parent->dk_rawvp != NULL);
		mode = 0;
		mutex_enter(&sc->sc_parent->dk_openlock);
		LIST_FOREACH(nsc, &sc->sc_parent->dk_wedges, sc_plink) {
			if (nsc == sc || nsc->sc_dk.dk_openmask == 0)
				continue;
			mode = nsc->sc_mode;
			break;
		}
		mutex_exit(&sc->sc_parent->dk_openlock);
	}
	sc->sc_mode = mode;
	sc->sc_parent->dk_rawopens++;

	return 0;
}

static void
dklastclose(struct dkwedge_softc *sc)
{

	KASSERT(mutex_owned(&sc->sc_dk.dk_openlock));
	KASSERT(mutex_owned(&sc->sc_parent->dk_rawlock));
	KASSERT(sc->sc_parent->dk_rawopens > 0);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	if (--sc->sc_parent->dk_rawopens == 0) {
		struct vnode *const vp = sc->sc_parent->dk_rawvp;
		const int mode = sc->sc_mode;

		sc->sc_parent->dk_rawvp = NULL;
		sc->sc_mode = 0;

		dk_close_parent(vp, mode);
	}
}
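
/*
 * Note that the parent is closed with the mode recorded at the wedge's
 * first open: when the parent vnode is shared, dkfirstopen() copies
 * sc_mode from an already-open sibling wedge, so every open wedge
 * agrees on the mode with which dk_rawvp was opened.
 */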

/*
 * dkclose:		[devsw entry point]
 *
 *	Close a wedge.
 */
static int
dkclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	/*
	 * dkclose can be called even if dkopen didn't succeed, so we
	 * have to handle the same possibility that the wedge may not
	 * exist.
	 */
	if (sc == NULL)
		return ENXIO;
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	mutex_enter(&sc->sc_dk.dk_openlock);
	mutex_enter(&sc->sc_parent->dk_rawlock);

	KASSERT(sc->sc_dk.dk_openmask != 0);

	if (fmt == S_IFCHR)
		sc->sc_dk.dk_copenmask &= ~1;
	else
		sc->sc_dk.dk_bopenmask &= ~1;
	sc->sc_dk.dk_openmask =
	    sc->sc_dk.dk_copenmask | sc->sc_dk.dk_bopenmask;

	if (sc->sc_dk.dk_openmask == 0) {
		dklastclose(sc);
	}

	mutex_exit(&sc->sc_parent->dk_rawlock);
	mutex_exit(&sc->sc_dk.dk_openlock);

	return 0;
}

/*
 * dkcancel:		[devsw entry point]
 *
 *	Cancel any pending I/O operations waiting on a wedge.
 */
static int
dkcancel(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	/*
	 * Disk I/O is expected to complete or fail within a reasonable
	 * timeframe -- it's storage, not communication.  Further, the
	 * character and block device interface guarantees that prior
	 * reads and writes have completed or failed by the time close
	 * returns -- we are not to cancel them here.  If the parent
	 * device's hardware is gone, the parent driver can make them
	 * fail.  Nothing for dk(4) itself to do.
	 */

	return 0;
}

/*
 * dkstrategy:		[devsw entry point]
 *
 *	Perform I/O based on the wedge I/O strategy.
 */
static void
dkstrategy(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	uint64_t p_size, p_offset;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* If it's an empty transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	/* Make sure it's in-range. */
	if (bounds_check_with_mediasize(bp, DEV_BSIZE, p_size) <= 0)
		goto done;

	/* Translate it to the parent's raw LBA. */
	bp->b_rawblkno = bp->b_blkno + p_offset;

	/* Place it in the queue and start I/O on the unit. */
	mutex_enter(&sc->sc_iolock);
	disk_wait(&sc->sc_dk);
	bufq_put(sc->sc_bufq, bp);
	mutex_exit(&sc->sc_iolock);

	dkstart(sc);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * dkstart:
 *
 *	Start I/O that has been enqueued on the wedge.
 */
static void
dkstart(struct dkwedge_softc *sc)
{
	struct vnode *vp;
	struct buf *bp, *nbp;

	mutex_enter(&sc->sc_iolock);

	/* Do as much work as has been enqueued. */
	while ((bp = bufq_peek(sc->sc_bufq)) != NULL) {
		if (sc->sc_iostop) {
			(void) bufq_get(sc->sc_bufq);
			mutex_exit(&sc->sc_iolock);
			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* fetch an I/O buf with sc_iolock dropped */
		mutex_exit(&sc->sc_iolock);
		nbp = getiobuf(sc->sc_parent->dk_rawvp, false);
		mutex_enter(&sc->sc_iolock);
		if (nbp == NULL) {
			/*
			 * No resources to run this request; leave the
			 * buffer queued up, and schedule a timer to
			 * restart the queue in 1/2 a second.
			 */
			if (!sc->sc_iostop)
				callout_schedule(&sc->sc_restart_ch, hz/2);
			break;
		}

		/*
		 * fetch buf, this can fail if another thread
		 * has already processed the queue, it can also
		 * return a completely different buf.
		 */
		bp = bufq_get(sc->sc_bufq);
		if (bp == NULL) {
			mutex_exit(&sc->sc_iolock);
			putiobuf(nbp);
			mutex_enter(&sc->sc_iolock);
			continue;
		}

		/* Instrumentation. */
		disk_busy(&sc->sc_dk);

		/* release lock for VOP_STRATEGY */
		mutex_exit(&sc->sc_iolock);

		nbp->b_data = bp->b_data;
		nbp->b_flags = bp->b_flags;
		nbp->b_oflags = bp->b_oflags;
		nbp->b_cflags = bp->b_cflags;
		nbp->b_iodone = dkiodone;
		nbp->b_proc = bp->b_proc;
		nbp->b_blkno = bp->b_rawblkno;
		nbp->b_dev = sc->sc_parent->dk_rawvp->v_rdev;
		nbp->b_bcount = bp->b_bcount;
		nbp->b_private = bp;
		BIO_COPYPRIO(nbp, bp);

		vp = nbp->b_vp;
		if ((nbp->b_flags & B_READ) == 0) {
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		VOP_STRATEGY(vp, nbp);

		mutex_enter(&sc->sc_iolock);
	}

	mutex_exit(&sc->sc_iolock);
}

/*
 * dkiodone:
 *
 *	I/O to a wedge has completed; alert the top half.
 */
static void
dkiodone(struct buf *bp)
{
	struct buf *obp = bp->b_private;
	struct dkwedge_softc *sc = dkwedge_lookup(obp->b_dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);

	if (bp->b_error != 0)
		obp->b_error = bp->b_error;
	obp->b_resid = bp->b_resid;
	putiobuf(bp);

	mutex_enter(&sc->sc_iolock);
	disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid,
	    obp->b_flags & B_READ);
	mutex_exit(&sc->sc_iolock);

	biodone(obp);

	/* Kick the queue in case there is more work we can do. */
	dkstart(sc);
}

/*
 * dkrestart:
 *
 *	Restart the work queue after it was stalled due to
 *	a resource shortage.  Invoked via a callout.
 */
static void
dkrestart(void *v)
{
	struct dkwedge_softc *sc = v;

	dkstart(sc);
}

/*
 * dkminphys:
 *
 *	Call parent's minphys function.
 */
static void
dkminphys(struct buf *bp)
{
	struct dkwedge_softc *sc = dkwedge_lookup(bp->b_dev);
	dev_t dev;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);

	dev = bp->b_dev;
	bp->b_dev = sc->sc_pdev;
	if (sc->sc_parent->dk_driver && sc->sc_parent->dk_driver->d_minphys)
		(*sc->sc_parent->dk_driver->d_minphys)(bp);
	else
		minphys(bp);
	bp->b_dev = dev;
}

/*
 * dkread:		[devsw entry point]
 *
 *	Read from a wedge.
 */
static int
dkread(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_READ, dkminphys, uio);
}

/*
 * dkwrite:		[devsw entry point]
 *
 *	Write to a wedge.
 */
static int
dkwrite(dev_t dev, struct uio *uio, int flags)
{
	struct dkwedge_softc *sc __diagused = dkwedge_lookup(dev);

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);

	return physio(dkstrategy, NULL, dev, B_WRITE, dkminphys, uio);
}

/*
 * dkioctl:		[devsw entry point]
 *
 *	Perform an ioctl request on a wedge.
 */
static int
dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	int error = 0;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/*
	 * We pass NODEV instead of our device to indicate we don't
	 * want to handle disklabel ioctls.
	 */
	error = disk_ioctl(&sc->sc_dk, NODEV, cmd, data, flag, l);
	if (error != EPASSTHROUGH)
		return error;

	error = 0;

	switch (cmd) {
	case DIOCGSTRATEGY:
	case DIOCGCACHE:
	case DIOCCACHESYNC:
		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, data, flag,
		    l != NULL ? l->l_cred : NOCRED);
		break;
	case DIOCGWEDGEINFO: {
		struct dkwedge_info *dkw = data;

		strlcpy(dkw->dkw_devname, device_xname(sc->sc_dev),
		    sizeof(dkw->dkw_devname));
		memcpy(dkw->dkw_wname, sc->sc_wname, sizeof(dkw->dkw_wname));
		dkw->dkw_wname[sizeof(dkw->dkw_wname) - 1] = '\0';
		strlcpy(dkw->dkw_parent, sc->sc_parent->dk_name,
		    sizeof(dkw->dkw_parent));
		dkw->dkw_offset = sc->sc_offset;
		dkw->dkw_size = dkwedge_size(sc);
		strlcpy(dkw->dkw_ptype, sc->sc_ptype, sizeof(dkw->dkw_ptype));

		break;
	}
	case DIOCGSECTORALIGN: {
		struct disk_sectoralign *dsa = data;
		uint32_t r;

		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
		    l != NULL ? l->l_cred : NOCRED);
		if (error)
			break;

		r = sc->sc_offset % dsa->dsa_alignment;
		if (r < dsa->dsa_firstaligned)
			dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
		else
			dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
			    dsa->dsa_alignment) - r;
		dsa->dsa_firstaligned %= dsa->dsa_alignment;
		break;
	}
	default:
		error = ENOTTY;
	}

	return error;
}
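
/*
 * The DIOCGSECTORALIGN case rebases the parent's alignment to the
 * wedge.  For example, with dsa_alignment = 8, parent
 * dsa_firstaligned = 0, and a wedge at offset 34: r = 34 % 8 = 2, so
 * dsa_firstaligned becomes ((0 + 8) - 2) % 8 = 6, and wedge-relative
 * sectors 6, 14, 22, ... sit on the parent's aligned boundaries.
 */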

/*
 * dkdiscard:		[devsw entry point]
 *
 *	Perform a discard-range request on a wedge.
 */
static int
dkdiscard(dev_t dev, off_t pos, off_t len)
{
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t size = dkwedge_size(sc);
	unsigned shift;
	off_t offset, maxlen;
	int error;

	KASSERT(sc != NULL);
	KASSERT(sc->sc_dev != NULL);
	KASSERT(sc->sc_state != DKW_STATE_LARVAL);
	KASSERT(sc->sc_state != DKW_STATE_DEAD);
	KASSERT(sc->sc_parent->dk_rawvp != NULL);

	/* XXX check bounds on size/offset up front */
	shift = (sc->sc_parent->dk_blkshift + DEV_BSHIFT);
	KASSERT(__type_fit(off_t, size));
	KASSERT(__type_fit(off_t, sc->sc_offset));
	KASSERT(0 <= sc->sc_offset);
	KASSERT(size <= (__type_max(off_t) >> shift));
	KASSERT(sc->sc_offset <= ((__type_max(off_t) >> shift) - size));
	offset = ((off_t)sc->sc_offset << shift);
	maxlen = ((off_t)size << shift);

	if (len > maxlen)
		return EINVAL;
	if (pos > (maxlen - len))
		return EINVAL;

	pos += offset;

	vn_lock(sc->sc_parent->dk_rawvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FDISCARD(sc->sc_parent->dk_rawvp, pos, len);
	VOP_UNLOCK(sc->sc_parent->dk_rawvp);

	return error;
}

/*
 * dksize:		[devsw entry point]
 *
 *	Query the size of a wedge for the purpose of performing a dump
 *	or for swapping to.
 */
static int
dksize(dev_t dev)
{
	/*
	 * Don't bother taking a reference because this is only used
	 * either (a) while the device is open (for swap), or (b) while
	 * any multiprocessing is quiescent (for crash dumps).
	 */
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	uint64_t p_size;
	int rv = -1;

	if (sc == NULL)
		return -1;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return -1;

	/* Our content type is static, no need to open the device. */

	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;
	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) == 0) {
		/* Saturate if we are larger than INT_MAX. */
		if (p_size > INT_MAX)
			rv = INT_MAX;
		else
			rv = (int)p_size;
	}

	return rv;
}

/*
 * dkdump:		[devsw entry point]
 *
 *	Perform a crash dump to a wedge.
 */
static int
dkdump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	/*
	 * Don't bother taking a reference because this is only used
	 * while any multiprocessing is quiescent.
	 */
	struct dkwedge_softc *sc = dkwedge_lookup(dev);
	const struct bdevsw *bdev;
	uint64_t p_size, p_offset;

	if (sc == NULL)
		return ENXIO;
	if (sc->sc_state != DKW_STATE_RUNNING)
		return ENXIO;

	/* Our content type is static, no need to open the device. */

	if (strcmp(sc->sc_ptype, DKW_PTYPE_SWAP) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_RAID) != 0 &&
	    strcmp(sc->sc_ptype, DKW_PTYPE_CGD) != 0)
		return ENXIO;
	if (size % DEV_BSIZE != 0)
		return EINVAL;

	p_offset = sc->sc_offset << sc->sc_parent->dk_blkshift;
	p_size = dkwedge_size(sc) << sc->sc_parent->dk_blkshift;

	if (blkno < 0 || blkno + size/DEV_BSIZE > p_size) {
		printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
		    "p_size (%" PRIu64 ")\n", __func__, blkno,
		    size/DEV_BSIZE, p_size);
		return EINVAL;
	}

	bdev = bdevsw_lookup(sc->sc_pdev);
	return (*bdev->d_dump)(sc->sc_pdev, blkno + p_offset, va, size);
}

/*
 * config glue
 */

/*
 * dkwedge_find_partition:
 *
 *	Find wedge corresponding to the specified parent name
 *	and offset/length.
 */
static device_t
dkwedge_find_partition_acquire(device_t parent, daddr_t startblk,
    uint64_t nblks)
{
	struct dkwedge_softc *sc;
	int i;
	device_t wedge = NULL;

	rw_enter(&dkwedges_lock, RW_READER);
	for (i = 0; i < ndkwedges; i++) {
		if ((sc = dkwedges[i]) == NULL || sc->sc_dev == NULL)
			continue;
		if (strcmp(sc->sc_parent->dk_name, device_xname(parent)) == 0 &&
		    sc->sc_offset == startblk &&
		    dkwedge_size(sc) == nblks) {
			if (wedge) {
				printf("WARNING: double match for boot wedge "
				    "(%s, %s)\n",
				    device_xname(wedge),
				    device_xname(sc->sc_dev));
				continue;
			}
			wedge = sc->sc_dev;
			device_acquire(wedge);
		}
	}
	rw_exit(&dkwedges_lock);

	return wedge;
}

/* XXX unsafe */
device_t
dkwedge_find_partition(device_t parent, daddr_t startblk,
    uint64_t nblks)
{
	device_t dv;

	if ((dv = dkwedge_find_partition_acquire(parent, startblk, nblks))
	    == NULL)
		return NULL;
	device_release(dv);
	return dv;
}

const char *
dkwedge_get_parent_name(dev_t dev)
{
	/* XXX: perhaps do this in lookup? */
	int bmaj = bdevsw_lookup_major(&dk_bdevsw);
	int cmaj = cdevsw_lookup_major(&dk_cdevsw);

	if (major(dev) != bmaj && major(dev) != cmaj)
		return NULL;

	struct dkwedge_softc *const sc = dkwedge_lookup_acquire(dev);
	if (sc == NULL)
		return NULL;
	const char *const name = sc->sc_parent->dk_name;
	device_release(sc->sc_dev);
	return name;
}