/* Home | History | Annotate | Line # | Download | only in kern */
      1 /*	$NetBSD: subr_devsw.c,v 1.55 2026/01/04 03:15:58 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2001, 2002, 2007, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by MAEKAWA Masahide <gehenna (at) NetBSD.org>, and by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Overview
     34  *
     35  *	subr_devsw.c: registers device drivers by name and by major
     36  *	number, and provides wrapper methods for performing I/O and
     37  *	other tasks on device drivers, keying on the device number
     38  *	(dev_t).
     39  *
     40  *	When the system is built, the config(8) command generates
     41  *	static tables of device drivers built into the kernel image
     42  *	along with their associated methods.  These are recorded in
     43  *	the cdevsw0 and bdevsw0 tables.  Drivers can also be added to
     44  *	and removed from the system dynamically.
     45  *
     46  * Allocation
     47  *
     48  *	When the system initially boots only the statically allocated
     49  *	indexes (bdevsw0, cdevsw0) are used.  If these overflow due to
     50  *	allocation, we allocate a fixed block of memory to hold the new,
     51  *	expanded index.  This "fork" of the table is only ever performed
     52  *	once in order to guarantee that other threads may safely access
     53  *	the device tables:
     54  *
     55  *	o Once a thread has a "reference" to the table via an earlier
     56  *	  open() call, we know that the entry in the table must exist
     57  *	  and so it is safe to access it.
     58  *
     59  *	o Regardless of whether other threads see the old or new
     60  *	  pointers, they will point to a correct device switch
     61  *	  structure for the operation being performed.
     62  *
     63  *	XXX Currently, the wrapper methods such as cdev_read() verify
     64  *	that a device driver does in fact exist before calling the
     65  *	associated driver method.  This should be changed so that
     66  *	once the device is has been referenced by a vnode (opened),
     67  *	calling	the other methods should be valid until that reference
     68  *	is dropped.
     69  */
     70 
     71 #include <sys/cdefs.h>
     72 __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.55 2026/01/04 03:15:58 riastradh Exp $");
     73 
     74 #ifdef _KERNEL_OPT
     75 #include "opt_dtrace.h"
     76 #endif
     77 
     78 #include <sys/param.h>
     79 #include <sys/types.h>
     80 
     81 #include <sys/atomic.h>
     82 #include <sys/buf.h>
     83 #include <sys/conf.h>
     84 #include <sys/cpu.h>
     85 #include <sys/device.h>
     86 #include <sys/kmem.h>
     87 #include <sys/localcount.h>
     88 #include <sys/poll.h>
     89 #include <sys/pserialize.h>
     90 #include <sys/reboot.h>
     91 #include <sys/sdt.h>
     92 #include <sys/systm.h>
     93 #include <sys/tty.h>
     94 #include <sys/xcall.h>
     95 
     96 #ifdef DEVSW_DEBUG
     97 #define	DPRINTF(x)	printf x
     98 #else /* DEVSW_DEBUG */
     99 #define	DPRINTF(x)
    100 #endif /* DEVSW_DEBUG */
    101 
    102 #define	MAXDEVSW	512	/* the maximum of major device number */
    103 #define	BDEVSW_SIZE	(sizeof(struct bdevsw *))
    104 #define	CDEVSW_SIZE	(sizeof(struct cdevsw *))
    105 #define	DEVSWCONV_SIZE	(sizeof(struct devsw_conv))
    106 
    107 struct devswref {
    108 	struct localcount	*dr_lc;
    109 };
    110 
    111 /* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */
    112 extern const struct bdevsw **bdevsw, *bdevsw0[];
    113 extern const struct cdevsw **cdevsw, *cdevsw0[];
    114 extern struct devsw_conv *devsw_conv, devsw_conv0[];
    115 extern const int sys_bdevsws, sys_cdevsws;
    116 extern int max_bdevsws, max_cdevsws, max_devsw_convs;
    117 
    118 static struct devswref *cdevswref;
    119 static struct devswref *bdevswref;
    120 static kcondvar_t devsw_cv;
    121 
    122 static int bdevsw_attach(const struct bdevsw *, devmajor_t *);
    123 static int cdevsw_attach(const struct cdevsw *, devmajor_t *);
    124 static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *);
    125 
    126 kmutex_t device_lock;
    127 
    128 void (*biodone_vfs)(buf_t *) = (void *)nullop;
    129 
    130 /*
    131  * bdev probes
    132  */
    133 SDT_PROBE_DEFINE6(sdt, bdev, open, acquire,
    134     "struct bdevsw *"/*bdevsw*/,
    135     "dev_t"/*dev*/,
    136     "int"/*flag*/,
    137     "int"/*devtype*/,
    138     "int"/*unit*/,
    139     "device_t"/*dv*/);
    140 SDT_PROBE_DEFINE4(sdt, bdev, open, entry,
    141     "struct bdevsw *"/*bdevsw*/,
    142     "dev_t"/*dev*/,
    143     "int"/*flag*/,
    144     "int"/*devtype*/);
    145 SDT_PROBE_DEFINE5(sdt, bdev, open, return,
    146     "struct bdevsw *"/*bdevsw*/,
    147     "dev_t"/*dev*/,
    148     "int"/*flag*/,
    149     "int"/*devtype*/,
    150     "int"/*error*/);
    151 SDT_PROBE_DEFINE6(sdt, bdev, open, release,
    152     "struct bdevsw *"/*bdevsw*/,
    153     "dev_t"/*dev*/,
    154     "int"/*flag*/,
    155     "int"/*devtype*/,
    156     "int"/*unit*/,
    157     "device_t"/*dv*/);
    158 
    159 SDT_PROBE_DEFINE4(sdt, bdev, cancel, entry,
    160     "struct bdevsw *"/*bdevsw*/,
    161     "dev_t"/*dev*/,
    162     "int"/*flag*/,
    163     "int"/*devtype*/);
    164 SDT_PROBE_DEFINE5(sdt, bdev, cancel, return,
    165     "struct bdevsw *"/*bdevsw*/,
    166     "dev_t"/*dev*/,
    167     "int"/*flag*/,
    168     "int"/*devtype*/,
    169     "int"/*error*/);
    170 
    171 SDT_PROBE_DEFINE4(sdt, bdev, close, entry,
    172     "struct bdevsw *"/*bdevsw*/,
    173     "dev_t"/*dev*/,
    174     "int"/*flag*/,
    175     "int"/*devtype*/);
    176 SDT_PROBE_DEFINE5(sdt, bdev, close, return,
    177     "struct bdevsw *"/*bdevsw*/,
    178     "dev_t"/*dev*/,
    179     "int"/*flag*/,
    180     "int"/*devtype*/,
    181     "int"/*error*/);
    182 
    183 SDT_PROBE_DEFINE3(sdt, bdev, strategy, entry,
    184     "struct bdevsw *"/*bdevsw*/,
    185     "dev_t"/*dev*/,
    186     "struct buf *"/*bp*/);
    187 SDT_PROBE_DEFINE3(sdt, bdev, strategy, return,
    188     "struct bdevsw *"/*bdevsw*/,
    189     "dev_t"/*dev*/,
    190     "struct buf *"/*bp*/);
    191 
    192 SDT_PROBE_DEFINE5(sdt, bdev, ioctl, entry,
    193     "struct bdevsw *"/*bdevsw*/,
    194     "dev_t"/*dev*/,
    195     "unsigned long"/*cmd*/,
    196     "void *"/*data*/,
    197     "int"/*flag*/);
    198 SDT_PROBE_DEFINE6(sdt, bdev, ioctl, return,
    199     "struct bdevsw *"/*bdevsw*/,
    200     "dev_t"/*dev*/,
    201     "unsigned long"/*cmd*/,
    202     "void *"/*data*/,
    203     "int"/*flag*/,
    204     "int"/*error*/);
    205 
    206 SDT_PROBE_DEFINE2(sdt, bdev, psize, entry,
    207     "struct bdevsw *"/*bdevsw*/,
    208     "dev_t"/*dev*/);
    209 SDT_PROBE_DEFINE3(sdt, bdev, psize, return,
    210     "struct bdevsw *"/*bdevsw*/,
    211     "dev_t"/*dev*/,
    212     "int"/*psize*/);
    213 
    214 SDT_PROBE_DEFINE4(sdt, bdev, discard, entry,
    215     "struct bdevsw *"/*bdevsw*/,
    216     "dev_t"/*dev*/,
    217     "off_t"/*pos*/,
    218     "off_t"/*len*/);
    219 SDT_PROBE_DEFINE5(sdt, bdev, discard, return,
    220     "struct bdevsw *"/*bdevsw*/,
    221     "dev_t"/*dev*/,
    222     "off_t"/*pos*/,
    223     "off_t"/*len*/,
    224     "int"/*error*/);
    225 
    226 /*
    227  * cdev probes
    228  */
    229 SDT_PROBE_DEFINE6(sdt, cdev, open, acquire,
    230     "struct cdevsw *"/*cdevsw*/,
    231     "dev_t"/*dev*/,
    232     "int"/*flag*/,
    233     "int"/*devtype*/,
    234     "int"/*unit*/,
    235     "device_t"/*dv*/);
    236 SDT_PROBE_DEFINE4(sdt, cdev, open, entry,
    237     "struct cdevsw *"/*cdevsw*/,
    238     "dev_t"/*dev*/,
    239     "int"/*flag*/,
    240     "int"/*devtype*/);
    241 SDT_PROBE_DEFINE5(sdt, cdev, open, return,
    242     "struct cdevsw *"/*cdevsw*/,
    243     "dev_t"/*dev*/,
    244     "int"/*flag*/,
    245     "int"/*devtype*/,
    246     "int"/*error*/);
    247 SDT_PROBE_DEFINE6(sdt, cdev, open, release,
    248     "struct cdevsw *"/*cdevsw*/,
    249     "dev_t"/*dev*/,
    250     "int"/*flag*/,
    251     "int"/*devtype*/,
    252     "int"/*unit*/,
    253     "device_t"/*dv*/);
    254 
    255 SDT_PROBE_DEFINE4(sdt, cdev, cancel, entry,
    256     "struct cdevsw *"/*cdevsw*/,
    257     "dev_t"/*dev*/,
    258     "int"/*flag*/,
    259     "int"/*devtype*/);
    260 SDT_PROBE_DEFINE5(sdt, cdev, cancel, return,
    261     "struct cdevsw *"/*cdevsw*/,
    262     "dev_t"/*dev*/,
    263     "int"/*flag*/,
    264     "int"/*devtype*/,
    265     "int"/*error*/);
    266 
    267 SDT_PROBE_DEFINE4(sdt, cdev, close, entry,
    268     "struct cdevsw *"/*cdevsw*/,
    269     "dev_t"/*dev*/,
    270     "int"/*flag*/,
    271     "int"/*devtype*/);
    272 SDT_PROBE_DEFINE5(sdt, cdev, close, return,
    273     "struct cdevsw *"/*cdevsw*/,
    274     "dev_t"/*dev*/,
    275     "int"/*flag*/,
    276     "int"/*devtype*/,
    277     "int"/*error*/);
    278 
    279 SDT_PROBE_DEFINE4(sdt, cdev, read, entry,
    280     "struct cdevsw *"/*cdevsw*/,
    281     "dev_t"/*dev*/,
    282     "struct uio *"/*uio*/,
    283     "int"/*flag*/);
    284 SDT_PROBE_DEFINE5(sdt, cdev, read, return,
    285     "struct cdevsw *"/*cdevsw*/,
    286     "dev_t"/*dev*/,
    287     "struct uio *"/*uio*/,
    288     "int"/*flag*/,
    289     "int"/*error*/);
    290 
    291 SDT_PROBE_DEFINE4(sdt, cdev, write, entry,
    292     "struct cdevsw *"/*cdevsw*/,
    293     "dev_t"/*dev*/,
    294     "struct uio *"/*uio*/,
    295     "int"/*flag*/);
    296 SDT_PROBE_DEFINE5(sdt, cdev, write, return,
    297     "struct cdevsw *"/*cdevsw*/,
    298     "dev_t"/*dev*/,
    299     "struct uio *"/*uio*/,
    300     "int"/*flag*/,
    301     "int"/*error*/);
    302 
    303 SDT_PROBE_DEFINE5(sdt, cdev, ioctl, entry,
    304     "struct cdevsw *"/*cdevsw*/,
    305     "dev_t"/*dev*/,
    306     "unsigned long"/*cmd*/,
    307     "void *"/*data*/,
    308     "int"/*flag*/);
    309 SDT_PROBE_DEFINE6(sdt, cdev, ioctl, return,
    310     "struct cdevsw *"/*cdevsw*/,
    311     "dev_t"/*dev*/,
    312     "unsigned long"/*cmd*/,
    313     "void *"/*data*/,
    314     "int"/*flag*/,
    315     "int"/*error*/);
    316 
    317 SDT_PROBE_DEFINE4(sdt, cdev, stop, entry,
    318     "struct cdevsw *"/*cdevsw*/,
    319     "dev_t"/*dev*/,
    320     "struct tty *"/*tp*/,
    321     "int"/*flag*/);
    322 SDT_PROBE_DEFINE4(sdt, cdev, stop, return,
    323     "struct cdevsw *"/*cdevsw*/,
    324     "dev_t"/*dev*/,
    325     "struct tty *"/*tp*/,
    326     "int"/*flag*/);
    327 
    328 SDT_PROBE_DEFINE3(sdt, cdev, poll, entry,
    329     "struct cdevsw *"/*cdevsw*/,
    330     "dev_t"/*dev*/,
    331     "int"/*events*/);
    332 SDT_PROBE_DEFINE4(sdt, cdev, poll, return,
    333     "struct cdevsw *"/*cdevsw*/,
    334     "dev_t"/*dev*/,
    335     "int"/*events*/,
    336     "int"/*revents*/);
    337 
    338 SDT_PROBE_DEFINE4(sdt, cdev, mmap, entry,
    339     "struct cdevsw *"/*cdevsw*/,
    340     "dev_t"/*dev*/,
    341     "off_t"/*off*/,
    342     "int"/*flag*/);
    343 SDT_PROBE_DEFINE5(sdt, cdev, mmap, return,
    344     "struct cdevsw *"/*cdevsw*/,
    345     "dev_t"/*dev*/,
    346     "off_t"/*off*/,
    347     "int"/*flag*/,
    348     "paddr_t"/*mmapcookie*/);
    349 
    350 SDT_PROBE_DEFINE3(sdt, cdev, kqfilter, entry,
    351     "struct cdevsw *"/*cdevsw*/,
    352     "dev_t"/*dev*/,
    353     "struct knote *"/*kn*/);
    354 SDT_PROBE_DEFINE4(sdt, cdev, kqfilter, return,
    355     "struct cdevsw *"/*cdevsw*/,
    356     "dev_t"/*dev*/,
    357     "struct knote *"/*kn*/,
    358     "int"/*error*/);
    359 
    360 SDT_PROBE_DEFINE4(sdt, cdev, discard, entry,
    361     "struct cdevsw *"/*cdevsw*/,
    362     "dev_t"/*dev*/,
    363     "off_t"/*pos*/,
    364     "off_t"/*len*/);
    365 SDT_PROBE_DEFINE5(sdt, cdev, discard, return,
    366     "struct cdevsw *"/*cdevsw*/,
    367     "dev_t"/*dev*/,
    368     "off_t"/*pos*/,
    369     "off_t"/*len*/,
    370     "int"/*error*/);
    371 
/*
 * devsw_init:
 *
 *	Initialize global devsw state: the lock protecting the device
 *	switch tables and the condition variable used when draining or
 *	releasing localcount references.  Called once early at boot,
 *	before any dynamic attach/detach can occur.
 */
void
devsw_init(void)
{

	/*
	 * The statically configured majors must leave room in the
	 * fixed-size (MAXDEVSW) tables for dynamic attachment.
	 */
	KASSERT(sys_bdevsws < MAXDEVSW - 1);
	KASSERT(sys_cdevsws < MAXDEVSW - 1);
	mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE);

	cv_init(&devsw_cv, "devsw");
}
    382 
/*
 * devsw_attach:
 *
 *	Register a block (optional) and character (required) device
 *	switch under the name devname.  On entry, *bmajor and *cmajor
 *	hold the requested majors, or -1 to reuse a previously recorded
 *	conversion entry or allocate fresh majors; on success they hold
 *	the majors actually used.  Returns 0 on success or an errno.
 */
int
devsw_attach(const char *devname,
	     const struct bdevsw *bdev, devmajor_t *bmajor,
	     const struct cdevsw *cdev, devmajor_t *cmajor)
{
	struct devsw_conv *conv;
	char *name;
	int error, i;

	/* A character device switch is mandatory; a block one is not. */
	if (devname == NULL || cdev == NULL)
		return SET_ERROR(EINVAL);

	mutex_enter(&device_lock);

	/*
	 * Look for an existing name->major conversion entry.  If one
	 * exists, the caller's majors (if specified) must agree with
	 * it, and the corresponding table slots must be free.
	 */
	for (i = 0; i < max_devsw_convs; i++) {
		conv = &devsw_conv[i];
		if (conv->d_name == NULL || strcmp(devname, conv->d_name) != 0)
			continue;

		/* Unspecified majors inherit the recorded ones. */
		if ((bdev != NULL) && (*bmajor < 0))
			*bmajor = conv->d_bmajor;
		if (*cmajor < 0)
			*cmajor = conv->d_cmajor;

		if (*bmajor != conv->d_bmajor || *cmajor != conv->d_cmajor) {
			error = SET_ERROR(EINVAL);
			goto out;
		}
		if ((*bmajor >= 0 && bdev == NULL) || *cmajor < 0) {
			error = SET_ERROR(EINVAL);
			goto out;
		}

		if ((*bmajor >= 0 && bdevsw[*bmajor] != NULL) ||
		    cdevsw[*cmajor] != NULL) {
			error = SET_ERROR(EEXIST);
			goto out;
		}
		break;
	}

	/*
	 * XXX This should allocate what it needs up front so we never
	 * need to flail around trying to unwind.
	 */
	error = bdevsw_attach(bdev, bmajor);
	if (error != 0)
		goto out;
	error = cdevsw_attach(cdev, cmajor);
	if (error != 0) {
		/* Undo the bdevsw entry made just above. */
		devsw_detach_locked(bdev, NULL);
		goto out;
	}

	/*
	 * If we already found a conv, we're done.  Otherwise, find an
	 * empty slot or extend the table.
	 */
	if (i < max_devsw_convs) {
		error = 0;
		goto out;
	}

	for (i = 0; i < max_devsw_convs; i++) {
		if (devsw_conv[i].d_name == NULL)
			break;
	}
	if (i == max_devsw_convs) {
		/* No free slot: grow the conversion table by one. */
		struct devsw_conv *newptr;
		int old_convs, new_convs;

		old_convs = max_devsw_convs;
		new_convs = old_convs + 1;

		newptr = kmem_zalloc(new_convs * DEVSWCONV_SIZE, KM_NOSLEEP);
		if (newptr == NULL) {
			devsw_detach_locked(bdev, cdev);
			error = SET_ERROR(ENOMEM);
			goto out;
		}
		/* Initialize the new tail entry, then copy the rest. */
		newptr[old_convs].d_name = NULL;
		newptr[old_convs].d_bmajor = -1;
		newptr[old_convs].d_cmajor = -1;
		memcpy(newptr, devsw_conv, old_convs * DEVSWCONV_SIZE);
		/* Never free the static initial table. */
		if (devsw_conv != devsw_conv0)
			kmem_free(devsw_conv, old_convs * DEVSWCONV_SIZE);
		devsw_conv = newptr;
		max_devsw_convs = new_convs;
	}

	name = kmem_strdupsize(devname, NULL, KM_NOSLEEP);
	if (name == NULL) {
		devsw_detach_locked(bdev, cdev);
		error = SET_ERROR(ENOMEM);
		goto out;
	}

	/* Record the name->majors conversion for future lookups. */
	devsw_conv[i].d_name = name;
	devsw_conv[i].d_bmajor = *bmajor;
	devsw_conv[i].d_cmajor = *cmajor;
	error = 0;
out:
	mutex_exit(&device_lock);
	return error;
}
    488 
/*
 * bdevsw_attach:
 *
 *	Enter devsw into the block device switch at *devmajor, choosing
 *	a free dynamic major first if *devmajor is negative.  Allocates
 *	the expanded table and the per-major localcount on first need.
 *	Caller holds device_lock.  Returns 0 on success or an errno.
 */
static int
bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor)
{
	const struct bdevsw **newbdevsw = NULL;
	struct devswref *newbdevswref = NULL;
	struct localcount *lc;
	devmajor_t bmajor;
	int i;

	KASSERT(mutex_owned(&device_lock));

	/* No block device to attach: trivially successful. */
	if (devsw == NULL)
		return 0;

	/*
	 * No major requested: scan the dynamic range for a major that
	 * is neither occupied in bdevsw nor reserved by an existing
	 * name->major conversion entry.
	 */
	if (*devmajor < 0) {
		for (bmajor = sys_bdevsws; bmajor < max_bdevsws; bmajor++) {
			if (bdevsw[bmajor] != NULL)
				continue;
			for (i = 0; i < max_devsw_convs; i++) {
				if (devsw_conv[i].d_bmajor == bmajor)
					break;
			}
			if (i != max_devsw_convs)
				continue;
			break;
		}
		*devmajor = bmajor;
	}

	if (*devmajor >= MAXDEVSW) {
		printf("%s: block majors exhausted\n", __func__);
		return SET_ERROR(ENOMEM);
	}

	/* Lazily allocate the per-major localcount reference array. */
	if (bdevswref == NULL) {
		newbdevswref = kmem_zalloc(MAXDEVSW * sizeof(newbdevswref[0]),
		    KM_NOSLEEP);
		if (newbdevswref == NULL)
			return SET_ERROR(ENOMEM);
		atomic_store_release(&bdevswref, newbdevswref);
	}

	/*
	 * First overflow of the static table: "fork" it once into a
	 * full-size (MAXDEVSW) copy, published with release ordering so
	 * unlocked readers always see initialized contents.
	 */
	if (*devmajor >= max_bdevsws) {
		KASSERT(bdevsw == bdevsw0);
		newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]),
		    KM_NOSLEEP);
		if (newbdevsw == NULL)
			return SET_ERROR(ENOMEM);
		memcpy(newbdevsw, bdevsw, max_bdevsws * sizeof(bdevsw[0]));
		atomic_store_release(&bdevsw, newbdevsw);
		atomic_store_release(&max_bdevsws, MAXDEVSW);
	}

	if (bdevsw[*devmajor] != NULL)
		return SET_ERROR(EEXIST);

	KASSERT(bdevswref[*devmajor].dr_lc == NULL);
	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
	localcount_init(lc);
	bdevswref[*devmajor].dr_lc = lc;

	/* Publish the entry last, after the localcount is ready. */
	atomic_store_release(&bdevsw[*devmajor], devsw);

	return 0;
}
    554 
/*
 * cdevsw_attach:
 *
 *	Enter devsw into the character device switch at *devmajor,
 *	choosing a free dynamic major first if *devmajor is negative.
 *	Mirrors bdevsw_attach; note devsw is never NULL here because
 *	devsw_attach rejects a NULL cdev up front.  Caller holds
 *	device_lock.  Returns 0 on success or an errno.
 */
static int
cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor)
{
	const struct cdevsw **newcdevsw = NULL;
	struct devswref *newcdevswref = NULL;
	struct localcount *lc;
	devmajor_t cmajor;
	int i;

	KASSERT(mutex_owned(&device_lock));

	/*
	 * No major requested: scan the dynamic range for a major that
	 * is neither occupied in cdevsw nor reserved by an existing
	 * name->major conversion entry.
	 */
	if (*devmajor < 0) {
		for (cmajor = sys_cdevsws; cmajor < max_cdevsws; cmajor++) {
			if (cdevsw[cmajor] != NULL)
				continue;
			for (i = 0; i < max_devsw_convs; i++) {
				if (devsw_conv[i].d_cmajor == cmajor)
					break;
			}
			if (i != max_devsw_convs)
				continue;
			break;
		}
		*devmajor = cmajor;
	}

	if (*devmajor >= MAXDEVSW) {
		printf("%s: character majors exhausted\n", __func__);
		return SET_ERROR(ENOMEM);
	}

	/* Lazily allocate the per-major localcount reference array. */
	if (cdevswref == NULL) {
		newcdevswref = kmem_zalloc(MAXDEVSW * sizeof(newcdevswref[0]),
		    KM_NOSLEEP);
		if (newcdevswref == NULL)
			return SET_ERROR(ENOMEM);
		atomic_store_release(&cdevswref, newcdevswref);
	}

	/*
	 * First overflow of the static table: "fork" it once into a
	 * full-size (MAXDEVSW) copy, published with release ordering so
	 * unlocked readers always see initialized contents.
	 */
	if (*devmajor >= max_cdevsws) {
		KASSERT(cdevsw == cdevsw0);
		newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]),
		    KM_NOSLEEP);
		if (newcdevsw == NULL)
			return SET_ERROR(ENOMEM);
		memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0]));
		atomic_store_release(&cdevsw, newcdevsw);
		atomic_store_release(&max_cdevsws, MAXDEVSW);
	}

	if (cdevsw[*devmajor] != NULL)
		return SET_ERROR(EEXIST);

	KASSERT(cdevswref[*devmajor].dr_lc == NULL);
	lc = kmem_zalloc(sizeof(*lc), KM_SLEEP);
	localcount_init(lc);
	cdevswref[*devmajor].dr_lc = lc;

	/* Publish the entry last, after the localcount is ready. */
	atomic_store_release(&cdevsw[*devmajor], devsw);

	return 0;
}
    617 
/*
 * devsw_detach_locked:
 *
 *	Remove bdev and/or cdev (either may be NULL) from the device
 *	switch tables: assert no autoconf instances remain, clear the
 *	table entries to prevent new references, wait for in-flight
 *	lookups and extant references to drain, then free the per-major
 *	localcounts.  Caller holds device_lock; localcount_drain may
 *	drop and retake it.
 */
static void
devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev)
{
	int bi = -1, ci = -1/*XXXGCC*/, di;
	struct cfdriver *cd;
	device_t dv;

	KASSERT(mutex_owned(&device_lock));

	/*
	 * If this is wired to an autoconf device, make sure the device
	 * has no more instances.  No locking here because under
	 * correct use of devsw_detach, none of this state can change
	 * at this point.
	 */
	if (cdev != NULL && (cd = cdev->d_cfdriver) != NULL) {
		for (di = 0; di < cd->cd_ndevs; di++) {
			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
			    "detaching character device driver %s"
			    " still has attached unit %s",
			    cd->cd_name, device_xname(dv));
		}
	}
	if (bdev != NULL && (cd = bdev->d_cfdriver) != NULL) {
		for (di = 0; di < cd->cd_ndevs; di++) {
			KASSERTMSG((dv = cd->cd_devs[di]) == NULL,
			    "detaching block device driver %s"
			    " still has attached unit %s",
			    cd->cd_name, device_xname(dv));
		}
	}

	/* Prevent new references.  */
	if (bdev != NULL) {
		for (bi = 0; bi < max_bdevsws; bi++) {
			if (bdevsw[bi] != bdev)
				continue;
			atomic_store_relaxed(&bdevsw[bi], NULL);
			break;
		}
		/* The caller's bdev must actually have been attached. */
		KASSERT(bi < max_bdevsws);
	}
	if (cdev != NULL) {
		for (ci = 0; ci < max_cdevsws; ci++) {
			if (cdevsw[ci] != cdev)
				continue;
			atomic_store_relaxed(&cdevsw[ci], NULL);
			break;
		}
		/* The caller's cdev must actually have been attached. */
		KASSERT(ci < max_cdevsws);
	}

	if (bdev == NULL && cdev == NULL) /* XXX possible? */
		return;

	/*
	 * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire
	 * calls to notice that the devsw is gone.
	 *
	 * XXX Despite the use of the pserialize_read_enter/exit API
	 * elsewhere in this file, we use xc_barrier here instead of
	 * pserialize_perform -- because devsw_init is too early for
	 * pserialize_create.  Either pserialize_create should be made
	 * to work earlier, or it should be nixed altogether.  Until
	 * that is fixed, xc_barrier will serve the same purpose.
	 */
	xc_barrier(0);

	/*
	 * Wait for all references to drain.  It is the caller's
	 * responsibility to ensure that at this point, there are no
	 * extant open instances and all new d_open calls will fail.
	 *
	 * Note that localcount_drain may release and reacquire
	 * device_lock.
	 */
	if (bdev != NULL) {
		localcount_drain(bdevswref[bi].dr_lc,
		    &devsw_cv, &device_lock);
		localcount_fini(bdevswref[bi].dr_lc);
		kmem_free(bdevswref[bi].dr_lc, sizeof(*bdevswref[bi].dr_lc));
		bdevswref[bi].dr_lc = NULL;
	}
	if (cdev != NULL) {
		localcount_drain(cdevswref[ci].dr_lc,
		    &devsw_cv, &device_lock);
		localcount_fini(cdevswref[ci].dr_lc);
		kmem_free(cdevswref[ci].dr_lc, sizeof(*cdevswref[ci].dr_lc));
		cdevswref[ci].dr_lc = NULL;
	}
}
    709 
/*
 * devsw_detach:
 *
 *	Public entry point to remove bdev and/or cdev from the device
 *	switch tables; takes device_lock around the locked variant.
 *	May sleep waiting for references to drain.
 */
void
devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev)
{

	mutex_enter(&device_lock);
	devsw_detach_locked(bdev, cdev);
	mutex_exit(&device_lock);
}
    718 
    719 /*
    720  * Look up a block device by number.
    721  *
    722  * => Caller must ensure that the device is attached.
    723  */
    724 const struct bdevsw *
    725 bdevsw_lookup(dev_t dev)
    726 {
    727 	devmajor_t bmajor;
    728 
    729 	if (dev == NODEV)
    730 		return NULL;
    731 	bmajor = major(dev);
    732 	if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws))
    733 		return NULL;
    734 
    735 	return atomic_load_consume(&bdevsw)[bmajor];
    736 }
    737 
/*
 * bdevsw_lookup_acquire:
 *
 *	Look up the block device switch for dev and, if found, acquire
 *	a localcount reference to hold off detach; the reference (or
 *	NULL if none was needed) is returned in *lcp and must be
 *	released with bdevsw_release.  Returns the bdevsw, or NULL if
 *	the major is absent.
 */
static const struct bdevsw *
bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
{
	devmajor_t bmajor;
	const struct bdevsw *bdev = NULL, *const *curbdevsw;
	struct devswref *curbdevswref;
	int s;

	if (dev == NODEV)
		return NULL;
	bmajor = major(dev);
	if (bmajor < 0)
		return NULL;

	/* Read section so detach's xc_barrier waits for us. */
	s = pserialize_read_enter();

	/*
	 * max_bdevsws never goes down, so it is safe to rely on this
	 * condition without any locking for the array access below.
	 * Test sys_bdevsws first so we can avoid the memory barrier in
	 * that case.
	 */
	if (bmajor >= sys_bdevsws &&
	    bmajor >= atomic_load_acquire(&max_bdevsws))
		goto out;
	curbdevsw = atomic_load_consume(&bdevsw);
	if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL)
		goto out;

	/* Statically configured majors may have no localcount yet. */
	curbdevswref = atomic_load_consume(&bdevswref);
	if (curbdevswref == NULL) {
		*lcp = NULL;
	} else if ((*lcp = curbdevswref[bmajor].dr_lc) != NULL) {
		localcount_acquire(*lcp);
	}
out:
	pserialize_read_exit(s);
	return bdev;
}
    777 
    778 static void
    779 bdevsw_release(const struct bdevsw *bdev, struct localcount *lc)
    780 {
    781 
    782 	if (lc == NULL)
    783 		return;
    784 	localcount_release(lc, &devsw_cv, &device_lock);
    785 }
    786 
    787 /*
    788  * Look up a character device by number.
    789  *
    790  * => Caller must ensure that the device is attached.
    791  */
    792 const struct cdevsw *
    793 cdevsw_lookup(dev_t dev)
    794 {
    795 	devmajor_t cmajor;
    796 
    797 	if (dev == NODEV)
    798 		return NULL;
    799 	cmajor = major(dev);
    800 	if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws))
    801 		return NULL;
    802 
    803 	return atomic_load_consume(&cdevsw)[cmajor];
    804 }
    805 
/*
 * cdevsw_lookup_acquire:
 *
 *	Look up the character device switch for dev and, if found,
 *	acquire a localcount reference to hold off detach; the
 *	reference (or NULL if none was needed) is returned in *lcp and
 *	must be released with cdevsw_release.  Returns the cdevsw, or
 *	NULL if the major is absent.
 */
static const struct cdevsw *
cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
{
	devmajor_t cmajor;
	const struct cdevsw *cdev = NULL, *const *curcdevsw;
	struct devswref *curcdevswref;
	int s;

	if (dev == NODEV)
		return NULL;
	cmajor = major(dev);
	if (cmajor < 0)
		return NULL;

	/* Read section so detach's xc_barrier waits for us. */
	s = pserialize_read_enter();

	/*
	 * max_cdevsws never goes down, so it is safe to rely on this
	 * condition without any locking for the array access below.
	 * Test sys_cdevsws first so we can avoid the memory barrier in
	 * that case.
	 */
	if (cmajor >= sys_cdevsws &&
	    cmajor >= atomic_load_acquire(&max_cdevsws))
		goto out;
	curcdevsw = atomic_load_consume(&cdevsw);
	if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL)
		goto out;

	/* Statically configured majors may have no localcount yet. */
	curcdevswref = atomic_load_consume(&cdevswref);
	if (curcdevswref == NULL) {
		*lcp = NULL;
	} else if ((*lcp = curcdevswref[cmajor].dr_lc) != NULL) {
		localcount_acquire(*lcp);
	}
out:
	pserialize_read_exit(s);
	return cdev;
}
    845 
    846 static void
    847 cdevsw_release(const struct cdevsw *cdev, struct localcount *lc)
    848 {
    849 
    850 	if (lc == NULL)
    851 		return;
    852 	localcount_release(lc, &devsw_cv, &device_lock);
    853 }
    854 
    855 /*
    856  * Look up a block device by reference to its operations set.
    857  *
    858  * => Caller must ensure that the device is not detached, and therefore
    859  *    that the returned major is still valid when dereferenced.
    860  */
    861 devmajor_t
    862 bdevsw_lookup_major(const struct bdevsw *bdev)
    863 {
    864 	const struct bdevsw *const *curbdevsw;
    865 	devmajor_t bmajor, bmax;
    866 
    867 	bmax = atomic_load_acquire(&max_bdevsws);
    868 	curbdevsw = atomic_load_consume(&bdevsw);
    869 	for (bmajor = 0; bmajor < bmax; bmajor++) {
    870 		if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev)
    871 			return bmajor;
    872 	}
    873 
    874 	return NODEVMAJOR;
    875 }
    876 
    877 /*
    878  * Look up a character device by reference to its operations set.
    879  *
    880  * => Caller must ensure that the device is not detached, and therefore
    881  *    that the returned major is still valid when dereferenced.
    882  */
    883 devmajor_t
    884 cdevsw_lookup_major(const struct cdevsw *cdev)
    885 {
    886 	const struct cdevsw *const *curcdevsw;
    887 	devmajor_t cmajor, cmax;
    888 
    889 	cmax = atomic_load_acquire(&max_cdevsws);
    890 	curcdevsw = atomic_load_consume(&cdevsw);
    891 	for (cmajor = 0; cmajor < cmax; cmajor++) {
    892 		if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev)
    893 			return cmajor;
    894 	}
    895 
    896 	return NODEVMAJOR;
    897 }
    898 
    899 /*
    900  * Convert from block major number to name.
    901  *
    902  * => Caller must ensure that the device is not detached, and therefore
    903  *    that the name pointer is still valid when dereferenced.
    904  */
    905 const char *
    906 devsw_blk2name(devmajor_t bmajor)
    907 {
    908 	const char *name;
    909 	devmajor_t cmajor;
    910 	int i;
    911 
    912 	name = NULL;
    913 	cmajor = -1;
    914 
    915 	mutex_enter(&device_lock);
    916 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
    917 		mutex_exit(&device_lock);
    918 		return NULL;
    919 	}
    920 	for (i = 0; i < max_devsw_convs; i++) {
    921 		if (devsw_conv[i].d_bmajor == bmajor) {
    922 			cmajor = devsw_conv[i].d_cmajor;
    923 			break;
    924 		}
    925 	}
    926 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
    927 		name = devsw_conv[i].d_name;
    928 	mutex_exit(&device_lock);
    929 
    930 	return name;
    931 }
    932 
    933 /*
    934  * Convert char major number to device driver name.
    935  */
    936 const char *
    937 cdevsw_getname(devmajor_t major)
    938 {
    939 	const char *name;
    940 	int i;
    941 
    942 	name = NULL;
    943 
    944 	if (major < 0)
    945 		return NULL;
    946 
    947 	mutex_enter(&device_lock);
    948 	for (i = 0; i < max_devsw_convs; i++) {
    949 		if (devsw_conv[i].d_cmajor == major) {
    950 			name = devsw_conv[i].d_name;
    951 			break;
    952 		}
    953 	}
    954 	mutex_exit(&device_lock);
    955 	return name;
    956 }
    957 
    958 /*
    959  * Convert block major number to device driver name.
    960  */
    961 const char *
    962 bdevsw_getname(devmajor_t major)
    963 {
    964 	const char *name;
    965 	int i;
    966 
    967 	name = NULL;
    968 
    969 	if (major < 0)
    970 		return NULL;
    971 
    972 	mutex_enter(&device_lock);
    973 	for (i = 0; i < max_devsw_convs; i++) {
    974 		if (devsw_conv[i].d_bmajor == major) {
    975 			name = devsw_conv[i].d_name;
    976 			break;
    977 		}
    978 	}
    979 	mutex_exit(&device_lock);
    980 	return name;
    981 }
    982 
    983 /*
    984  * Convert from device name to block major number.
    985  *
    986  * => Caller must ensure that the device is not detached, and therefore
    987  *    that the major number is still valid when dereferenced.
    988  */
    989 devmajor_t
    990 devsw_name2blk(const char *name, char *devname, size_t devnamelen)
    991 {
    992 	struct devsw_conv *conv;
    993 	devmajor_t bmajor;
    994 	int i;
    995 
    996 	if (name == NULL)
    997 		return NODEVMAJOR;
    998 
    999 	mutex_enter(&device_lock);
   1000 	for (i = 0; i < max_devsw_convs; i++) {
   1001 		size_t len;
   1002 
   1003 		conv = &devsw_conv[i];
   1004 		if (conv->d_name == NULL)
   1005 			continue;
   1006 		len = strlen(conv->d_name);
   1007 		if (strncmp(conv->d_name, name, len) != 0)
   1008 			continue;
   1009 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
   1010 			continue;
   1011 		bmajor = conv->d_bmajor;
   1012 		if (bmajor < 0 || bmajor >= max_bdevsws ||
   1013 		    bdevsw[bmajor] == NULL)
   1014 			break;
   1015 		if (devname != NULL) {
   1016 #ifdef DEVSW_DEBUG
   1017 			if (strlen(conv->d_name) >= devnamelen)
   1018 				printf("%s: too short buffer\n", __func__);
   1019 #endif /* DEVSW_DEBUG */
   1020 			strncpy(devname, conv->d_name, devnamelen);
   1021 			devname[devnamelen - 1] = '\0';
   1022 		}
   1023 		mutex_exit(&device_lock);
   1024 		return bmajor;
   1025 	}
   1026 
   1027 	mutex_exit(&device_lock);
   1028 	return NODEVMAJOR;
   1029 }
   1030 
   1031 /*
   1032  * Convert from device name to char major number.
   1033  *
   1034  * => Caller must ensure that the device is not detached, and therefore
   1035  *    that the major number is still valid when dereferenced.
   1036  */
   1037 devmajor_t
   1038 devsw_name2chr(const char *name, char *devname, size_t devnamelen)
   1039 {
   1040 	struct devsw_conv *conv;
   1041 	devmajor_t cmajor;
   1042 	int i;
   1043 
   1044 	if (name == NULL)
   1045 		return NODEVMAJOR;
   1046 
   1047 	mutex_enter(&device_lock);
   1048 	for (i = 0; i < max_devsw_convs; i++) {
   1049 		size_t len;
   1050 
   1051 		conv = &devsw_conv[i];
   1052 		if (conv->d_name == NULL)
   1053 			continue;
   1054 		len = strlen(conv->d_name);
   1055 		if (strncmp(conv->d_name, name, len) != 0)
   1056 			continue;
   1057 		if (name[len] != '\0' && !isdigit((unsigned char)name[len]))
   1058 			continue;
   1059 		cmajor = conv->d_cmajor;
   1060 		if (cmajor < 0 || cmajor >= max_cdevsws ||
   1061 		    cdevsw[cmajor] == NULL)
   1062 			break;
   1063 		if (devname != NULL) {
   1064 #ifdef DEVSW_DEBUG
   1065 			if (strlen(conv->d_name) >= devnamelen)
   1066 				printf("%s: too short buffer", __func__);
   1067 #endif /* DEVSW_DEBUG */
   1068 			strncpy(devname, conv->d_name, devnamelen);
   1069 			devname[devnamelen - 1] = '\0';
   1070 		}
   1071 		mutex_exit(&device_lock);
   1072 		return cmajor;
   1073 	}
   1074 
   1075 	mutex_exit(&device_lock);
   1076 	return NODEVMAJOR;
   1077 }
   1078 
   1079 /*
   1080  * Convert from character dev_t to block dev_t.
   1081  *
   1082  * => Caller must ensure that the device is not detached, and therefore
   1083  *    that the major number is still valid when dereferenced.
   1084  */
   1085 dev_t
   1086 devsw_chr2blk(dev_t cdev)
   1087 {
   1088 	devmajor_t bmajor, cmajor;
   1089 	int i;
   1090 	dev_t rv;
   1091 
   1092 	cmajor = major(cdev);
   1093 	bmajor = NODEVMAJOR;
   1094 	rv = NODEV;
   1095 
   1096 	mutex_enter(&device_lock);
   1097 	if (cmajor < 0 || cmajor >= max_cdevsws || cdevsw[cmajor] == NULL) {
   1098 		mutex_exit(&device_lock);
   1099 		return NODEV;
   1100 	}
   1101 	for (i = 0; i < max_devsw_convs; i++) {
   1102 		if (devsw_conv[i].d_cmajor == cmajor) {
   1103 			bmajor = devsw_conv[i].d_bmajor;
   1104 			break;
   1105 		}
   1106 	}
   1107 	if (bmajor >= 0 && bmajor < max_bdevsws && bdevsw[bmajor] != NULL)
   1108 		rv = makedev(bmajor, minor(cdev));
   1109 	mutex_exit(&device_lock);
   1110 
   1111 	return rv;
   1112 }
   1113 
   1114 /*
   1115  * Convert from block dev_t to character dev_t.
   1116  *
   1117  * => Caller must ensure that the device is not detached, and therefore
   1118  *    that the major number is still valid when dereferenced.
   1119  */
   1120 dev_t
   1121 devsw_blk2chr(dev_t bdev)
   1122 {
   1123 	devmajor_t bmajor, cmajor;
   1124 	int i;
   1125 	dev_t rv;
   1126 
   1127 	bmajor = major(bdev);
   1128 	cmajor = NODEVMAJOR;
   1129 	rv = NODEV;
   1130 
   1131 	mutex_enter(&device_lock);
   1132 	if (bmajor < 0 || bmajor >= max_bdevsws || bdevsw[bmajor] == NULL) {
   1133 		mutex_exit(&device_lock);
   1134 		return NODEV;
   1135 	}
   1136 	for (i = 0; i < max_devsw_convs; i++) {
   1137 		if (devsw_conv[i].d_bmajor == bmajor) {
   1138 			cmajor = devsw_conv[i].d_cmajor;
   1139 			break;
   1140 		}
   1141 	}
   1142 	if (cmajor >= 0 && cmajor < max_cdevsws && cdevsw[cmajor] != NULL)
   1143 		rv = makedev(cmajor, minor(bdev));
   1144 	mutex_exit(&device_lock);
   1145 
   1146 	return rv;
   1147 }
   1148 
   1149 /*
   1150  * Device access methods.
   1151  */
   1152 
   1153 #define	DEV_LOCK(d)						\
   1154 	if ((mpflag = (d->d_flag & D_MPSAFE)) == 0) {		\
   1155 		KERNEL_LOCK(1, NULL);				\
   1156 	}
   1157 
   1158 #define	DEV_UNLOCK(d)						\
   1159 	if (mpflag == 0) {					\
   1160 		KERNEL_UNLOCK_ONE(NULL);			\
   1161 	}
   1162 
/*
 * bdev_open(dev, flag, devtype, l)
 *
 *	Call the block driver's d_open for dev.  Returns ENXIO if no
 *	driver is registered for the major, otherwise d_open's result.
 *
 *	A localcount reference on the bdevsw is held across the call so
 *	the driver cannot be unloaded underneath us.
 */
int
bdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
{
	const struct bdevsw *d;
	struct localcount *lc;
	device_t dv = NULL/*XXXGCC*/;	/* quiet maybe-uninitialized warning */
	int unit = -1/*XXXGCC*/, rv, mpflag;

	/* Acquire a reference on the driver's devsw entry. */
	d = bdevsw_lookup_acquire(dev, &lc);
	if (d == NULL)
		return SET_ERROR(ENXIO);

	if (d->d_devtounit) {
		/*
		 * If the device node corresponds to an autoconf device
		 * instance, acquire a reference to it so that during
		 * d_open, device_lookup is stable.
		 *
		 * XXX This should also arrange to instantiate cloning
		 * pseudo-devices if appropriate, but that requires
		 * reviewing them all to find and verify a common
		 * pattern.
		 */
		if ((unit = (*d->d_devtounit)(dev)) == -1) {
			rv = SET_ERROR(ENXIO);
			goto out;
		}
		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) ==
		    NULL) {
			rv = SET_ERROR(ENXIO);
			goto out;
		}
		SDT_PROBE6(sdt, bdev, open, acquire,
		    d, dev, flag, devtype, unit, dv);
	}

	/* Call d_open under the kernel lock unless the driver is MP-safe. */
	DEV_LOCK(d);
	SDT_PROBE4(sdt, bdev, open, entry,  d, dev, flag, devtype);
	rv = (*d->d_open)(dev, flag, devtype, l);
	SDT_PROBE5(sdt, bdev, open, return,  d, dev, flag, devtype, rv);
	DEV_UNLOCK(d);

	/* Drop the autoconf device reference taken above, if any. */
	if (d->d_devtounit) {
		SDT_PROBE6(sdt, bdev, open, release,
		    d, dev, flag, devtype, unit, dv);
		device_release(dv);
	}

out:	bdevsw_release(d, lc);

	return rv;
}
   1215 
   1216 int
   1217 bdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
   1218 {
   1219 	const struct bdevsw *d;
   1220 	int rv, mpflag;
   1221 
   1222 	if ((d = bdevsw_lookup(dev)) == NULL)
   1223 		return SET_ERROR(ENXIO);
   1224 	if (d->d_cancel == NULL)
   1225 		return SET_ERROR(ENODEV);
   1226 
   1227 	DEV_LOCK(d);
   1228 	SDT_PROBE4(sdt, bdev, cancel, entry,  d, dev, flag, devtype);
   1229 	rv = (*d->d_cancel)(dev, flag, devtype, l);
   1230 	SDT_PROBE5(sdt, bdev, cancel, return,  d, dev, flag, devtype, rv);
   1231 	DEV_UNLOCK(d);
   1232 
   1233 	return rv;
   1234 }
   1235 
   1236 int
   1237 bdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
   1238 {
   1239 	const struct bdevsw *d;
   1240 	int rv, mpflag;
   1241 
   1242 	if ((d = bdevsw_lookup(dev)) == NULL)
   1243 		return SET_ERROR(ENXIO);
   1244 
   1245 	DEV_LOCK(d);
   1246 	SDT_PROBE4(sdt, bdev, close, entry,  d, dev, flag, devtype);
   1247 	rv = (*d->d_close)(dev, flag, devtype, l);
   1248 	SDT_PROBE5(sdt, bdev, close, return,  d, dev, flag, devtype, rv);
   1249 	DEV_UNLOCK(d);
   1250 
   1251 	return rv;
   1252 }
   1253 
   1254 SDT_PROVIDER_DECLARE(io);
   1255 SDT_PROBE_DEFINE1(io, kernel, , start, "struct buf *"/*bp*/);
   1256 
/*
 * bdev_strategy(bp)
 *
 *	Hand the I/O described by bp to its block driver's d_strategy.
 *	If no driver is registered for bp->b_dev, the buffer is marked
 *	failed with ENXIO and completed immediately.
 */
void
bdev_strategy(struct buf *bp)
{
	const struct bdevsw *d;
	int mpflag;

	SDT_PROBE1(io, kernel, , start, bp);

	if ((d = bdevsw_lookup(bp->b_dev)) == NULL) {
		/* Fail the buffer: nothing transferred, then complete it. */
		bp->b_error = SET_ERROR(ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone_vfs(bp); /* biodone() iff vfs present */
		return;
	}

	DEV_LOCK(d);
	SDT_PROBE3(sdt, bdev, strategy, entry,  d, bp->b_dev, bp);
	(*d->d_strategy)(bp);
	SDT_PROBE3(sdt, bdev, strategy, return,  d, bp->b_dev, bp);
	DEV_UNLOCK(d);
}
   1278 
   1279 int
   1280 bdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
   1281 {
   1282 	const struct bdevsw *d;
   1283 	int rv, mpflag;
   1284 
   1285 	if ((d = bdevsw_lookup(dev)) == NULL)
   1286 		return SET_ERROR(ENXIO);
   1287 
   1288 	DEV_LOCK(d);
   1289 	SDT_PROBE5(sdt, bdev, ioctl, entry,  d, dev, cmd, data, flag);
   1290 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
   1291 	SDT_PROBE6(sdt, bdev, ioctl, return,  d, dev, cmd, data, flag, rv);
   1292 	DEV_UNLOCK(d);
   1293 
   1294 	return rv;
   1295 }
   1296 
/*
 * bdev_dump(dev, addr, data, sz)
 *
 *	Call the block driver's d_dump to write a crash dump.  Returns
 *	ENXIO if no driver is registered, otherwise d_dump's result.
 *	Deliberately performs no locking; see the comment below.
 */
int
bdev_dump(dev_t dev, daddr_t addr, void *data, size_t sz)
{
	const struct bdevsw *d;
	int rv;

	/*
	 * Dump can be called without the device open.  Since it can
	 * currently only be called with the system paused (and in a
	 * potentially unstable state), we don't perform any locking.
	 */
	if ((d = bdevsw_lookup(dev)) == NULL)
		return SET_ERROR(ENXIO);

	/* DEV_LOCK(d); */
	rv = (*d->d_dump)(dev, addr, data, sz);
	/* DEV_UNLOCK(d); */

	return rv;
}
   1317 
   1318 int
   1319 bdev_flags(dev_t dev)
   1320 {
   1321 	const struct bdevsw *d;
   1322 
   1323 	if ((d = bdevsw_lookup(dev)) == NULL)
   1324 		return 0;
   1325 	return d->d_flag & ~D_TYPEMASK;
   1326 }
   1327 
   1328 int
   1329 bdev_type(dev_t dev)
   1330 {
   1331 	const struct bdevsw *d;
   1332 
   1333 	if ((d = bdevsw_lookup(dev)) == NULL)
   1334 		return D_OTHER;
   1335 	return d->d_flag & D_TYPEMASK;
   1336 }
   1337 
/*
 * bdev_size(dev)
 *
 *	Return the partition size via the block driver's d_psize, or
 *	-1 if no driver is registered or it has no d_psize method.
 */
int
bdev_size(dev_t dev)
{
	const struct bdevsw *d;
	/*
	 * mpflag is pre-initialized because DEV_LOCK (which normally
	 * sets it) is skipped when dumping; presumably this is to
	 * quiet uninitialized-variable warnings, since DEV_UNLOCK is
	 * skipped under the same condition.
	 */
	int rv, mpflag = 0;

	if ((d = bdevsw_lookup(dev)) == NULL ||
	    d->d_psize == NULL)
		return -1;

	/*
	 * Don't try to lock the device if we're dumping.
	 * XXX: is there a better way to test this?
	 */
	if ((boothowto & RB_DUMP) == 0)
		DEV_LOCK(d);
	SDT_PROBE2(sdt, bdev, psize, entry,  d, dev);
	rv = (*d->d_psize)(dev);
	SDT_PROBE3(sdt, bdev, psize, return,  d, dev, rv);
	if ((boothowto & RB_DUMP) == 0)
		DEV_UNLOCK(d);

	return rv;
}
   1362 
   1363 int
   1364 bdev_discard(dev_t dev, off_t pos, off_t len)
   1365 {
   1366 	const struct bdevsw *d;
   1367 	int rv, mpflag;
   1368 
   1369 	if ((d = bdevsw_lookup(dev)) == NULL)
   1370 		return SET_ERROR(ENXIO);
   1371 
   1372 	DEV_LOCK(d);
   1373 	SDT_PROBE4(sdt, bdev, discard, entry,  d, dev, pos, len);
   1374 	rv = (*d->d_discard)(dev, pos, len);
   1375 	SDT_PROBE5(sdt, bdev, discard, return,  d, dev, pos, len, rv);
   1376 	DEV_UNLOCK(d);
   1377 
   1378 	return rv;
   1379 }
   1380 
   1381 void
   1382 bdev_detached(dev_t dev)
   1383 {
   1384 	const struct bdevsw *d;
   1385 	device_t dv;
   1386 	int unit;
   1387 
   1388 	if ((d = bdevsw_lookup(dev)) == NULL)
   1389 		return;
   1390 	if (d->d_devtounit == NULL)
   1391 		return;
   1392 	if ((unit = (*d->d_devtounit)(dev)) == -1)
   1393 		return;
   1394 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
   1395 		return;
   1396 	config_detach_commit(dv);
   1397 }
   1398 
/*
 * cdev_open(dev, flag, devtype, l)
 *
 *	Call the character driver's d_open for dev.  Returns ENXIO if
 *	no driver is registered for the major, otherwise d_open's
 *	result.
 *
 *	A localcount reference on the cdevsw is held across the call so
 *	the driver cannot be unloaded underneath us.
 */
int
cdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
{
	const struct cdevsw *d;
	struct localcount *lc;
	device_t dv = NULL/*XXXGCC*/;	/* quiet maybe-uninitialized warning */
	int unit = -1/*XXXGCC*/, rv, mpflag;

	/* Acquire a reference on the driver's devsw entry. */
	d = cdevsw_lookup_acquire(dev, &lc);
	if (d == NULL)
		return SET_ERROR(ENXIO);

	if (d->d_devtounit) {
		/*
		 * If the device node corresponds to an autoconf device
		 * instance, acquire a reference to it so that during
		 * d_open, device_lookup is stable.
		 *
		 * XXX This should also arrange to instantiate cloning
		 * pseudo-devices if appropriate, but that requires
		 * reviewing them all to find and verify a common
		 * pattern.
		 */
		if ((unit = (*d->d_devtounit)(dev)) == -1) {
			rv = SET_ERROR(ENXIO);
			goto out;
		}
		if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) ==
		    NULL) {
			rv = SET_ERROR(ENXIO);
			goto out;
		}
		SDT_PROBE6(sdt, cdev, open, acquire,
		    d, dev, flag, devtype, unit, dv);
	}

	/* Call d_open under the kernel lock unless the driver is MP-safe. */
	DEV_LOCK(d);
	SDT_PROBE4(sdt, cdev, open, entry,  d, dev, flag, devtype);
	rv = (*d->d_open)(dev, flag, devtype, l);
	SDT_PROBE5(sdt, cdev, open, return,  d, dev, flag, devtype, rv);
	DEV_UNLOCK(d);

	/* Drop the autoconf device reference taken above, if any. */
	if (d->d_devtounit) {
		SDT_PROBE6(sdt, cdev, open, release,
		    d, dev, flag, devtype, unit, dv);
		device_release(dv);
	}

out:	cdevsw_release(d, lc);

	return rv;
}
   1451 
   1452 int
   1453 cdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
   1454 {
   1455 	const struct cdevsw *d;
   1456 	int rv, mpflag;
   1457 
   1458 	if ((d = cdevsw_lookup(dev)) == NULL)
   1459 		return SET_ERROR(ENXIO);
   1460 	if (d->d_cancel == NULL)
   1461 		return SET_ERROR(ENODEV);
   1462 
   1463 	DEV_LOCK(d);
   1464 	SDT_PROBE4(sdt, cdev, cancel, entry,  d, dev, flag, devtype);
   1465 	rv = (*d->d_cancel)(dev, flag, devtype, l);
   1466 	SDT_PROBE5(sdt, cdev, cancel, return,  d, dev, flag, devtype, rv);
   1467 	DEV_UNLOCK(d);
   1468 
   1469 	return rv;
   1470 }
   1471 
   1472 int
   1473 cdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
   1474 {
   1475 	const struct cdevsw *d;
   1476 	int rv, mpflag;
   1477 
   1478 	if ((d = cdevsw_lookup(dev)) == NULL)
   1479 		return SET_ERROR(ENXIO);
   1480 
   1481 	DEV_LOCK(d);
   1482 	SDT_PROBE4(sdt, cdev, close, entry,  d, dev, flag, devtype);
   1483 	rv = (*d->d_close)(dev, flag, devtype, l);
   1484 	SDT_PROBE5(sdt, cdev, close, return,  d, dev, flag, devtype, rv);
   1485 	DEV_UNLOCK(d);
   1486 
   1487 	return rv;
   1488 }
   1489 
   1490 int
   1491 cdev_read(dev_t dev, struct uio *uio, int flag)
   1492 {
   1493 	const struct cdevsw *d;
   1494 	int rv, mpflag;
   1495 
   1496 	if ((d = cdevsw_lookup(dev)) == NULL)
   1497 		return SET_ERROR(ENXIO);
   1498 
   1499 	DEV_LOCK(d);
   1500 	SDT_PROBE4(sdt, cdev, read, entry,  d, dev, uio, flag);
   1501 	rv = (*d->d_read)(dev, uio, flag);
   1502 	SDT_PROBE5(sdt, cdev, read, return,  d, dev, uio, flag, rv);
   1503 	DEV_UNLOCK(d);
   1504 
   1505 	return rv;
   1506 }
   1507 
   1508 int
   1509 cdev_write(dev_t dev, struct uio *uio, int flag)
   1510 {
   1511 	const struct cdevsw *d;
   1512 	int rv, mpflag;
   1513 
   1514 	if ((d = cdevsw_lookup(dev)) == NULL)
   1515 		return SET_ERROR(ENXIO);
   1516 
   1517 	DEV_LOCK(d);
   1518 	SDT_PROBE4(sdt, cdev, write, entry,  d, dev, uio, flag);
   1519 	rv = (*d->d_write)(dev, uio, flag);
   1520 	SDT_PROBE5(sdt, cdev, write, return,  d, dev, uio, flag, rv);
   1521 	DEV_UNLOCK(d);
   1522 
   1523 	return rv;
   1524 }
   1525 
   1526 int
   1527 cdev_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
   1528 {
   1529 	const struct cdevsw *d;
   1530 	int rv, mpflag;
   1531 
   1532 	if ((d = cdevsw_lookup(dev)) == NULL)
   1533 		return SET_ERROR(ENXIO);
   1534 
   1535 	DEV_LOCK(d);
   1536 	SDT_PROBE5(sdt, cdev, ioctl, entry,  d, dev, cmd, data, flag);
   1537 	rv = (*d->d_ioctl)(dev, cmd, data, flag, l);
   1538 	SDT_PROBE6(sdt, cdev, ioctl, return,  d, dev, cmd, data, flag, rv);
   1539 	DEV_UNLOCK(d);
   1540 
   1541 	return rv;
   1542 }
   1543 
   1544 void
   1545 cdev_stop(struct tty *tp, int flag)
   1546 {
   1547 	const struct cdevsw *d;
   1548 	int mpflag;
   1549 
   1550 	if ((d = cdevsw_lookup(tp->t_dev)) == NULL)
   1551 		return;
   1552 
   1553 	DEV_LOCK(d);
   1554 	SDT_PROBE4(sdt, cdev, stop, entry,  d, tp->t_dev, tp, flag);
   1555 	(*d->d_stop)(tp, flag);
   1556 	SDT_PROBE4(sdt, cdev, stop, return,  d, tp->t_dev, tp, flag);
   1557 	DEV_UNLOCK(d);
   1558 }
   1559 
   1560 struct tty *
   1561 cdev_tty(dev_t dev)
   1562 {
   1563 	const struct cdevsw *d;
   1564 
   1565 	if ((d = cdevsw_lookup(dev)) == NULL)
   1566 		return NULL;
   1567 
   1568 	/* XXX Check if necessary. */
   1569 	if (d->d_tty == NULL)
   1570 		return NULL;
   1571 
   1572 	return (*d->d_tty)(dev);
   1573 }
   1574 
   1575 int
   1576 cdev_poll(dev_t dev, int flag, lwp_t *l)
   1577 {
   1578 	const struct cdevsw *d;
   1579 	int rv, mpflag;
   1580 
   1581 	if ((d = cdevsw_lookup(dev)) == NULL)
   1582 		return POLLERR;
   1583 
   1584 	DEV_LOCK(d);
   1585 	SDT_PROBE3(sdt, cdev, poll, entry,  d, dev, flag);
   1586 	rv = (*d->d_poll)(dev, flag, l);
   1587 	SDT_PROBE4(sdt, cdev, poll, return,  d, dev, flag, rv);
   1588 	DEV_UNLOCK(d);
   1589 
   1590 	return rv;
   1591 }
   1592 
   1593 paddr_t
   1594 cdev_mmap(dev_t dev, off_t off, int flag)
   1595 {
   1596 	const struct cdevsw *d;
   1597 	paddr_t rv;
   1598 	int mpflag;
   1599 
   1600 	if ((d = cdevsw_lookup(dev)) == NULL)
   1601 		return (paddr_t)-1LL;
   1602 
   1603 	DEV_LOCK(d);
   1604 	SDT_PROBE4(sdt, cdev, mmap, entry,  d, dev, off, flag);
   1605 	rv = (*d->d_mmap)(dev, off, flag);
   1606 	SDT_PROBE5(sdt, cdev, mmap, return,  d, dev, off, flag, rv);
   1607 	DEV_UNLOCK(d);
   1608 
   1609 	return rv;
   1610 }
   1611 
   1612 int
   1613 cdev_kqfilter(dev_t dev, struct knote *kn)
   1614 {
   1615 	const struct cdevsw *d;
   1616 	int rv, mpflag;
   1617 
   1618 	if ((d = cdevsw_lookup(dev)) == NULL)
   1619 		return SET_ERROR(ENXIO);
   1620 
   1621 	DEV_LOCK(d);
   1622 	SDT_PROBE3(sdt, cdev, kqfilter, entry,  d, dev, kn);
   1623 	rv = (*d->d_kqfilter)(dev, kn);
   1624 	SDT_PROBE4(sdt, cdev, kqfilter, return,  d, dev, kn, rv);
   1625 	DEV_UNLOCK(d);
   1626 
   1627 	return rv;
   1628 }
   1629 
   1630 int
   1631 cdev_discard(dev_t dev, off_t pos, off_t len)
   1632 {
   1633 	const struct cdevsw *d;
   1634 	int rv, mpflag;
   1635 
   1636 	if ((d = cdevsw_lookup(dev)) == NULL)
   1637 		return SET_ERROR(ENXIO);
   1638 
   1639 	DEV_LOCK(d);
   1640 	SDT_PROBE4(sdt, cdev, discard, entry,  d, dev, pos, len);
   1641 	rv = (*d->d_discard)(dev, pos, len);
   1642 	SDT_PROBE5(sdt, cdev, discard, return,  d, dev, pos, len, rv);
   1643 	DEV_UNLOCK(d);
   1644 
   1645 	return rv;
   1646 }
   1647 
   1648 int
   1649 cdev_flags(dev_t dev)
   1650 {
   1651 	const struct cdevsw *d;
   1652 
   1653 	if ((d = cdevsw_lookup(dev)) == NULL)
   1654 		return 0;
   1655 	return d->d_flag & ~D_TYPEMASK;
   1656 }
   1657 
   1658 int
   1659 cdev_type(dev_t dev)
   1660 {
   1661 	const struct cdevsw *d;
   1662 
   1663 	if ((d = cdevsw_lookup(dev)) == NULL)
   1664 		return D_OTHER;
   1665 	return d->d_flag & D_TYPEMASK;
   1666 }
   1667 
   1668 void
   1669 cdev_detached(dev_t dev)
   1670 {
   1671 	const struct cdevsw *d;
   1672 	device_t dv;
   1673 	int unit;
   1674 
   1675 	if ((d = cdevsw_lookup(dev)) == NULL)
   1676 		return;
   1677 	if (d->d_devtounit == NULL)
   1678 		return;
   1679 	if ((unit = (*d->d_devtounit)(dev)) == -1)
   1680 		return;
   1681 	if ((dv = device_lookup(d->d_cfdriver, unit)) == NULL)
   1682 		return;
   1683 	config_detach_commit(dv);
   1684 }
   1685 
   1686 /*
   1687  * nommap(dev, off, prot)
   1688  *
   1689  *	mmap routine that always fails, for non-mmappable devices.
   1690  */
   1691 paddr_t
   1692 nommap(dev_t dev, off_t off, int prot)
   1693 {
   1694 
   1695 	return (paddr_t)-1;
   1696 }
   1697 
   1698 /*
   1699  * dev_minor_unit(dev)
   1700  *
   1701  *	Returns minor(dev) as an int.  Intended for use with struct
   1702  *	bdevsw, cdevsw::d_devtounit for drivers whose /dev nodes are
   1703  *	implemented by reference to an autoconf instance with the minor
   1704  *	number.
   1705  */
   1706 int
   1707 dev_minor_unit(dev_t dev)
   1708 {
   1709 
   1710 	return minor(dev);
   1711 }
   1712