Home | History | Annotate | Line # | Download | only in pci
ld_virtio.c revision 1.37
      1 /*	$NetBSD: ld_virtio.c,v 1.37 2025/02/20 18:34:00 jakllsch Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2010 Minoura Makoto.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __KERNEL_RCSID(0, "$NetBSD: ld_virtio.c,v 1.37 2025/02/20 18:34:00 jakllsch Exp $");
     30 
     31 #include <sys/param.h>
     32 #include <sys/systm.h>
     33 #include <sys/kernel.h>
     34 #include <sys/buf.h>
     35 #include <sys/bufq.h>
     36 #include <sys/bus.h>
     37 #include <sys/device.h>
     38 #include <sys/disk.h>
     39 #include <sys/mutex.h>
     40 #include <sys/module.h>
     41 #include <sys/kmem.h>
     42 
     43 #include <dev/ldvar.h>
     44 #include <dev/pci/virtioreg.h>
     45 #include <dev/pci/virtiovar.h>
     46 
     47 #include "ioconf.h"
     48 
     49 /*
     50  * ld_virtioreg:
     51  */
/*
 * Configuration registers.
 *
 * Byte offsets into the device-specific configuration area, used as the
 * offset argument of virtio_read_device_config_{1,2,4,8}() and
 * virtio_write_device_config_1() in this file.  The trailing comment on
 * each line is the width of the field at that offset.
 */
#define VIRTIO_BLK_CONFIG_CAPACITY	0 /* 64bit */
#define VIRTIO_BLK_CONFIG_SIZE_MAX	8 /* 32bit */
#define VIRTIO_BLK_CONFIG_SEG_MAX	12 /* 32bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_C	16 /* 16bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_H	18 /* 8bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_S	19 /* 8bit */
#define VIRTIO_BLK_CONFIG_BLK_SIZE	20 /* 32bit */
#define VIRTIO_BLK_CONFIG_WRITEBACK	32 /* 8bit */
#define VIRTIO_BLK_CONFIG_NUM_QUEUES			34 /* 16bit */
#define VIRTIO_BLK_CONFIG_MAX_DISCARD_SECTORS		36 /* 32bit */
#define VIRTIO_BLK_CONFIG_MAX_DISCARD_SEG		40 /* 32bit */
#define VIRTIO_BLK_CONFIG_DISCARD_SECTOR_ALIGNMENT	44 /* 32bit */

/*
 * Feature bits.
 *
 * Tested against the value returned by virtio_features().  The subset
 * this driver asks for is the mask passed to virtio_child_attach_start()
 * in ld_virtio_attach(); the remaining bits are defined here but not
 * requested by this file.
 */
#define VIRTIO_BLK_F_BARRIER	(1<<0)
#define VIRTIO_BLK_F_SIZE_MAX	(1<<1)
#define VIRTIO_BLK_F_SEG_MAX	(1<<2)
#define VIRTIO_BLK_F_GEOMETRY	(1<<4)
#define VIRTIO_BLK_F_RO		(1<<5)
#define VIRTIO_BLK_F_BLK_SIZE	(1<<6)
#define VIRTIO_BLK_F_SCSI	(1<<7)
#define VIRTIO_BLK_F_FLUSH	(1<<9)
#define VIRTIO_BLK_F_TOPOLOGY	(1<<10)
#define VIRTIO_BLK_F_CONFIG_WCE	(1<<11)
#define VIRTIO_BLK_F_MQ			(1<<12)
#define VIRTIO_BLK_F_DISCARD		(1<<13)
#define VIRTIO_BLK_F_WRITE_ZEROES	(1<<14)
#define VIRTIO_BLK_F_LIFETIME		(1<<15)
#define VIRTIO_BLK_F_SECURE_ERASE	(1<<16)
     82 
     83 /*
     84  * Each block request uses at least two segments - one for the header
     85  * and one for the status.
     86 */
     87 #define	VIRTIO_BLK_CTRL_SEGMENTS	2
     88 
/*
 * Feature-bit name table handed to virtio_child_attach_start() in
 * ld_virtio_attach().  Each entry pairs a bit number (the escaped byte
 * after 'b') with the name of the VIRTIO_BLK_F_* flag at that bit
 * position; the block-specific entries are appended to the common
 * virtio table.
 * NOTE(review): the string layout looks like the snprintb(3) "new style"
 * format -- confirm against virtiovar.h.
 */
#define VIRTIO_BLK_FLAG_BITS			\
	VIRTIO_COMMON_FLAG_BITS			\
	"b\x10" "SECURE_ERASE\0"		\
	"b\x0f" "LIFETIME\0"			\
	"b\x0e" "WRITE_ZEROES\0"		\
	"b\x0d" "DISCARD\0"			\
	"b\x0c" "MQ\0"				\
	"b\x0b" "CONFIG_WCE\0"			\
	"b\x0a" "TOPOLOGY\0"			\
	"b\x09" "FLUSH\0"			\
	"b\x07" "SCSI\0"			\
	"b\x06" "BLK_SIZE\0"			\
	"b\x05" "RO\0"				\
	"b\x04" "GEOMETRY\0"			\
	"b\x02" "SEG_MAX\0"			\
	"b\x01" "SIZE_MAX\0"			\
	"b\x00" "BARRIER\0"
    106 
/*
 * Command: request type stored in virtio_blk_req_hdr.type.
 * This file issues IN, OUT, FLUSH and DISCARD; the other values are
 * defined but not used here.
 */
#define VIRTIO_BLK_T_IN		0
#define VIRTIO_BLK_T_OUT	1
#define VIRTIO_BLK_T_FLUSH	4
#define VIRTIO_BLK_T_GET_ID		8
#define VIRTIO_BLK_T_GET_LIFETIME	10
#define VIRTIO_BLK_T_DISCARD		11
#define VIRTIO_BLK_T_WRITE_ZEROES	13
#define VIRTIO_BLK_T_SECURE_ERASE	14
#define VIRTIO_BLK_T_BARRIER	0x80000000

/*
 * Sector: the unit, in bytes, in which sector numbers and the device
 * capacity are expressed.  The driver converts from ld's sc_secsize
 * units to this unit before filling in a request.
 */
#define VIRTIO_BLK_BSIZE	512

/*
 * Status: value of the one-byte status the device writes back for a
 * request.  The driver only distinguishes OK from not-OK; any other
 * value is reported as EIO.
 */
#define VIRTIO_BLK_S_OK		0
#define VIRTIO_BLK_S_IOERR	1
#define VIRTIO_BLK_S_UNSUPP	2
    125 
/*
 * Request header structure.
 *
 * Placed at the start of every request handed to the device.  All
 * fields are stored through virtio_rw32()/virtio_rw64() by the code
 * that fills them in.
 */
struct virtio_blk_req_hdr {
	uint32_t	type;	/* VIRTIO_BLK_T_* */
	uint32_t	ioprio;	/* always written as 0 by this driver */
	uint64_t	sector;	/* start, in VIRTIO_BLK_BSIZE units */
} __packed;
/* payload and 1 byte status follows */
    133 
/*
 * Payload of a VIRTIO_BLK_T_DISCARD request as built by
 * ld_virtio_discard(): one range of sectors.  The driver fills in
 * sector and num_sectors and always writes flags as 0; the unmap
 * bit-field view of flags is not used in this file.
 */
struct virtio_blk_discard_write_zeroes {
	uint64_t	sector;		/* first sector, VIRTIO_BLK_BSIZE units */
	uint32_t	num_sectors;	/* length, VIRTIO_BLK_BSIZE units */
	union {
		uint32_t	flags;
		struct {
			uint32_t	unmap:1;
			uint32_t	reserved:31;
		};
	};
} __packed;
    145 
    146 /*
    147  * ld_virtiovar:
    148  */
/*
 * Per-slot request state; sc_reqs[] holds one of these for each
 * virtqueue slot and is indexed by the slot number returned by
 * virtio_enqueue_prep()/virtio_dequeue().
 *
 * vr_hdr and vr_status must stay first and adjacent: the vr_cmdsts DMA
 * map is loaded over the first offsetof(struct virtio_blk_req, vr_bp)
 * bytes of the structure, and the status byte is addressed within that
 * map at offsetof(struct virtio_blk_req, vr_status).
 */
struct virtio_blk_req {
	struct virtio_blk_req_hdr	vr_hdr;		/* device-readable header */
	uint8_t				vr_status;	/* device-written VIRTIO_BLK_S_* */
	/*
	 * Owner of the slot: NULL while the slot is idle, the buf being
	 * transferred or discarded, or DUMMY_VR_BP for a cache flush,
	 * which has no buf.
	 */
	struct buf			*vr_bp;
#define DUMMY_VR_BP				((void *)1)
	bus_dmamap_t			vr_cmdsts;	/* map over vr_hdr + vr_status */
	bus_dmamap_t			vr_payload;	/* map for the data payload */
	/*
	 * Driver-allocated payload (the discard descriptor) and its size;
	 * NULL/0 otherwise.  Freed with kmem_free() on completion.
	 */
	void *				vr_datap;
	size_t				vr_datas;
};
    159 
/*
 * Per-device driver state.  sc_ld is the generic ld(4) softc that is
 * handed to ldattach(); the rest is private to this driver.
 */
struct ld_virtio_softc {
	struct ld_softc		sc_ld;
	device_t		sc_dev;		/* our own device_t */

	uint32_t		sc_seg_max; /* max number of segs in xfer */
	uint32_t		sc_size_max; /* max size of single seg */

	struct virtio_softc	*sc_virtio;	/* parent virtio transport */
	struct virtqueue	sc_vq;		/* the single request queue */

	struct virtio_blk_req	*sc_reqs;	/* one entry per vq slot */
	bus_dma_segment_t	sc_reqs_seg;	/* DMA memory backing sc_reqs */

	int			sc_readonly;	/* set when VIRTIO_BLK_F_RO is offered */

	/*
	 * State of the single outstanding cache-flush request:
	 * FREE -> BUSY when ld_virtio_flush() claims it, BUSY -> DONE
	 * when the completion handler sees the flush finish, and back
	 * to FREE once the flusher has read sc_sync_status.  Changes
	 * are made under sc_sync_wait_lock and signalled on
	 * sc_sync_wait.
	 */
	enum {
		SYNC_FREE, SYNC_BUSY, SYNC_DONE
	}			sc_sync_use;
	kcondvar_t		sc_sync_wait;
	kmutex_t		sc_sync_wait_lock;
	uint8_t			sc_sync_status;	/* VIRTIO_BLK_S_* of the flush */

	/* Discard limits read from the device configuration at attach. */
	uint32_t		sc_max_discard_sectors;
	uint32_t		sc_max_discard_seg;
#if 0
	uint32_t		sc_discard_sector_alignment;
#endif
};
    188 
/* Autoconfiguration entry points, defined below. */
static int	ld_virtio_match(device_t, cfdata_t, void *);
static void	ld_virtio_attach(device_t, device_t, void *);
static int	ld_virtio_detach(device_t, int);

/*
 * Attachment glue: the softc is a struct ld_virtio_softc and the
 * match/attach/detach routines are the three functions above.
 */
CFATTACH_DECL_NEW(ld_virtio, sizeof(struct ld_virtio_softc),
    ld_virtio_match, ld_virtio_attach, ld_virtio_detach, NULL);
    195 
    196 static int
    197 ld_virtio_match(device_t parent, cfdata_t match, void *aux)
    198 {
    199 	struct virtio_attach_args *va = aux;
    200 
    201 	if (va->sc_childdevid == VIRTIO_DEVICE_ID_BLOCK)
    202 		return 1;
    203 
    204 	return 0;
    205 }
    206 
/*
 * Forward declarations: the virtqueue completion handler and the ld(4)
 * callbacks that ld_virtio_attach() installs in the ld softc.
 */
static int ld_virtio_vq_done(struct virtqueue *);
static int ld_virtio_dump(struct ld_softc *, void *, int, int);
static int ld_virtio_start(struct ld_softc *, struct buf *);
static int ld_virtio_ioctl(struct ld_softc *, u_long, void *, int32_t, bool);
static int ld_virtio_discard(struct ld_softc *, struct buf *);
    212 
    213 static int
    214 ld_virtio_alloc_reqs(struct ld_virtio_softc *sc, int qsize)
    215 {
    216 	int allocsize, r, rsegs, i;
    217 	struct ld_softc *ld = &sc->sc_ld;
    218 	void *vaddr;
    219 
    220 	allocsize = sizeof(struct virtio_blk_req) * qsize;
    221 	r = bus_dmamem_alloc(virtio_dmat(sc->sc_virtio), allocsize, 0, 0,
    222 			     &sc->sc_reqs_seg, 1, &rsegs, BUS_DMA_WAITOK);
    223 	if (r != 0) {
    224 		aprint_error_dev(sc->sc_dev,
    225 				 "DMA memory allocation failed, size %d, "
    226 				 "error code %d\n", allocsize, r);
    227 		goto err_none;
    228 	}
    229 	r = bus_dmamem_map(virtio_dmat(sc->sc_virtio),
    230 			   &sc->sc_reqs_seg, 1, allocsize,
    231 			   &vaddr, BUS_DMA_WAITOK);
    232 	if (r != 0) {
    233 		aprint_error_dev(sc->sc_dev,
    234 				 "DMA memory map failed, "
    235 				 "error code %d\n", r);
    236 		goto err_dmamem_alloc;
    237 	}
    238 	sc->sc_reqs = vaddr;
    239 	memset(vaddr, 0, allocsize);
    240 	for (i = 0; i < qsize; i++) {
    241 		struct virtio_blk_req *vr = &sc->sc_reqs[i];
    242 		r = bus_dmamap_create(virtio_dmat(sc->sc_virtio),
    243 				      offsetof(struct virtio_blk_req, vr_bp),
    244 				      1,
    245 				      offsetof(struct virtio_blk_req, vr_bp),
    246 				      0,
    247 				      BUS_DMA_WAITOK|BUS_DMA_ALLOCNOW,
    248 				      &vr->vr_cmdsts);
    249 		if (r != 0) {
    250 			aprint_error_dev(sc->sc_dev,
    251 					 "command dmamap creation failed, "
    252 					 "error code %d\n", r);
    253 			goto err_reqs;
    254 		}
    255 		r = bus_dmamap_load(virtio_dmat(sc->sc_virtio), vr->vr_cmdsts,
    256 				    &vr->vr_hdr,
    257 				    offsetof(struct virtio_blk_req, vr_bp),
    258 				    NULL, BUS_DMA_WAITOK);
    259 		if (r != 0) {
    260 			aprint_error_dev(sc->sc_dev,
    261 					 "command dmamap load failed, "
    262 					 "error code %d\n", r);
    263 			goto err_reqs;
    264 		}
    265 		r = bus_dmamap_create(virtio_dmat(sc->sc_virtio),
    266 				      /*size*/ld->sc_maxxfer,
    267 				      /*nseg*/sc->sc_seg_max,
    268 				      /*maxsegsz*/sc->sc_size_max,
    269 				      /*boundary*/0,
    270 				      BUS_DMA_WAITOK|BUS_DMA_ALLOCNOW,
    271 				      &vr->vr_payload);
    272 		if (r != 0) {
    273 			aprint_error_dev(sc->sc_dev,
    274 					 "payload dmamap creation failed, "
    275 					 "error code %d\n", r);
    276 			goto err_reqs;
    277 		}
    278 		vr->vr_datap = NULL;
    279 		vr->vr_datas = 0;
    280 	}
    281 	return 0;
    282 
    283 err_reqs:
    284 	for (i = 0; i < qsize; i++) {
    285 		struct virtio_blk_req *vr = &sc->sc_reqs[i];
    286 		if (vr->vr_cmdsts) {
    287 			bus_dmamap_destroy(virtio_dmat(sc->sc_virtio),
    288 					   vr->vr_cmdsts);
    289 			vr->vr_cmdsts = 0;
    290 		}
    291 		if (vr->vr_payload) {
    292 			bus_dmamap_destroy(virtio_dmat(sc->sc_virtio),
    293 					   vr->vr_payload);
    294 			vr->vr_payload = 0;
    295 		}
    296 	}
    297 	bus_dmamem_unmap(virtio_dmat(sc->sc_virtio), sc->sc_reqs, allocsize);
    298 err_dmamem_alloc:
    299 	bus_dmamem_free(virtio_dmat(sc->sc_virtio), &sc->sc_reqs_seg, 1);
    300 err_none:
    301 	return -1;
    302 }
    303 
    304 static void
    305 ld_virtio_attach(device_t parent, device_t self, void *aux)
    306 {
    307 	struct ld_virtio_softc *sc = device_private(self);
    308 	struct ld_softc *ld = &sc->sc_ld;
    309 	struct virtio_softc *vsc = device_private(parent);
    310 	uint64_t features;
    311 	int qsize;
    312 
    313 	if (virtio_child(vsc) != NULL) {
    314 		aprint_normal(": child already attached for %s; "
    315 			      "something wrong...\n", device_xname(parent));
    316 		return;
    317 	}
    318 
    319 	sc->sc_dev = self;
    320 	sc->sc_virtio = vsc;
    321 
    322 	virtio_child_attach_start(vsc, self, IPL_BIO,
    323 	    (VIRTIO_BLK_F_SIZE_MAX | VIRTIO_BLK_F_SEG_MAX |
    324 	     VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_RO | VIRTIO_BLK_F_BLK_SIZE |
    325 	     VIRTIO_BLK_F_FLUSH | VIRTIO_BLK_F_CONFIG_WCE |
    326 	     VIRTIO_BLK_F_DISCARD),
    327 	    VIRTIO_BLK_FLAG_BITS);
    328 
    329 	features = virtio_features(vsc);
    330 	if (features == 0)
    331 		goto err;
    332 
    333 	if (features & VIRTIO_BLK_F_RO)
    334 		sc->sc_readonly = 1;
    335 	else
    336 		sc->sc_readonly = 0;
    337 
    338 	if (features & VIRTIO_BLK_F_BLK_SIZE) {
    339 		ld->sc_secsize = virtio_read_device_config_4(vsc,
    340 					VIRTIO_BLK_CONFIG_BLK_SIZE);
    341 	} else
    342 		ld->sc_secsize = VIRTIO_BLK_BSIZE;
    343 
    344 	if (features & VIRTIO_BLK_F_SEG_MAX) {
    345 		sc->sc_seg_max = virtio_read_device_config_4(vsc,
    346 		    VIRTIO_BLK_CONFIG_SEG_MAX);
    347 		if (sc->sc_seg_max == 0) {
    348 			aprint_error_dev(sc->sc_dev,
    349 			    "Invalid SEG_MAX %d\n", sc->sc_seg_max);
    350 			goto err;
    351 		}
    352 	} else {
    353 		sc->sc_seg_max = 1;
    354 		aprint_verbose_dev(sc->sc_dev,
    355 		    "Unknown SEG_MAX, assuming %"PRIu32"\n", sc->sc_seg_max);
    356 	}
    357 
    358 	/* At least genfs_io assumes size_max*seg_max >= MAXPHYS. */
    359 	if (features & VIRTIO_BLK_F_SIZE_MAX) {
    360 		sc->sc_size_max = virtio_read_device_config_4(vsc,
    361 		    VIRTIO_BLK_CONFIG_SIZE_MAX);
    362 		if (sc->sc_size_max < MAXPHYS/sc->sc_seg_max) {
    363 			aprint_error_dev(sc->sc_dev,
    364 			    "Too small SIZE_MAX %d minimum is %d\n",
    365 			    sc->sc_size_max, MAXPHYS/sc->sc_seg_max);
    366 			// goto err;
    367 			sc->sc_size_max = MAXPHYS/sc->sc_seg_max;
    368 		} else if (sc->sc_size_max > MAXPHYS) {
    369 			aprint_verbose_dev(sc->sc_dev,
    370 			    "Clip SIZE_MAX from %d to %d\n",
    371 			    sc->sc_size_max, MAXPHYS);
    372 			sc->sc_size_max = MAXPHYS;
    373 		}
    374 	} else {
    375 		sc->sc_size_max = MAXPHYS;
    376 		aprint_verbose_dev(sc->sc_dev,
    377 		    "Unknown SIZE_MAX, assuming %"PRIu32"\n",
    378 		    sc->sc_size_max);
    379 	}
    380 
    381 	aprint_normal_dev(sc->sc_dev, "max %"PRIu32" segs"
    382 	    " of max %"PRIu32" bytes\n",
    383 	    sc->sc_seg_max, sc->sc_size_max);
    384 
    385 	virtio_init_vq_vqdone(vsc, &sc->sc_vq, 0,
    386 	    ld_virtio_vq_done);
    387 
    388 	if (virtio_alloc_vq(vsc, &sc->sc_vq, sc->sc_size_max,
    389 		sc->sc_seg_max + VIRTIO_BLK_CTRL_SEGMENTS, "I/O request") != 0)
    390 		goto err;
    391 	qsize = sc->sc_vq.vq_num;
    392 
    393 	if (virtio_child_attach_finish(vsc, &sc->sc_vq, 1,
    394 	    NULL, VIRTIO_F_INTR_MSIX) != 0)
    395 		goto err;
    396 
    397 	ld->sc_dv = self;
    398 	ld->sc_secperunit = virtio_read_device_config_8(vsc,
    399 	    VIRTIO_BLK_CONFIG_CAPACITY) / (ld->sc_secsize / VIRTIO_BLK_BSIZE);
    400 
    401 	/*
    402 	 * Clamp ld->sc_maxxfer to MAXPHYS before ld_virtio_alloc_reqs
    403 	 * allocates DMA maps of at most ld->sc_maxxfer bytes.
    404 	 * ldattach will also clamp to MAXPHYS, but not until after
    405 	 * ld_virtio_alloc_reqs is done, so that doesn't help.
    406 	 */
    407 	ld->sc_maxxfer = MIN(MAXPHYS, sc->sc_size_max * sc->sc_seg_max);
    408 
    409 	if (features & VIRTIO_BLK_F_GEOMETRY) {
    410 		ld->sc_ncylinders = virtio_read_device_config_2(vsc,
    411 					VIRTIO_BLK_CONFIG_GEOMETRY_C);
    412 		ld->sc_nheads     = virtio_read_device_config_1(vsc,
    413 					VIRTIO_BLK_CONFIG_GEOMETRY_H);
    414 		ld->sc_nsectors   = virtio_read_device_config_1(vsc,
    415 					VIRTIO_BLK_CONFIG_GEOMETRY_S);
    416 	}
    417 	ld->sc_maxqueuecnt = qsize - 1; /* reserve slot for dumps, flushes */
    418 
    419 	if (ld_virtio_alloc_reqs(sc, qsize) < 0)
    420 		goto err;
    421 
    422 	cv_init(&sc->sc_sync_wait, "vblksync");
    423 	mutex_init(&sc->sc_sync_wait_lock, MUTEX_DEFAULT, IPL_BIO);
    424 	sc->sc_sync_use = SYNC_FREE;
    425 
    426 	ld->sc_dump = ld_virtio_dump;
    427 	ld->sc_start = ld_virtio_start;
    428 	ld->sc_ioctl = ld_virtio_ioctl;
    429 
    430 	if (features & VIRTIO_BLK_F_DISCARD) {
    431 		ld->sc_discard = ld_virtio_discard;
    432 		sc->sc_max_discard_sectors = virtio_read_device_config_4(vsc,
    433 		    VIRTIO_BLK_CONFIG_MAX_DISCARD_SECTORS);
    434 		sc->sc_max_discard_seg = virtio_read_device_config_4(vsc,
    435 		    VIRTIO_BLK_CONFIG_MAX_DISCARD_SEG);
    436 #if 0
    437 		sc->sc_discard_sector_alignment =
    438 		    virtio_read_device_config_4(vsc,
    439 		    VIRTIO_BLK_CONFIG_DISCARD_SECTOR_ALIGNMENT);
    440 #endif
    441 	}
    442 
    443 	ld->sc_flags = LDF_ENABLED | LDF_MPSAFE;
    444 	ldattach(ld, BUFQ_DISK_DEFAULT_STRAT);
    445 
    446 	return;
    447 
    448 err:
    449 	virtio_child_attach_failed(vsc);
    450 	return;
    451 }
    452 
    453 static int
    454 ld_virtio_start(struct ld_softc *ld, struct buf *bp)
    455 {
    456 	/* splbio */
    457 	struct ld_virtio_softc *sc = device_private(ld->sc_dv);
    458 	struct virtio_softc *vsc = sc->sc_virtio;
    459 	struct virtqueue *vq = &sc->sc_vq;
    460 	struct virtio_blk_req *vr;
    461 	int r;
    462 	int isread = (bp->b_flags & B_READ);
    463 	int slot;
    464 
    465 	if (sc->sc_readonly && !isread)
    466 		return EIO;
    467 
    468 	r = virtio_enqueue_prep(vsc, vq, &slot);
    469 	if (r != 0)
    470 		return r;
    471 
    472 	vr = &sc->sc_reqs[slot];
    473 	KASSERT(vr->vr_bp == NULL);
    474 
    475 	r = bus_dmamap_load(virtio_dmat(vsc), vr->vr_payload,
    476 			    bp->b_data, bp->b_bcount, NULL,
    477 			    ((isread?BUS_DMA_READ:BUS_DMA_WRITE)
    478 			     |BUS_DMA_NOWAIT));
    479 	if (r != 0) {
    480 		aprint_error_dev(sc->sc_dev,
    481 		    "payload dmamap failed, error code %d\n", r);
    482 		virtio_enqueue_abort(vsc, vq, slot);
    483 		return r;
    484 	}
    485 
    486 	KASSERT(vr->vr_payload->dm_nsegs <= sc->sc_seg_max);
    487 	r = virtio_enqueue_reserve(vsc, vq, slot, vr->vr_payload->dm_nsegs +
    488 	    VIRTIO_BLK_CTRL_SEGMENTS);
    489 	if (r != 0) {
    490 		bus_dmamap_unload(virtio_dmat(vsc), vr->vr_payload);
    491 		return r;
    492 	}
    493 
    494 	vr->vr_bp = bp;
    495 	vr->vr_hdr.type   = virtio_rw32(vsc,
    496 			isread ? VIRTIO_BLK_T_IN : VIRTIO_BLK_T_OUT);
    497 	vr->vr_hdr.ioprio = virtio_rw32(vsc, 0);
    498 	vr->vr_hdr.sector = virtio_rw64(vsc,
    499 			bp->b_rawblkno * sc->sc_ld.sc_secsize /
    500 			VIRTIO_BLK_BSIZE);
    501 
    502 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    503 			0, sizeof(struct virtio_blk_req_hdr),
    504 			BUS_DMASYNC_PREWRITE);
    505 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
    506 			0, bp->b_bcount,
    507 			isread?BUS_DMASYNC_PREREAD:BUS_DMASYNC_PREWRITE);
    508 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    509 			offsetof(struct virtio_blk_req, vr_status),
    510 			sizeof(uint8_t),
    511 			BUS_DMASYNC_PREREAD);
    512 
    513 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    514 			 0, sizeof(struct virtio_blk_req_hdr),
    515 			 true);
    516 	virtio_enqueue(vsc, vq, slot, vr->vr_payload, !isread);
    517 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    518 			 offsetof(struct virtio_blk_req, vr_status),
    519 			 sizeof(uint8_t),
    520 			 false);
    521 	virtio_enqueue_commit(vsc, vq, slot, true);
    522 
    523 	return 0;
    524 }
    525 
/*
 * Complete the request occupying virtqueue slot `slot'.
 *
 * Three kinds of request end up here:
 *  - a cache flush (vr_bp == DUMMY_VR_BP): the status is published in
 *    sc_sync_status, the state moves to SYNC_DONE and waiters on
 *    sc_sync_wait are woken;
 *  - a read or write: the payload map is synced and unloaded and the
 *    buf is finished with lddone();
 *  - a discard: as above, the driver-allocated payload is freed, and
 *    the buf is finished with lddiscardend().
 *
 * For requests that carry a buf, any status other than VIRTIO_BLK_S_OK
 * is reported as EIO with the full byte count as residual.
 */
static void
ld_virtio_vq_done1(struct ld_virtio_softc *sc, struct virtio_softc *vsc,
		   struct virtqueue *vq, int slot)
{
	struct virtio_blk_req *vr = &sc->sc_reqs[slot];
	struct buf *bp = vr->vr_bp;
	const uint32_t rt = virtio_rw32(vsc, vr->vr_hdr.type);

	/* Mark the slot idle; bp and rt hold what we still need from it. */
	vr->vr_bp = NULL;

	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
			0, sizeof(struct virtio_blk_req_hdr),
			BUS_DMASYNC_POSTWRITE);
	/*
	 * The status byte directly follows the packed header, so
	 * sizeof(struct virtio_blk_req_hdr) is the same offset the
	 * submission paths spell as offsetof(..., vr_status).
	 */
	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
			sizeof(struct virtio_blk_req_hdr), sizeof(uint8_t),
			BUS_DMASYNC_POSTREAD);
	if (bp == DUMMY_VR_BP) {
		/* Flush: no buf and no payload, just wake the flusher. */
		mutex_enter(&sc->sc_sync_wait_lock);
		sc->sc_sync_status = vr->vr_status;
		sc->sc_sync_use = SYNC_DONE;
		cv_broadcast(&sc->sc_sync_wait);
		mutex_exit(&sc->sc_sync_wait_lock);
		virtio_dequeue_commit(vsc, vq, slot);
		return;
	}
	switch (rt) {
	case VIRTIO_BLK_T_OUT:
	case VIRTIO_BLK_T_IN:
		bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
				0, bp->b_bcount,
				(bp->b_flags & B_READ)?BUS_DMASYNC_POSTREAD
						      :BUS_DMASYNC_POSTWRITE);
		break;
	default:
		/* Other request types only have a payload if vr_datap is set. */
		if (vr->vr_datap == NULL)
			break;
		bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
				0, vr->vr_datas, BUS_DMASYNC_POSTREAD |
				BUS_DMASYNC_POSTWRITE);
		break;
	}
	bus_dmamap_unload(virtio_dmat(vsc), vr->vr_payload);

	if (vr->vr_status != VIRTIO_BLK_S_OK) {
		bp->b_error = EIO;
		bp->b_resid = bp->b_bcount;
	} else {
		bp->b_error = 0;
		bp->b_resid = 0;
	}

	/* Release the discard descriptor allocated by ld_virtio_discard(). */
	if (vr->vr_datap != NULL) {
		kmem_free(vr->vr_datap, vr->vr_datas);
		vr->vr_datap = NULL;
		vr->vr_datas = 0;
	}

	/* Give the slot back before notifying ld(4) of the completion. */
	virtio_dequeue_commit(vsc, vq, slot);

	/* Request types not listed here complete without any notification. */
	switch (rt) {
	case VIRTIO_BLK_T_OUT:
	case VIRTIO_BLK_T_IN:
		lddone(&sc->sc_ld, bp);
		break;
	case VIRTIO_BLK_T_DISCARD:
		lddiscardend(&sc->sc_ld, bp);
		break;
	}
}
    595 
    596 static int
    597 ld_virtio_vq_done(struct virtqueue *vq)
    598 {
    599 	struct virtio_softc *vsc = vq->vq_owner;
    600 	struct ld_virtio_softc *sc = device_private(virtio_child(vsc));
    601 	int r = 0;
    602 	int slot;
    603 
    604 again:
    605 	if (virtio_dequeue(vsc, vq, &slot, NULL))
    606 		return r;
    607 	r = 1;
    608 
    609 	ld_virtio_vq_done1(sc, vsc, vq, slot);
    610 	goto again;
    611 }
    612 
    613 static int
    614 ld_virtio_dump(struct ld_softc *ld, void *data, int blkno, int blkcnt)
    615 {
    616 	struct ld_virtio_softc *sc = device_private(ld->sc_dv);
    617 	struct virtio_softc *vsc = sc->sc_virtio;
    618 	struct virtqueue *vq = &sc->sc_vq;
    619 	struct virtio_blk_req *vr;
    620 	int slot, r;
    621 
    622 	if (sc->sc_readonly)
    623 		return EIO;
    624 
    625 	r = virtio_enqueue_prep(vsc, vq, &slot);
    626 	if (r != 0) {
    627 		if (r == EAGAIN) { /* no free slot; dequeue first */
    628 			delay(100);
    629 			ld_virtio_vq_done(vq);
    630 			r = virtio_enqueue_prep(vsc, vq, &slot);
    631 			if (r != 0)
    632 				return r;
    633 		}
    634 		return r;
    635 	}
    636 	vr = &sc->sc_reqs[slot];
    637 	r = bus_dmamap_load(virtio_dmat(vsc), vr->vr_payload,
    638 			    data, blkcnt*ld->sc_secsize, NULL,
    639 			    BUS_DMA_WRITE|BUS_DMA_NOWAIT);
    640 	if (r != 0)
    641 		return r;
    642 
    643 	r = virtio_enqueue_reserve(vsc, vq, slot, vr->vr_payload->dm_nsegs +
    644 	    VIRTIO_BLK_CTRL_SEGMENTS);
    645 	if (r != 0) {
    646 		bus_dmamap_unload(virtio_dmat(vsc), vr->vr_payload);
    647 		return r;
    648 	}
    649 
    650 	vr->vr_bp = (void*)0xdeadbeef;
    651 	vr->vr_hdr.type   = virtio_rw32(vsc, VIRTIO_BLK_T_OUT);
    652 	vr->vr_hdr.ioprio = virtio_rw32(vsc, 0);
    653 	vr->vr_hdr.sector = virtio_rw64(vsc,
    654 			(daddr_t) blkno * ld->sc_secsize /
    655 			VIRTIO_BLK_BSIZE);
    656 
    657 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    658 			0, sizeof(struct virtio_blk_req_hdr),
    659 			BUS_DMASYNC_PREWRITE);
    660 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
    661 			0, blkcnt*ld->sc_secsize,
    662 			BUS_DMASYNC_PREWRITE);
    663 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    664 			offsetof(struct virtio_blk_req, vr_status),
    665 			sizeof(uint8_t),
    666 			BUS_DMASYNC_PREREAD);
    667 
    668 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    669 			 0, sizeof(struct virtio_blk_req_hdr),
    670 			 true);
    671 	virtio_enqueue(vsc, vq, slot, vr->vr_payload, true);
    672 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    673 			 offsetof(struct virtio_blk_req, vr_status),
    674 			 sizeof(uint8_t),
    675 			 false);
    676 	virtio_enqueue_commit(vsc, vq, slot, true);
    677 
    678 	for ( ; ; ) {
    679 		int dslot;
    680 
    681 		r = virtio_dequeue(vsc, vq, &dslot, NULL);
    682 		if (r != 0)
    683 			continue;
    684 		if (dslot != slot) {
    685 			ld_virtio_vq_done1(sc, vsc, vq, dslot);
    686 			continue;
    687 		} else
    688 			break;
    689 	}
    690 
    691 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    692 			0, sizeof(struct virtio_blk_req_hdr),
    693 			BUS_DMASYNC_POSTWRITE);
    694 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
    695 			0, blkcnt*ld->sc_secsize,
    696 			BUS_DMASYNC_POSTWRITE);
    697 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    698 			offsetof(struct virtio_blk_req, vr_status),
    699 			sizeof(uint8_t),
    700 			BUS_DMASYNC_POSTREAD);
    701 	if (vr->vr_status == VIRTIO_BLK_S_OK)
    702 		r = 0;
    703 	else
    704 		r = EIO;
    705 	virtio_dequeue_commit(vsc, vq, slot);
    706 
    707 	return r;
    708 }
    709 
    710 static int
    711 ld_virtio_detach(device_t self, int flags)
    712 {
    713 	struct ld_virtio_softc *sc = device_private(self);
    714 	struct ld_softc *ld = &sc->sc_ld;
    715 	bus_dma_tag_t dmat = virtio_dmat(sc->sc_virtio);
    716 	int r, i, qsize;
    717 
    718 	qsize = sc->sc_vq.vq_num;
    719 	r = ldbegindetach(ld, flags);
    720 	if (r != 0)
    721 		return r;
    722 	virtio_reset(sc->sc_virtio);
    723 	virtio_free_vq(sc->sc_virtio, &sc->sc_vq);
    724 
    725 	for (i = 0; i < qsize; i++) {
    726 		bus_dmamap_destroy(dmat,
    727 				   sc->sc_reqs[i].vr_cmdsts);
    728 		bus_dmamap_destroy(dmat,
    729 				   sc->sc_reqs[i].vr_payload);
    730 	}
    731 	bus_dmamem_unmap(dmat, sc->sc_reqs,
    732 			 sizeof(struct virtio_blk_req) * qsize);
    733 	bus_dmamem_free(dmat, &sc->sc_reqs_seg, 1);
    734 
    735 	ldenddetach(ld);
    736 
    737 	cv_destroy(&sc->sc_sync_wait);
    738 	mutex_destroy(&sc->sc_sync_wait_lock);
    739 
    740 	virtio_child_detach(sc->sc_virtio);
    741 
    742 	return 0;
    743 }
    744 
    745 static int
    746 ld_virtio_flush(struct ld_softc *ld, bool poll)
    747 {
    748 	struct ld_virtio_softc * const sc = device_private(ld->sc_dv);
    749 	struct virtio_softc * const vsc = sc->sc_virtio;
    750 	const uint64_t features = virtio_features(vsc);
    751 	struct virtqueue *vq = &sc->sc_vq;
    752 	struct virtio_blk_req *vr;
    753 	int slot;
    754 	int r;
    755 
    756 	if ((features & VIRTIO_BLK_F_FLUSH) == 0)
    757 		return 0;
    758 
    759 	mutex_enter(&sc->sc_sync_wait_lock);
    760 	while (sc->sc_sync_use != SYNC_FREE) {
    761 		if (poll) {
    762 			mutex_exit(&sc->sc_sync_wait_lock);
    763 			ld_virtio_vq_done(vq);
    764 			mutex_enter(&sc->sc_sync_wait_lock);
    765 			continue;
    766 		}
    767 		cv_wait(&sc->sc_sync_wait, &sc->sc_sync_wait_lock);
    768 	}
    769 	sc->sc_sync_use = SYNC_BUSY;
    770 	mutex_exit(&sc->sc_sync_wait_lock);
    771 
    772 	r = virtio_enqueue_prep(vsc, vq, &slot);
    773 	if (r != 0) {
    774 		return r;
    775 	}
    776 
    777 	vr = &sc->sc_reqs[slot];
    778 	KASSERT(vr->vr_bp == NULL);
    779 
    780 	r = virtio_enqueue_reserve(vsc, vq, slot, VIRTIO_BLK_CTRL_SEGMENTS);
    781 	if (r != 0) {
    782 		return r;
    783 	}
    784 
    785 	vr->vr_bp = DUMMY_VR_BP;
    786 	vr->vr_hdr.type   = virtio_rw32(vsc, VIRTIO_BLK_T_FLUSH);
    787 	vr->vr_hdr.ioprio = virtio_rw32(vsc, 0);
    788 	vr->vr_hdr.sector = virtio_rw64(vsc, 0);
    789 
    790 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    791 			0, sizeof(struct virtio_blk_req_hdr),
    792 			BUS_DMASYNC_PREWRITE);
    793 	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
    794 			offsetof(struct virtio_blk_req, vr_status),
    795 			sizeof(uint8_t),
    796 			BUS_DMASYNC_PREREAD);
    797 
    798 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    799 			 0, sizeof(struct virtio_blk_req_hdr),
    800 			 true);
    801 	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
    802 			 offsetof(struct virtio_blk_req, vr_status),
    803 			 sizeof(uint8_t),
    804 			 false);
    805 	virtio_enqueue_commit(vsc, vq, slot, true);
    806 
    807 	mutex_enter(&sc->sc_sync_wait_lock);
    808 	while (sc->sc_sync_use != SYNC_DONE) {
    809 		if (poll) {
    810 			mutex_exit(&sc->sc_sync_wait_lock);
    811 			ld_virtio_vq_done(vq);
    812 			mutex_enter(&sc->sc_sync_wait_lock);
    813 			continue;
    814 		}
    815 		cv_wait(&sc->sc_sync_wait, &sc->sc_sync_wait_lock);
    816 	}
    817 
    818 	if (sc->sc_sync_status == VIRTIO_BLK_S_OK)
    819 		r = 0;
    820 	else
    821 		r = EIO;
    822 
    823 	sc->sc_sync_use = SYNC_FREE;
    824 	cv_broadcast(&sc->sc_sync_wait);
    825 	mutex_exit(&sc->sc_sync_wait_lock);
    826 
    827 	return r;
    828 }
    829 
    830 static int
    831 ld_virtio_getcache(struct ld_softc *ld, int *bitsp)
    832 {
    833 	struct ld_virtio_softc * const sc = device_private(ld->sc_dv);
    834 	struct virtio_softc * const vsc = sc->sc_virtio;
    835 	const uint64_t features = virtio_features(vsc);
    836 
    837 	*bitsp = DKCACHE_READ;
    838 	if ((features & VIRTIO_BLK_F_CONFIG_WCE) != 0)
    839 		*bitsp |= DKCACHE_WCHANGE;
    840 	if (virtio_read_device_config_1(vsc,
    841 	    VIRTIO_BLK_CONFIG_WRITEBACK) != 0x00)
    842 		*bitsp |= DKCACHE_WRITE;
    843 
    844 	return 0;
    845 }
    846 
    847 static int
    848 ld_virtio_setcache(struct ld_softc *ld, int bits)
    849 {
    850 	struct ld_virtio_softc * const sc = device_private(ld->sc_dv);
    851 	struct virtio_softc * const vsc = sc->sc_virtio;
    852 	const uint8_t wce = (bits & DKCACHE_WRITE) ? 0x01 : 0x00;
    853 
    854 	virtio_write_device_config_1(vsc,
    855 	    VIRTIO_BLK_CONFIG_WRITEBACK, wce);
    856 	if (virtio_read_device_config_1(vsc,
    857 	    VIRTIO_BLK_CONFIG_WRITEBACK) != wce)
    858 		return EIO;
    859 
    860 	return 0;
    861 }
    862 
    863 static int
    864 ld_virtio_ioctl(struct ld_softc *ld, u_long cmd, void *addr, int32_t flag, bool poll)
    865 {
    866 	int error;
    867 
    868 	switch (cmd) {
    869 	case DIOCCACHESYNC:
    870 		error = ld_virtio_flush(ld, poll);
    871 		break;
    872 
    873 	case DIOCGCACHE:
    874 		error = ld_virtio_getcache(ld, (int *)addr);
    875 		break;
    876 
    877 	case DIOCSCACHE:
    878 		error = ld_virtio_setcache(ld, *(int *)addr);
    879 		break;
    880 
    881 	default:
    882 		error = EPASSTHROUGH;
    883 		break;
    884 	}
    885 
    886 	return error;
    887 }
    888 
/*
 * ld(4) discard callback: queue a VIRTIO_BLK_T_DISCARD request for the
 * range described by bp (b_rawblkno in ld->sc_secsize units, b_bcount
 * in bytes).
 *
 * The range is sent as a single struct virtio_blk_discard_write_zeroes
 * allocated with kmem_alloc(); it is remembered in vr_datap/vr_datas
 * and freed by ld_virtio_vq_done1(), which also finishes bp with
 * lddiscardend().
 *
 * Returns 0 once the request is committed; EINVAL if discard was not
 * negotiated or the device allows no discard segments; EIO for a
 * read-only device; ERANGE if the range exceeds sc_max_discard_sectors;
 * otherwise the error of the virtio or bus_dma call that failed.
 */
static int
ld_virtio_discard(struct ld_softc *ld, struct buf *bp)
{
	struct ld_virtio_softc * const sc = device_private(ld->sc_dv);
	struct virtio_softc * const vsc = sc->sc_virtio;
	struct virtqueue * const vq = &sc->sc_vq;
	struct virtio_blk_req *vr;
	const uint64_t features = virtio_features(vsc);
	int r;
	int slot;
	uint64_t blkno;
	uint32_t nblks;
	struct virtio_blk_discard_write_zeroes * dwz;

	if ((features & VIRTIO_BLK_F_DISCARD) == 0 ||
	    sc->sc_max_discard_seg < 1)
		return EINVAL;

	if (sc->sc_readonly)
		return EIO;

	/* Convert start and length to VIRTIO_BLK_BSIZE units. */
	blkno = bp->b_rawblkno * sc->sc_ld.sc_secsize / VIRTIO_BLK_BSIZE;
	nblks = bp->b_bcount / VIRTIO_BLK_BSIZE;

	if (nblks > sc->sc_max_discard_sectors)
		return ERANGE;

	r = virtio_enqueue_prep(vsc, vq, &slot);
	if (r != 0) {
		return r;
	}

	vr = &sc->sc_reqs[slot];
	KASSERT(vr->vr_bp == NULL);

	/*
	 * NOTE(review): KM_SLEEP may sleep while the slot is held --
	 * confirm that every caller of this callback is allowed to sleep.
	 */
	dwz = kmem_alloc(sizeof(*dwz), KM_SLEEP);

	r = bus_dmamap_load(virtio_dmat(vsc), vr->vr_payload,
	    dwz, sizeof(*dwz), NULL, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(sc->sc_dev,
		    "discard payload dmamap failed, error code %d\n", r);
		virtio_enqueue_abort(vsc, vq, slot);
		kmem_free(dwz, sizeof(*dwz));
		return r;
	}

	/* Payload segments plus the header and status segments. */
	KASSERT(vr->vr_payload->dm_nsegs <= sc->sc_seg_max);
	r = virtio_enqueue_reserve(vsc, vq, slot, vr->vr_payload->dm_nsegs +
	    VIRTIO_BLK_CTRL_SEGMENTS);
	if (r != 0) {
		bus_dmamap_unload(virtio_dmat(vsc), vr->vr_payload);
		kmem_free(dwz, sizeof(*dwz));
		return r;
	}

	/* The header sector is written as 0; the range travels in dwz. */
	vr->vr_hdr.type = virtio_rw32(vsc, VIRTIO_BLK_T_DISCARD);
	vr->vr_hdr.ioprio = virtio_rw32(vsc, 0);
	vr->vr_hdr.sector = virtio_rw64(vsc, 0);
	vr->vr_bp = bp;

	/* Hand ownership of dwz to the slot; the completion path frees it. */
	KASSERT(vr->vr_datap == NULL);
	vr->vr_datap = dwz;
	vr->vr_datas = sizeof(*dwz);

	dwz->sector = virtio_rw64(vsc, blkno);
	dwz->num_sectors = virtio_rw32(vsc, nblks);
	dwz->flags = virtio_rw32(vsc, 0);

	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
			0, sizeof(struct virtio_blk_req_hdr),
			BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_payload,
			0, vr->vr_datas, BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(virtio_dmat(vsc), vr->vr_cmdsts,
			offsetof(struct virtio_blk_req, vr_status),
			sizeof(uint8_t),
			BUS_DMASYNC_PREREAD);

	/* Chain: header, discard descriptor, status; then commit. */
	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
			 0, sizeof(struct virtio_blk_req_hdr),
			 true);
	virtio_enqueue(vsc, vq, slot, vr->vr_payload, true);
	virtio_enqueue_p(vsc, vq, slot, vr->vr_cmdsts,
			 offsetof(struct virtio_blk_req, vr_status),
			 sizeof(uint8_t),
			 false);
	virtio_enqueue_commit(vsc, vq, slot, true);

	return 0;
}
    980 
    981 MODULE(MODULE_CLASS_DRIVER, ld_virtio, "ld,virtio");
    982 
    983 #ifdef _MODULE
    984 /*
    985  * XXX Don't allow ioconf.c to redefine the "struct cfdriver ld_cd"
    986  * XXX it will be defined in the common-code module
    987  */
    988 #undef  CFDRIVER_DECL
    989 #define CFDRIVER_DECL(name, class, attr)
    990 #include "ioconf.c"
    991 #endif
    992 
    993 static int
    994 ld_virtio_modcmd(modcmd_t cmd, void *opaque)
    995 {
    996 #ifdef _MODULE
    997 	/*
    998 	 * We ignore the cfdriver_vec[] that ioconf provides, since
    999 	 * the cfdrivers are attached already.
   1000 	 */
   1001 	static struct cfdriver * const no_cfdriver_vec[] = { NULL };
   1002 #endif
   1003 	int error = 0;
   1004 
   1005 #ifdef _MODULE
   1006 	switch (cmd) {
   1007 	case MODULE_CMD_INIT:
   1008 		error = config_init_component(no_cfdriver_vec,
   1009 		    cfattach_ioconf_ld_virtio, cfdata_ioconf_ld_virtio);
   1010 		break;
   1011 	case MODULE_CMD_FINI:
   1012 		error = config_fini_component(no_cfdriver_vec,
   1013 		    cfattach_ioconf_ld_virtio, cfdata_ioconf_ld_virtio);
   1014 		break;
   1015 	default:
   1016 		error = ENOTTY;
   1017 		break;
   1018 	}
   1019 #endif
   1020 
   1021 	return error;
   1022 }
   1023