Home | History | Annotate | Line # | Download | only in pci
virtio.c revision 1.63.2.5
      1 /*	$NetBSD: virtio.c,v 1.63.2.5 2023/06/03 14:40:25 martin Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
      6  * Copyright (c) 2010 Minoura Makoto.
      7  * All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 __KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.63.2.5 2023/06/03 14:40:25 martin Exp $");
     32 
     33 #include <sys/param.h>
     34 #include <sys/systm.h>
     35 #include <sys/kernel.h>
     36 #include <sys/atomic.h>
     37 #include <sys/bus.h>
     38 #include <sys/device.h>
     39 #include <sys/kmem.h>
     40 #include <sys/module.h>
     41 
     42 #define VIRTIO_PRIVATE
     43 
     44 #include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
     45 #include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
     46 
     47 #define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
     48 
     49 /*
     50  * The maximum descriptor size is 2^15. Use that value as the end of
     51  * descriptor chain terminator since it will never be a valid index
     52  * in the descriptor table.
     53  */
     54 #define VRING_DESC_CHAIN_END		32768
     55 
     56 /* incomplete list */
/*
 * Human readable names indexed by virtio device id.  The table is
 * incomplete; NDEVNAMES gives the number of ids it covers.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
     70 
     71 static void	virtio_reset_vq(struct virtio_softc *,
     72 		    struct virtqueue *);
     73 
     74 void
     75 virtio_set_status(struct virtio_softc *sc, int status)
     76 {
     77 	sc->sc_ops->set_status(sc, status);
     78 }
     79 
     80 /*
     81  * Reset the device.
     82  */
     83 /*
     84  * To reset the device to a known state, do the following:
     85  *	virtio_reset(sc);	     // this will stop the device activity
     86  *	<dequeue finished requests>; // virtio_dequeue() still can be called
     87  *	<revoke pending requests in the vqs if any>;
     88  *	virtio_reinit_start(sc);     // dequeue prohibited
     89  *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
     90  *	<some other initialization>;
     91  *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
     92  * Once attached, feature negotiation can only be allowed after virtio_reset.
     93  */
void
virtio_reset(struct virtio_softc *sc)
{

	/* Thin public wrapper around virtio_device_reset(). */
	virtio_device_reset(sc);
}
     99 
    100 int
    101 virtio_reinit_start(struct virtio_softc *sc)
    102 {
    103 	int i, r;
    104 
    105 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
    106 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
    107 	for (i = 0; i < sc->sc_nvqs; i++) {
    108 		int n;
    109 		struct virtqueue *vq = &sc->sc_vqs[i];
    110 		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
    111 		if (n == 0)	/* vq disappeared */
    112 			continue;
    113 		if (n != vq->vq_num) {
    114 			panic("%s: virtqueue size changed, vq index %d\n",
    115 			    device_xname(sc->sc_dev),
    116 			    vq->vq_index);
    117 		}
    118 		virtio_reset_vq(sc, vq);
    119 		sc->sc_ops->setup_queue(sc, vq->vq_index,
    120 		    vq->vq_dmamap->dm_segs[0].ds_addr);
    121 	}
    122 
    123 	r = sc->sc_ops->setup_interrupts(sc, 1);
    124 	if (r != 0)
    125 		goto fail;
    126 
    127 	return 0;
    128 
    129 fail:
    130 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
    131 
    132 	return 1;
    133 }
    134 
    135 void
    136 virtio_reinit_end(struct virtio_softc *sc)
    137 {
    138 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
    139 }
    140 
    141 /*
    142  * Feature negotiation.
    143  */
    144 void
    145 virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
    146 {
    147 	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
    148 	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
    149 		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
    150 	sc->sc_ops->neg_features(sc, guest_features);
    151 	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
    152 		sc->sc_indirect = true;
    153 	else
    154 		sc->sc_indirect = false;
    155 }
    156 
    157 
    158 /*
    159  * Device configuration registers readers/writers
    160  */
/*
 * Optional tracing of device config accesses; change the "#if 0" to
 * enable it.
 *
 * DPRINTFR dumps `num' raw bytes starting at `index' followed by the
 * decoded value.  It references a variable named `sc' in the caller's
 * scope.  DPRINTFR2 prints two values side by side with the same format.
 *
 * Both macros expand to a single statement so that they are safe in
 * unbraced if/else bodies, whether tracing is enabled or not.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) \
	do { \
		printf("\n%s (", (n)); \
		for (int dprintfr_i = 0; dprintfr_i < (num); dprintfr_i++) \
			printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, \
			    sc->sc_devcfg_ioh, (index) + dprintfr_i)); \
		printf(") -> "); printf(fmt, (val)); printf("\n"); \
	} while (0)
#define DPRINTFR2(n, fmt, val_s, val_n) \
	do { \
		printf("%s ", (n)); \
		printf("\n        stream "); printf(fmt, (val_s)); \
		printf(" norm "); printf(fmt, (val_n)); printf("\n"); \
	} while (0)
#else
#define DPRINTFR(n, fmt, val, index, num)	do { } while (0)
#define DPRINTFR2(n, fmt, val_s, val_n)		do { } while (0)
#endif
    174 
    175 
    176 uint8_t
    177 virtio_read_device_config_1(struct virtio_softc *sc, int index)
    178 {
    179 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    180 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    181 	uint8_t val;
    182 
    183 	val = bus_space_read_1(iot, ioh, index);
    184 
    185 	DPRINTFR("read_1", "%02x", val, index, 1);
    186 	return val;
    187 }
    188 
    189 uint16_t
    190 virtio_read_device_config_2(struct virtio_softc *sc, int index)
    191 {
    192 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    193 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    194 	uint16_t val;
    195 
    196 	val = bus_space_read_2(iot, ioh, index);
    197 	if (BYTE_ORDER != sc->sc_bus_endian)
    198 		val = bswap16(val);
    199 
    200 	DPRINTFR("read_2", "%04x", val, index, 2);
    201 	DPRINTFR2("read_2", "%04x",
    202 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
    203 		index),
    204 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
    205 	return val;
    206 }
    207 
    208 uint32_t
    209 virtio_read_device_config_4(struct virtio_softc *sc, int index)
    210 {
    211 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    212 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    213 	uint32_t val;
    214 
    215 	val = bus_space_read_4(iot, ioh, index);
    216 	if (BYTE_ORDER != sc->sc_bus_endian)
    217 		val = bswap32(val);
    218 
    219 	DPRINTFR("read_4", "%08x", val, index, 4);
    220 	DPRINTFR2("read_4", "%08x",
    221 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
    222 		index),
    223 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
    224 	return val;
    225 }
    226 
    227 /*
    228  * The Virtio spec explicitly tells that reading and writing 8 bytes are not
    229  * considered atomic and no triggers may be connected to reading or writing
    230  * it. We access it using two 32 bit reads. See virtio spec 4.1.3.1.
    231  */
    232 uint64_t
    233 virtio_read_device_config_8(struct virtio_softc *sc, int index)
    234 {
    235 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    236 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    237 	union {
    238 		uint64_t u64;
    239 		uint32_t l[2];
    240 	} v;
    241 	uint64_t val;
    242 
    243 	v.l[0] = bus_space_read_4(iot, ioh, index);
    244 	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
    245 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
    246 		v.l[0] = bswap32(v.l[0]);
    247 		v.l[1] = bswap32(v.l[1]);
    248 	}
    249 	val = v.u64;
    250 
    251 	if (BYTE_ORDER != sc->sc_struct_endian)
    252 		val = bswap64(val);
    253 
    254 	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
    255 	DPRINTFR2("read_8 low ", "%08x",
    256 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
    257 		index),
    258 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
    259 	DPRINTFR2("read_8 high ", "%08x",
    260 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
    261 		index + 4),
    262 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
    263 	return val;
    264 }
    265 
    266 /*
    267  * In the older virtio spec, device config registers are host endian. On newer
    268  * they are little endian. Some newer devices however explicitly specify their
    269  * register to always be little endian. These functions cater for these.
    270  */
    271 uint16_t
    272 virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
    273 {
    274 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    275 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    276 	uint16_t val;
    277 
    278 	val = bus_space_read_2(iot, ioh, index);
    279 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
    280 		val = bswap16(val);
    281 
    282 	DPRINTFR("read_le_2", "%04x", val, index, 2);
    283 	DPRINTFR2("read_le_2", "%04x",
    284 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
    285 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
    286 	return val;
    287 }
    288 
    289 uint32_t
    290 virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
    291 {
    292 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    293 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    294 	uint32_t val;
    295 
    296 	val = bus_space_read_4(iot, ioh, index);
    297 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
    298 		val = bswap32(val);
    299 
    300 	DPRINTFR("read_le_4", "%08x", val, index, 4);
    301 	DPRINTFR2("read_le_4", "%08x",
    302 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
    303 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
    304 	return val;
    305 }
    306 
    307 void
    308 virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
    309 {
    310 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    311 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    312 
    313 	bus_space_write_1(iot, ioh, index, value);
    314 }
    315 
    316 void
    317 virtio_write_device_config_2(struct virtio_softc *sc, int index,
    318     uint16_t value)
    319 {
    320 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    321 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    322 
    323 	if (BYTE_ORDER != sc->sc_bus_endian)
    324 		value = bswap16(value);
    325 	bus_space_write_2(iot, ioh, index, value);
    326 }
    327 
    328 void
    329 virtio_write_device_config_4(struct virtio_softc *sc, int index,
    330     uint32_t value)
    331 {
    332 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    333 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    334 
    335 	if (BYTE_ORDER != sc->sc_bus_endian)
    336 		value = bswap32(value);
    337 	bus_space_write_4(iot, ioh, index, value);
    338 }
    339 
    340 /*
    341  * The Virtio spec explicitly tells that reading and writing 8 bytes are not
    342  * considered atomic and no triggers may be connected to reading or writing
    343  * it. We access it using two 32 bit writes. For good measure it is stated to
    344  * always write lsb first just in case of a hypervisor bug. See virtio
    345  * spec 4.1.3.1.
    346  */
    347 void
    348 virtio_write_device_config_8(struct virtio_softc *sc, int index,
    349     uint64_t value)
    350 {
    351 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    352 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    353 	union {
    354 		uint64_t u64;
    355 		uint32_t l[2];
    356 	} v;
    357 
    358 	if (BYTE_ORDER != sc->sc_struct_endian)
    359 		value = bswap64(value);
    360 
    361 	v.u64 = value;
    362 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
    363 		v.l[0] = bswap32(v.l[0]);
    364 		v.l[1] = bswap32(v.l[1]);
    365 	}
    366 
    367 	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
    368 		bus_space_write_4(iot, ioh, index,     v.l[0]);
    369 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
    370 	} else {
    371 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
    372 		bus_space_write_4(iot, ioh, index,     v.l[0]);
    373 	}
    374 }
    375 
    376 /*
    377  * In the older virtio spec, device config registers are host endian. On newer
    378  * they are little endian. Some newer devices however explicitly specify their
    379  * register to always be little endian. These functions cater for these.
    380  */
    381 void
    382 virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
    383     uint16_t value)
    384 {
    385 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    386 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    387 
    388 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
    389 		value = bswap16(value);
    390 	bus_space_write_2(iot, ioh, index, value);
    391 }
    392 
    393 void
    394 virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
    395     uint32_t value)
    396 {
    397 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
    398 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
    399 
    400 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
    401 		value = bswap32(value);
    402 	bus_space_write_4(iot, ioh, index, value);
    403 }
    404 
    405 
    406 /*
    407  * data structures endian helpers
    408  */
    409 uint16_t
    410 virtio_rw16(struct virtio_softc *sc, uint16_t val)
    411 {
    412 	KASSERT(sc);
    413 	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
    414 }
    415 
    416 uint32_t
    417 virtio_rw32(struct virtio_softc *sc, uint32_t val)
    418 {
    419 	KASSERT(sc);
    420 	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
    421 }
    422 
    423 uint64_t
    424 virtio_rw64(struct virtio_softc *sc, uint64_t val)
    425 {
    426 	KASSERT(sc);
    427 	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
    428 }
    429 
    430 
    431 /*
    432  * Interrupt handler.
    433  */
    434 static void
    435 virtio_soft_intr(void *arg)
    436 {
    437 	struct virtio_softc *sc = arg;
    438 
    439 	KASSERT(sc->sc_intrhand != NULL);
    440 
    441 	(*sc->sc_intrhand)(sc);
    442 }
    443 
    444 /* set to vq->vq_intrhand in virtio_init_vq_vqdone() */
    445 static int
    446 virtio_vq_done(void *xvq)
    447 {
    448 	struct virtqueue *vq = xvq;
    449 
    450 	return vq->vq_done(vq);
    451 }
    452 
    453 static int
    454 virtio_vq_intr(struct virtio_softc *sc)
    455 {
    456 	struct virtqueue *vq;
    457 	int i, r = 0;
    458 
    459 	for (i = 0; i < sc->sc_nvqs; i++) {
    460 		vq = &sc->sc_vqs[i];
    461 		if (virtio_vq_is_enqueued(sc, vq) == 1) {
    462 			r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
    463 		}
    464 	}
    465 
    466 	return r;
    467 }
    468 
    469 /*
    470  * dmamap sync operations for a virtqueue.
    471  */
    472 static inline void
    473 vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    474 {
    475 
    476 	/* availoffset == sizeof(vring_desc) * vq_num */
    477 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
    478 	    ops);
    479 }
    480 
    481 static inline void
    482 vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    483 {
    484 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
    485 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
    486 	size_t usedlen = 0;
    487 
    488 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
    489 		usedlen = sizeof(uint16_t);
    490 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    491 	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
    492 }
    493 
    494 static inline void
    495 vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    496 {
    497 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
    498 
    499 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    500 	    vq->vq_availoffset, hdrlen, ops);
    501 }
    502 
    503 static inline void
    504 vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    505 {
    506 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
    507 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
    508 
    509 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    510 	    vq->vq_availoffset + hdrlen, payloadlen, ops);
    511 }
    512 
    513 static inline void
    514 vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    515 {
    516 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
    517 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
    518 	size_t usedlen = sizeof(uint16_t);
    519 
    520 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
    521 		return;
    522 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    523 	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
    524 }
    525 
    526 static inline void
    527 vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    528 {
    529 	uint16_t hdrlen = offsetof(struct vring_used, ring);
    530 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
    531 	size_t availlen = 0;
    532 
    533 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
    534 		availlen = sizeof(uint16_t);
    535 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    536 	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
    537 }
    538 
    539 static inline void
    540 vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    541 {
    542 	uint16_t hdrlen = offsetof(struct vring_used, ring);
    543 
    544 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    545 	    vq->vq_usedoffset, hdrlen, ops);
    546 }
    547 
    548 static inline void
    549 vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    550 {
    551 	uint16_t hdrlen = offsetof(struct vring_used, ring);
    552 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
    553 
    554 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    555 	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
    556 }
    557 
    558 static inline void
    559 vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
    560 {
    561 	uint16_t hdrlen = offsetof(struct vring_used, ring);
    562 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
    563 	size_t availlen = sizeof(uint16_t);
    564 
    565 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
    566 		return;
    567 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    568 	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
    569 }
    570 
    571 static inline void
    572 vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    573     int ops)
    574 {
    575 	int offset = vq->vq_indirectoffset +
    576 	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
    577 
    578 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
    579 	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
    580 }
    581 
    582 bool
    583 virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
    584 {
    585 
    586 	if (vq->vq_queued) {
    587 		vq->vq_queued = 0;
    588 		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
    589 	}
    590 
    591 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
    592 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
    593 		return 0;
    594 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
    595 	return 1;
    596 }
    597 
    598 /*
    599  * Increase the event index in order to delay interrupts.
    600  */
    601 int
    602 virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
    603     uint16_t nslots)
    604 {
    605 	uint16_t	idx, nused;
    606 
    607 	idx = vq->vq_used_idx + nslots;
    608 
    609 	/* set the new event index: avail_ring->used_event = idx */
    610 	*vq->vq_used_event = virtio_rw16(sc, idx);
    611 	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
    612 	vq->vq_queued++;
    613 
    614 	nused = (uint16_t)
    615 	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
    616 	KASSERT(nused <= vq->vq_num);
    617 
    618 	return nslots < nused;
    619 }
    620 
    621 /*
    622  * Postpone interrupt until 3/4 of the available descriptors have been
    623  * consumed.
    624  */
    625 int
    626 virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
    627 {
    628 	uint16_t	nslots;
    629 
    630 	nslots = (uint16_t)
    631 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
    632 
    633 	return virtio_postpone_intr(sc, vq, nslots);
    634 }
    635 
    636 /*
    637  * Postpone interrupt until all of the available descriptors have been
    638  * consumed.
    639  */
    640 int
    641 virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
    642 {
    643 	uint16_t	nslots;
    644 
    645 	nslots = (uint16_t)
    646 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
    647 
    648 	return virtio_postpone_intr(sc, vq, nslots);
    649 }
    650 
    651 /*
    652  * Start/stop vq interrupt.  No guarantee.
    653  */
    654 void
    655 virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
    656 {
    657 
    658 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
    659 		/*
    660 		 * No way to disable the interrupt completely with
    661 		 * RingEventIdx. Instead advance used_event by half the
    662 		 * possible value. This won't happen soon and is far enough in
    663 		 * the past to not trigger a spurios interrupt.
    664 		 */
    665 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
    666 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
    667 	} else {
    668 		vq->vq_avail->flags |=
    669 		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
    670 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
    671 	}
    672 	vq->vq_queued++;
    673 }
    674 
    675 int
    676 virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
    677 {
    678 
    679 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
    680 		/*
    681 		 * If event index feature is negotiated, enabling interrupts
    682 		 * is done through setting the latest consumed index in the
    683 		 * used_event field
    684 		 */
    685 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
    686 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
    687 	} else {
    688 		vq->vq_avail->flags &=
    689 		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
    690 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
    691 	}
    692 	vq->vq_queued++;
    693 
    694 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
    695 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
    696 		return 0;
    697 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
    698 	return 1;
    699 }
    700 
    701 /*
    702  * Initialize vq structure.
    703  */
    704 /*
    705  * Reset virtqueue parameters
    706  */
    707 static void
    708 virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq)
    709 {
    710 	struct vring_desc *vds;
    711 	int i, j;
    712 	int vq_size = vq->vq_num;
    713 
    714 	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
    715 
    716 	/* build the descriptor chain for free slot management */
    717 	vds = vq->vq_desc;
    718 	for (i = 0; i < vq_size - 1; i++) {
    719 		vds[i].next = virtio_rw16(sc, i + 1);
    720 	}
    721 	vds[i].next = virtio_rw16(sc, VRING_DESC_CHAIN_END);
    722 	vq->vq_free_idx = 0;
    723 
    724 	/* build the indirect descriptor chain */
    725 	if (vq->vq_indirect != NULL) {
    726 		struct vring_desc *vd;
    727 
    728 		for (i = 0; i < vq_size; i++) {
    729 			vd = vq->vq_indirect;
    730 			vd += vq->vq_maxnsegs * i;
    731 			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
    732 				vd[j].next = virtio_rw16(sc, j + 1);
    733 			}
    734 		}
    735 	}
    736 
    737 	/* enqueue/dequeue status */
    738 	vq->vq_avail_idx = 0;
    739 	vq->vq_used_idx = 0;
    740 	vq->vq_queued = 0;
    741 	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
    742 	vq->vq_queued++;
    743 }
    744 
    745 /* Initialize vq */
    746 void
    747 virtio_init_vq_vqdone(struct virtio_softc *sc, struct virtqueue *vq,
    748     int index, int (*vq_done)(struct virtqueue *))
    749 {
    750 
    751 	virtio_init_vq(sc, vq, index, virtio_vq_done, vq);
    752 	vq->vq_done = vq_done;
    753 }
    754 
    755 void
    756 virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
    757    int (*func)(void *), void *arg)
    758 {
    759 
    760 	memset(vq, 0, sizeof(*vq));
    761 
    762 	vq->vq_owner = sc;
    763 	vq->vq_num = sc->sc_ops->read_queue_size(sc, index);
    764 	vq->vq_index = index;
    765 	vq->vq_intrhand = func;
    766 	vq->vq_intrhand_arg = arg;
    767 }
    768 
    769 /*
    770  * Allocate/free a vq.
    771  */
    772 int
    773 virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq,
    774     int maxsegsize, int maxnsegs, const char *name)
    775 {
    776 	bus_size_t size_desc, size_avail, size_used, size_indirect;
    777 	bus_size_t allocsize = 0, size_desc_avail;
    778 	int rsegs, r, hdrlen;
    779 	unsigned int vq_num;
    780 #define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
    781 
    782 	vq_num = vq->vq_num;
    783 
    784 	if (vq_num == 0) {
    785 		aprint_error_dev(sc->sc_dev,
    786 		    "virtqueue not exist, index %d for %s\n",
    787 		    vq->vq_index, name);
    788 		goto err;
    789 	}
    790 
    791 	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
    792 
    793 	size_desc = sizeof(vq->vq_desc[0]) * vq_num;
    794 	size_avail = sizeof(uint16_t) * hdrlen
    795 	    + sizeof(vq->vq_avail[0].ring[0]) * vq_num;
    796 	size_used = sizeof(uint16_t) *hdrlen
    797 	    + sizeof(vq->vq_used[0].ring[0]) * vq_num;
    798 	size_indirect = (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) ?
    799 	    sizeof(struct vring_desc) * maxnsegs * vq_num : 0;
    800 
    801 	size_desc_avail = VIRTQUEUE_ALIGN(size_desc + size_avail);
    802 	size_used = VIRTQUEUE_ALIGN(size_used);
    803 
    804 	allocsize = size_desc_avail + size_used + size_indirect;
    805 
    806 	/* alloc and map the memory */
    807 	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
    808 	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
    809 	if (r != 0) {
    810 		aprint_error_dev(sc->sc_dev,
    811 		    "virtqueue %d for %s allocation failed, "
    812 		    "error code %d\n", vq->vq_index, name, r);
    813 		goto err;
    814 	}
    815 
    816 	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
    817 	    &vq->vq_vaddr, BUS_DMA_WAITOK);
    818 	if (r != 0) {
    819 		aprint_error_dev(sc->sc_dev,
    820 		    "virtqueue %d for %s map failed, "
    821 		    "error code %d\n", vq->vq_index, name, r);
    822 		goto err;
    823 	}
    824 
    825 	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
    826 	    BUS_DMA_WAITOK, &vq->vq_dmamap);
    827 	if (r != 0) {
    828 		aprint_error_dev(sc->sc_dev,
    829 		    "virtqueue %d for %s dmamap creation failed, "
    830 		    "error code %d\n", vq->vq_index, name, r);
    831 		goto err;
    832 	}
    833 
    834 	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
    835 	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
    836 	if (r != 0) {
    837 		aprint_error_dev(sc->sc_dev,
    838 		    "virtqueue %d for %s dmamap load failed, "
    839 		    "error code %d\n", vq->vq_index, name, r);
    840 		goto err;
    841 	}
    842 
    843 	vq->vq_bytesize = allocsize;
    844 	vq->vq_maxsegsize = maxsegsize;
    845 	vq->vq_maxnsegs = maxnsegs;
    846 
    847 #define VIRTIO_PTR(base, offset)	(void *)((intptr_t)(base) + (offset))
    848 	/* initialize vring pointers */
    849 	vq->vq_desc = VIRTIO_PTR(vq->vq_vaddr, 0);
    850 	vq->vq_availoffset = size_desc;
    851 	vq->vq_avail = VIRTIO_PTR(vq->vq_vaddr, vq->vq_availoffset);
    852 	vq->vq_used_event = VIRTIO_PTR(vq->vq_avail,
    853 	    offsetof(struct vring_avail, ring[vq_num]));
    854 	vq->vq_usedoffset = size_desc_avail;
    855 	vq->vq_used = VIRTIO_PTR(vq->vq_vaddr, vq->vq_usedoffset);
    856 	vq->vq_avail_event = VIRTIO_PTR(vq->vq_used,
    857 	    offsetof(struct vring_used, ring[vq_num]));
    858 
    859 	if (size_indirect > 0) {
    860 		vq->vq_indirectoffset = size_desc_avail + size_used;
    861 		vq->vq_indirect = VIRTIO_PTR(vq->vq_vaddr,
    862 		    vq->vq_indirectoffset);
    863 	}
    864 #undef VIRTIO_PTR
    865 
    866 	vq->vq_descx = kmem_zalloc(sizeof(vq->vq_descx[0]) * vq_num,
    867 	    KM_SLEEP);
    868 
    869 	mutex_init(&vq->vq_freedesc_lock, MUTEX_SPIN, sc->sc_ipl);
    870 	mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
    871 	mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
    872 
    873 	virtio_reset_vq(sc, vq);
    874 
    875 	aprint_verbose_dev(sc->sc_dev,
    876 	    "allocated %" PRIuBUSSIZE " byte for virtqueue %d for %s, "
    877 	    "size %d\n", allocsize, vq->vq_index, name, vq_num);
    878 	if (size_indirect > 0)
    879 		aprint_verbose_dev(sc->sc_dev,
    880 		    "using %" PRIuBUSSIZE " byte (%d entries) indirect "
    881 		    "descriptors\n", size_indirect, maxnsegs * vq_num);
    882 
    883 	return 0;
    884 
    885 err:
    886 	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
    887 	if (vq->vq_dmamap)
    888 		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
    889 	if (vq->vq_vaddr)
    890 		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
    891 	if (vq->vq_segs[0].ds_addr)
    892 		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
    893 	memset(vq, 0, sizeof(*vq));
    894 
    895 	return -1;
    896 }
    897 
    898 int
    899 virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
    900 {
    901 	uint16_t s;
    902 	size_t i;
    903 
    904 	if (vq->vq_vaddr == NULL)
    905 		return 0;
    906 
    907 	/* device must be already deactivated */
    908 	/* confirm the vq is empty */
    909 	s = vq->vq_free_idx;
    910 	i = 0;
    911 	while (s != virtio_rw16(sc, VRING_DESC_CHAIN_END)) {
    912 		s = vq->vq_desc[s].next;
    913 		i++;
    914 	}
    915 	if (i != vq->vq_num) {
    916 		printf("%s: freeing non-empty vq, index %d\n",
    917 		    device_xname(sc->sc_dev), vq->vq_index);
    918 		return EBUSY;
    919 	}
    920 
    921 	/* tell device that there's no virtqueue any longer */
    922 	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
    923 
    924 	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
    925 
    926 	kmem_free(vq->vq_descx, sizeof(vq->vq_descx[0]) * vq->vq_num);
    927 	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
    928 	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
    929 	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
    930 	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
    931 	mutex_destroy(&vq->vq_freedesc_lock);
    932 	mutex_destroy(&vq->vq_uring_lock);
    933 	mutex_destroy(&vq->vq_aring_lock);
    934 	memset(vq, 0, sizeof(*vq));
    935 
    936 	return 0;
    937 }
    938 
    939 /*
    940  * Free descriptor management.
    941  */
    942 static int
    943 vq_alloc_slot_locked(struct virtio_softc *sc, struct virtqueue *vq,
    944     size_t nslots)
    945 {
    946 	struct vring_desc *vd;
    947 	uint16_t head, tail;
    948 	size_t i;
    949 
    950 	KASSERT(mutex_owned(&vq->vq_freedesc_lock));
    951 
    952 	head = tail = virtio_rw16(sc, vq->vq_free_idx);
    953 	for (i = 0; i < nslots - 1; i++) {
    954 		if (tail == VRING_DESC_CHAIN_END)
    955 			return VRING_DESC_CHAIN_END;
    956 
    957 		vd = &vq->vq_desc[tail];
    958 		vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
    959 		tail = virtio_rw16(sc, vd->next);
    960 	}
    961 
    962 	if (tail == VRING_DESC_CHAIN_END)
    963 		return VRING_DESC_CHAIN_END;
    964 
    965 	vd = &vq->vq_desc[tail];
    966 	vd->flags = virtio_rw16(sc, 0);
    967 	vq->vq_free_idx = vd->next;
    968 
    969 	return head;
    970 }
    971 static uint16_t
    972 vq_alloc_slot(struct virtio_softc *sc, struct virtqueue *vq, size_t nslots)
    973 {
    974 	uint16_t rv;
    975 
    976 	mutex_enter(&vq->vq_freedesc_lock);
    977 	rv = vq_alloc_slot_locked(sc, vq, nslots);
    978 	mutex_exit(&vq->vq_freedesc_lock);
    979 
    980 	return rv;
    981 }
    982 
    983 static void
    984 vq_free_slot(struct virtio_softc *sc, struct virtqueue *vq, uint16_t slot)
    985 {
    986 	struct vring_desc *vd;
    987 	uint16_t s;
    988 
    989 	mutex_enter(&vq->vq_freedesc_lock);
    990 	vd = &vq->vq_desc[slot];
    991 	while ((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) != 0) {
    992 		s = virtio_rw16(sc, vd->next);
    993 		vd = &vq->vq_desc[s];
    994 	}
    995 	vd->next = vq->vq_free_idx;
    996 	vq->vq_free_idx = virtio_rw16(sc, slot);
    997 	mutex_exit(&vq->vq_freedesc_lock);
    998 }
    999 
   1000 /*
   1001  * Enqueue several dmamaps as a single request.
   1002  */
   1003 /*
   1004  * Typical usage:
 *  <queue size> instances of each of the following are kept in arrays:
   1006  *  - command blocks (in dmamem) should be pre-allocated and mapped
   1007  *  - dmamaps for command blocks should be pre-allocated and loaded
   1008  *  - dmamaps for payload should be pre-allocated
   1009  *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
   1010  *	if (r)		// currently 0 or EAGAIN
   1011  *		return r;
   1012  *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
   1013  *	if (r) {
   1014  *		virtio_enqueue_abort(sc, vq, slot);
   1015  *		return r;
   1016  *	}
   1017  *	r = virtio_enqueue_reserve(sc, vq, slot,
   1018  *	    dmamap_payload[slot]->dm_nsegs + 1);
   1019  *							// ^ +1 for command
   1020  *	if (r) {	// currently 0 or EAGAIN
   1021  *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
   1022  *		return r;				// do not call abort()
   1023  *	}
   1024  *	<setup and prepare commands>
   1025  *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
   1026  *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
   1027  *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
   1028  *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
   1029  *	virtio_enqueue_commit(sc, vq, slot, true);
   1030  */
   1031 
   1032 /*
   1033  * enqueue_prep: allocate a slot number
   1034  */
   1035 int
   1036 virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
   1037 {
   1038 	uint16_t slot;
   1039 
   1040 	KASSERT(slotp != NULL);
   1041 
   1042 	slot = vq_alloc_slot(sc, vq, 1);
   1043 	if (slot == VRING_DESC_CHAIN_END)
   1044 		return EAGAIN;
   1045 
   1046 	*slotp = slot;
   1047 
   1048 	return 0;
   1049 }
   1050 
   1051 /*
   1052  * enqueue_reserve: allocate remaining slots and build the descriptor chain.
   1053  */
   1054 int
   1055 virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
   1056     int slot, int nsegs)
   1057 {
   1058 	struct vring_desc *vd;
   1059 	struct vring_desc_extra *vdx;
   1060 	int i;
   1061 
   1062 	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
   1063 
   1064 	vdx = &vq->vq_descx[slot];
   1065 	vd = &vq->vq_desc[slot];
   1066 
   1067 	KASSERT((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0);
   1068 
   1069 	if ((vq->vq_indirect != NULL) &&
   1070 	    (nsegs >= MINSEG_INDIRECT) &&
   1071 	    (nsegs <= vq->vq_maxnsegs))
   1072 		vdx->use_indirect = true;
   1073 	else
   1074 		vdx->use_indirect = false;
   1075 
   1076 	if (vdx->use_indirect) {
   1077 		uint64_t addr;
   1078 
   1079 		addr = vq->vq_dmamap->dm_segs[0].ds_addr
   1080 		    + vq->vq_indirectoffset;
   1081 		addr += sizeof(struct vring_desc)
   1082 		    * vq->vq_maxnsegs * slot;
   1083 
   1084 		vd->addr  = virtio_rw64(sc, addr);
   1085 		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
   1086 		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
   1087 
   1088 		vd = &vq->vq_indirect[vq->vq_maxnsegs * slot];
   1089 		vdx->desc_base = vd;
   1090 		vdx->desc_free_idx = 0;
   1091 
   1092 		for (i = 0; i < nsegs - 1; i++) {
   1093 			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
   1094 		}
   1095 		vd[i].flags  = virtio_rw16(sc, 0);
   1096 	} else {
   1097 		if (nsegs > 1) {
   1098 			uint16_t s;
   1099 
   1100 			s = vq_alloc_slot(sc, vq, nsegs - 1);
   1101 			if (s == VRING_DESC_CHAIN_END) {
   1102 				vq_free_slot(sc, vq, slot);
   1103 				return EAGAIN;
   1104 			}
   1105 			vd->next = virtio_rw16(sc, s);
   1106 			vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
   1107 		}
   1108 
   1109 		vdx->desc_base = &vq->vq_desc[0];
   1110 		vdx->desc_free_idx = slot;
   1111 	}
   1112 
   1113 	return 0;
   1114 }
   1115 
   1116 /*
   1117  * enqueue: enqueue a single dmamap.
   1118  */
   1119 int
   1120 virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
   1121     bus_dmamap_t dmamap, bool write)
   1122 {
   1123 	struct vring_desc *vds;
   1124 	struct vring_desc_extra *vdx;
   1125 	uint16_t s;
   1126 	int i;
   1127 
   1128 	KASSERT(dmamap->dm_nsegs > 0);
   1129 
   1130 	vdx = &vq->vq_descx[slot];
   1131 	vds = vdx->desc_base;
   1132 	s = vdx->desc_free_idx;
   1133 
   1134 	KASSERT(vds != NULL);
   1135 
   1136 	for (i = 0; i < dmamap->dm_nsegs; i++) {
   1137 		KASSERT(s != VRING_DESC_CHAIN_END);
   1138 
   1139 		vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
   1140 		vds[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
   1141 		if (!write)
   1142 			vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
   1143 
   1144 		if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
   1145 			s = VRING_DESC_CHAIN_END;
   1146 		} else {
   1147 			s = virtio_rw16(sc, vds[s].next);
   1148 		}
   1149 	}
   1150 
   1151 	vdx->desc_free_idx = s;
   1152 
   1153 	return 0;
   1154 }
   1155 
   1156 int
   1157 virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
   1158     bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
   1159     bool write)
   1160 {
   1161 	struct vring_desc_extra *vdx;
   1162 	struct vring_desc *vds;
   1163 	uint16_t s;
   1164 
   1165 	vdx = &vq->vq_descx[slot];
   1166 	vds = vdx->desc_base;
   1167 	s = vdx->desc_free_idx;
   1168 
   1169 	KASSERT(s != VRING_DESC_CHAIN_END);
   1170 	KASSERT(vds != NULL);
   1171 	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
   1172 	KASSERT(dmamap->dm_segs[0].ds_len > start);
   1173 	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
   1174 
   1175 	vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
   1176 	vds[s].len  = virtio_rw32(sc, len);
   1177 	if (!write)
   1178 		vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
   1179 
   1180 	if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
   1181 		s = VRING_DESC_CHAIN_END;
   1182 	} else {
   1183 		s = virtio_rw16(sc, vds[s].next);
   1184 	}
   1185 
   1186 	vdx->desc_free_idx = s;
   1187 
   1188 	return 0;
   1189 }
   1190 
/*
 * enqueue_commit: add it to the aring.
 *
 * If slot >= 0, the slot's descriptors are synced for the device and
 * the slot is stored in the next entry of the avail ring (the private
 * vq_avail_idx is advanced).  If slot < 0 nothing is added and only the
 * notification step below is performed.
 *
 * If notifynow is true, the private vq_avail_idx is published in
 * vq_avail->idx and the device is kicked unless it has asked not to be
 * (event index or VRING_USED_F_NO_NOTIFY, depending on the negotiated
 * features).  If notifynow is false, vq_avail->idx is not updated here.
 *
 * Always returns 0.
 */
int
virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
    bool notifynow)
{

	/* negative slot: nothing to add, only publish/notify */
	if (slot < 0) {
		mutex_enter(&vq->vq_aring_lock);
		goto notify;
	}

	/* make the descriptors (and indirect table, if used) device-visible */
	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
	if (vq->vq_descx[slot].use_indirect)
		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);

	mutex_enter(&vq->vq_aring_lock);
	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
	    virtio_rw16(sc, slot);

notify:
	if (notifynow) {
		uint16_t o, n, t;
		uint16_t flags;

		/*
		 * o: last published avail idx minus one;
		 * n: the avail idx about to be published.
		 */
		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
		n = vq->vq_avail_idx;

		/*
		 * Prepare for `device->CPU' (host->guest) transfer
		 * into the buffer.  This must happen before we commit
		 * the vq->vq_avail->idx update to ensure we're not
		 * still using the buffer in case program-prior loads
		 * or stores in it get delayed past the store to
		 * vq->vq_avail->idx.
		 */
		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);

		/* ensure payload is published, then avail idx */
		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
		vq->vq_queued++;

		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
			/*
			 * Event index in use: t is the device's avail
			 * event value plus one.  Kick only if, in 16-bit
			 * modular arithmetic, n is closer to t than to o.
			 */
			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
			if ((uint16_t) (n - t) < (uint16_t) (n - o))
				sc->sc_ops->kick(sc, vq->vq_index);
		} else {
			/* kick unless the device set NO_NOTIFY in used flags */
			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
			flags = virtio_rw16(sc, vq->vq_used->flags);
			if (!(flags & VRING_USED_F_NO_NOTIFY))
				sc->sc_ops->kick(sc, vq->vq_index);
		}
	}
	mutex_exit(&vq->vq_aring_lock);

	return 0;
}
   1252 
   1253 /*
   1254  * enqueue_abort: rollback.
   1255  */
   1256 int
   1257 virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
   1258 {
   1259 	struct vring_desc_extra *vdx;
   1260 
   1261 	vdx = &vq->vq_descx[slot];
   1262 	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
   1263 	vdx->desc_base = NULL;
   1264 
   1265 	vq_free_slot(sc, vq, slot);
   1266 
   1267 	return 0;
   1268 }
   1269 
   1270 /*
   1271  * Dequeue a request.
   1272  */
   1273 /*
   1274  * dequeue: dequeue a request from uring; dmamap_sync for uring is
   1275  *	    already done in the interrupt handler.
   1276  */
   1277 int
   1278 virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
   1279     int *slotp, int *lenp)
   1280 {
   1281 	uint16_t slot, usedidx;
   1282 
   1283 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
   1284 		return ENOENT;
   1285 	mutex_enter(&vq->vq_uring_lock);
   1286 	usedidx = vq->vq_used_idx++;
   1287 	mutex_exit(&vq->vq_uring_lock);
   1288 	usedidx %= vq->vq_num;
   1289 	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
   1290 
   1291 	if (vq->vq_descx[slot].use_indirect)
   1292 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
   1293 
   1294 	if (slotp)
   1295 		*slotp = slot;
   1296 	if (lenp)
   1297 		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
   1298 
   1299 	return 0;
   1300 }
   1301 
   1302 /*
   1303  * dequeue_commit: complete dequeue; the slot is recycled for future use.
   1304  *                 if you forget to call this the slot will be leaked.
   1305  */
   1306 int
   1307 virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
   1308 {
   1309 	struct vring_desc_extra *vdx;
   1310 
   1311 	vdx = &vq->vq_descx[slot];
   1312 	vdx->desc_base = NULL;
   1313 	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
   1314 
   1315 	vq_free_slot(sc, vq, slot);
   1316 
   1317 	return 0;
   1318 }
   1319 
   1320 /*
   1321  * Attach a child, fill all the members.
   1322  */
   1323 void
   1324 virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
   1325     uint64_t req_features, const char *feat_bits)
   1326 {
   1327 	char buf[1024];
   1328 
   1329 	KASSERT(sc->sc_child == NULL);
   1330 	KASSERT(sc->sc_child_state == VIRTIO_NO_CHILD);
   1331 
   1332 	sc->sc_child = child;
   1333 	sc->sc_ipl = ipl;
   1334 
   1335 	virtio_negotiate_features(sc, req_features);
   1336 	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
   1337 	aprint_normal(": features: %s\n", buf);
   1338 	aprint_naive("\n");
   1339 }
   1340 
   1341 int
   1342 virtio_child_attach_finish(struct virtio_softc *sc,
   1343     struct virtqueue *vqs, size_t nvqs,
   1344     virtio_callback config_change,
   1345     int req_flags)
   1346 {
   1347 	size_t i;
   1348 	int r;
   1349 
   1350 #ifdef DIAGNOSTIC
   1351 	KASSERT(nvqs > 0);
   1352 #define VIRTIO_ASSERT_FLAGS	(VIRTIO_F_INTR_SOFTINT | VIRTIO_F_INTR_PERVQ)
   1353 	KASSERT((req_flags & VIRTIO_ASSERT_FLAGS) != VIRTIO_ASSERT_FLAGS);
   1354 #undef VIRTIO_ASSERT_FLAGS
   1355 
   1356 	for (i = 0; i < nvqs; i++){
   1357 		KASSERT(vqs[i].vq_index == i);
   1358 		KASSERT(vqs[i].vq_intrhand != NULL);
   1359 		KASSERT(vqs[i].vq_done == NULL ||
   1360 		    vqs[i].vq_intrhand == virtio_vq_done);
   1361 	}
   1362 #endif
   1363 
   1364 
   1365 	sc->sc_vqs = vqs;
   1366 	sc->sc_nvqs = nvqs;
   1367 	sc->sc_config_change = config_change;
   1368 	sc->sc_intrhand = virtio_vq_intr;
   1369 	sc->sc_flags = req_flags;
   1370 
   1371 	/* set the vq address */
   1372 	for (i = 0; i < nvqs; i++) {
   1373 		sc->sc_ops->setup_queue(sc, vqs[i].vq_index,
   1374 		    vqs[i].vq_dmamap->dm_segs[0].ds_addr);
   1375 	}
   1376 
   1377 	r = sc->sc_ops->alloc_interrupts(sc);
   1378 	if (r != 0) {
   1379 		aprint_error_dev(sc->sc_dev,
   1380 		    "failed to allocate interrupts\n");
   1381 		goto fail;
   1382 	}
   1383 
   1384 	r = sc->sc_ops->setup_interrupts(sc, 0);
   1385 	if (r != 0) {
   1386 		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
   1387 		goto fail;
   1388 	}
   1389 
   1390 	KASSERT(sc->sc_soft_ih == NULL);
   1391 	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
   1392 		u_int flags = SOFTINT_NET;
   1393 		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
   1394 			flags |= SOFTINT_MPSAFE;
   1395 
   1396 		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
   1397 		    sc);
   1398 		if (sc->sc_soft_ih == NULL) {
   1399 			sc->sc_ops->free_interrupts(sc);
   1400 			aprint_error_dev(sc->sc_dev,
   1401 			    "failed to establish soft interrupt\n");
   1402 			goto fail;
   1403 		}
   1404 	}
   1405 
   1406 	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FINISHED;
   1407 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
   1408 	return 0;
   1409 
   1410 fail:
   1411 	if (sc->sc_soft_ih) {
   1412 		softint_disestablish(sc->sc_soft_ih);
   1413 		sc->sc_soft_ih = NULL;
   1414 	}
   1415 
   1416 	sc->sc_ops->free_interrupts(sc);
   1417 
   1418 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
   1419 	return 1;
   1420 }
   1421 
   1422 void
   1423 virtio_child_detach(struct virtio_softc *sc)
   1424 {
   1425 
   1426 	/* already detached */
   1427 	if (sc->sc_child == NULL)
   1428 		return;
   1429 
   1430 
   1431 	virtio_device_reset(sc);
   1432 
   1433 	sc->sc_ops->free_interrupts(sc);
   1434 
   1435 	if (sc->sc_soft_ih) {
   1436 		softint_disestablish(sc->sc_soft_ih);
   1437 		sc->sc_soft_ih = NULL;
   1438 	}
   1439 
   1440 	sc->sc_vqs = NULL;
   1441 	sc->sc_child = NULL;
   1442 }
   1443 
   1444 void
   1445 virtio_child_attach_failed(struct virtio_softc *sc)
   1446 {
   1447 	virtio_child_detach(sc);
   1448 
   1449 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
   1450 
   1451 	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FAILED;
   1452 }
   1453 
   1454 bus_dma_tag_t
   1455 virtio_dmat(struct virtio_softc *sc)
   1456 {
   1457 	return sc->sc_dmat;
   1458 }
   1459 
   1460 device_t
   1461 virtio_child(struct virtio_softc *sc)
   1462 {
   1463 	return sc->sc_child;
   1464 }
   1465 
   1466 int
   1467 virtio_intrhand(struct virtio_softc *sc)
   1468 {
   1469 	return (*sc->sc_intrhand)(sc);
   1470 }
   1471 
   1472 uint64_t
   1473 virtio_features(struct virtio_softc *sc)
   1474 {
   1475 	return sc->sc_active_features;
   1476 }
   1477 
   1478 int
   1479 virtio_attach_failed(struct virtio_softc *sc)
   1480 {
   1481 	device_t self = sc->sc_dev;
   1482 
   1483 	/* no error if its not connected, but its failed */
   1484 	if (sc->sc_childdevid == 0)
   1485 		return 1;
   1486 
   1487 	if (sc->sc_child == NULL) {
   1488 		switch (sc->sc_child_state) {
   1489 		case VIRTIO_CHILD_ATTACH_FAILED:
   1490 			aprint_error_dev(self,
   1491 			    "virtio configuration failed\n");
   1492 			break;
   1493 		case VIRTIO_NO_CHILD:
   1494 			aprint_error_dev(self,
   1495 			    "no matching child driver; not configured\n");
   1496 			break;
   1497 		default:
   1498 			/* sanity check */
   1499 			aprint_error_dev(self,
   1500 			    "virtio internal error, "
   1501 			    "child driver is not configured\n");
   1502 			break;
   1503 		}
   1504 
   1505 		return 1;
   1506 	}
   1507 
   1508 	/* sanity check */
   1509 	if (sc->sc_child_state != VIRTIO_CHILD_ATTACH_FINISHED) {
   1510 		aprint_error_dev(self, "virtio internal error, child driver "
   1511 		    "signaled OK but didn't initialize interrupts\n");
   1512 		return 1;
   1513 	}
   1514 
   1515 	return 0;
   1516 }
   1517 
   1518 void
   1519 virtio_print_device_type(device_t self, int id, int revision)
   1520 {
   1521 	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
   1522 	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
   1523 	    id,
   1524 	    revision);
   1525 }
   1526 
   1527 
/*
 * Declare the "virtio" kernel module with class MODULE_CLASS_DRIVER.
 * NOTE(review): the third argument is presumably the list of required
 * modules (none here) -- confirm against module(9).
 */
MODULE(MODULE_CLASS_DRIVER, virtio, NULL);

#ifdef _MODULE
/* provides the *_ioconf_virtio tables used by virtio_modcmd() */
#include "ioconf.c"
#endif
   1533 
   1534 static int
   1535 virtio_modcmd(modcmd_t cmd, void *opaque)
   1536 {
   1537 	int error = 0;
   1538 
   1539 #ifdef _MODULE
   1540 	switch (cmd) {
   1541 	case MODULE_CMD_INIT:
   1542 		error = config_init_component(cfdriver_ioconf_virtio,
   1543 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
   1544 		break;
   1545 	case MODULE_CMD_FINI:
   1546 		error = config_fini_component(cfdriver_ioconf_virtio,
   1547 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
   1548 		break;
   1549 	default:
   1550 		error = ENOTTY;
   1551 		break;
   1552 	}
   1553 #endif
   1554 
   1555 	return error;
   1556 }
   1557