pvscsi.c revision 1.4
      1 /*-
      2  * Copyright (c) 2018 VMware, Inc.
      3  *
      4  * SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0)
      5  */
      6 
      7 /*
      8 
      9 These files are provided under a dual BSD-2 Clause/GPLv2 license. When
     10 using or redistributing this file, you may do so under either license.
     11 
     12 BSD-2 Clause License
     13 
     14 Copyright (c) 2018 VMware, Inc.
     15 
     16 Redistribution and use in source and binary forms, with or without
     17 modification, are permitted provided that the following conditions
     18 are met:
     19 
     20   * Redistributions of source code must retain the above copyright
     21     notice, this list of conditions and the following disclaimer.
     22 
     23   * Redistributions in binary form must reproduce the above copyright
     24     notice, this list of conditions and the following disclaimer in
     25     the documentation and/or other materials provided with the
     26     distribution.
     27 
     28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     32 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     33 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     34 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     35 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     36 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     37 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     38 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     39 
     40 GPL License Summary
     41 
     42 Copyright (c) 2018 VMware, Inc.
     43 
     44 This program is free software; you can redistribute it and/or modify
     45 it under the terms of version 2 of the GNU General Public License as
     46 published by the Free Software Foundation.
     47 
     48 This program is distributed in the hope that it will be useful, but
     49 WITHOUT ANY WARRANTY; without even the implied warranty of
     50 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     51 General Public License for more details.
     52 
     53 You should have received a copy of the GNU General Public License
     54 along with this program; if not, write to the Free Software
     55 Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
     56 The full GNU General Public License is included in this distribution
     57 in the file called LICENSE.GPL.
     58 
     59 */
     60 
     61 #include <sys/cdefs.h>
     62 __KERNEL_RCSID(0, "$NetBSD: pvscsi.c,v 1.4 2025/09/06 02:56:40 riastradh Exp $");
     63 
     64 #include <sys/param.h>
     65 
     66 #include <sys/buf.h>
     67 #include <sys/bus.h>
     68 #include <sys/cpu.h>
     69 #include <sys/device.h>
     70 #include <sys/kernel.h>
     71 #include <sys/kmem.h>
     72 #include <sys/paravirt_membar.h>
     73 #include <sys/queue.h>
     74 #include <sys/sysctl.h>
     75 #include <sys/systm.h>
     76 
     77 #include <dev/pci/pcireg.h>
     78 #include <dev/pci/pcivar.h>
     79 #include <dev/pci/pcidevs.h>
     80 
     81 #include <dev/scsipi/scsi_all.h>
     82 #include <dev/scsipi/scsi_message.h>
     83 #include <dev/scsipi/scsiconf.h>
     84 #include <dev/scsipi/scsipi_disk.h>
     85 #include <dev/scsipi/scsi_disk.h>
     86 
     87 #include "pvscsi.h"
     88 
     89 #define	PVSCSI_DEFAULT_NUM_PAGES_REQ_RING	8
     90 #define	PVSCSI_SENSE_LENGTH			256
     91 
     92 #define PVSCSI_MAXPHYS				MAXPHYS
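         /*
          * A transfer of up to MAXPHYS bytes that is not page-aligned can
          * span (MAXPHYS / PAGE_SIZE) + 1 pages, hence the extra segment.
          */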
     93 #define PVSCSI_MAXPHYS_SEGS			((PVSCSI_MAXPHYS / PAGE_SIZE) + 1)
     94 
     95 #define PVSCSI_CMD_PER_LUN 64
     96 #define PVSCSI_MAX_LUN 8
     97 #define PVSCSI_MAX_TARGET 16
     98 
     99 //#define PVSCSI_DEBUG_LOGGING
    100 
    101 #ifdef PVSCSI_DEBUG_LOGGING
    102 #define	DEBUG_PRINTF(level, dev, fmt, ...)				\
    103 	do {								\
    104 		if (pvscsi_log_level >= (level)) {			\
    105 			aprint_normal_dev((dev), (fmt), ##__VA_ARGS__);	\
    106 		}							\
     107 	} while (0)
    108 #else
    109 #define DEBUG_PRINTF(level, dev, fmt, ...)
    110 #endif /* PVSCSI_DEBUG_LOGGING */
    111 
    112 struct pvscsi_softc;
    113 struct pvscsi_hcb;
    114 struct pvscsi_dma;
    115 
    116 #define VMWARE_PVSCSI_DEVSTR	"VMware Paravirtual SCSI Controller"
    117 
    118 static inline uint32_t pvscsi_reg_read(struct pvscsi_softc *sc,
    119     uint32_t offset);
    120 static inline void pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset,
    121     uint32_t val);
    122 static inline uint32_t pvscsi_read_intr_status(struct pvscsi_softc *sc);
    123 static inline void pvscsi_write_intr_status(struct pvscsi_softc *sc,
    124     uint32_t val);
    125 static inline void pvscsi_intr_enable(struct pvscsi_softc *sc);
    126 static inline void pvscsi_intr_disable(struct pvscsi_softc *sc);
    127 static void pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0);
    128 static void pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    129     uint32_t len);
    130 static uint32_t pvscsi_get_max_targets(struct pvscsi_softc *sc);
    131 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable);
    132 static void pvscsi_setup_rings(struct pvscsi_softc *sc);
    133 static void pvscsi_setup_msg_ring(struct pvscsi_softc *sc);
    134 static int pvscsi_hw_supports_msg(struct pvscsi_softc *sc);
    135 
    136 static void pvscsi_timeout(void *arg);
    137 static void pvscsi_adapter_reset(struct pvscsi_softc *sc);
    138 static void pvscsi_bus_reset(struct pvscsi_softc *sc);
    139 static void pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target);
    140 static void pvscsi_abort(struct pvscsi_softc *sc, uint32_t target,
    141     struct pvscsi_hcb *hcb);
    142 
    143 static void pvscsi_process_completion(struct pvscsi_softc *sc,
    144     struct pvscsi_ring_cmp_desc *e);
    145 static void pvscsi_process_cmp_ring(struct pvscsi_softc *sc);
    146 static void pvscsi_process_msg(struct pvscsi_softc *sc,
    147     struct pvscsi_ring_msg_desc *e);
    148 static void pvscsi_process_msg_ring(struct pvscsi_softc *sc);
    149 
    150 static void pvscsi_intr_locked(struct pvscsi_softc *sc);
    151 static int pvscsi_intr(void *xsc);
    152 
    153 static void pvscsi_scsipi_request(struct scsipi_channel *,
    154     scsipi_adapter_req_t, void *);
    155 
    156 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    157     struct pvscsi_hcb *hcb);
    158 static inline struct pvscsi_hcb *pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    159     uint64_t context);
    160 static struct pvscsi_hcb * pvscsi_hcb_get(struct pvscsi_softc *sc);
    161 static void pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb);
    162 
    163 static void pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma);
    164 static int pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    165     bus_size_t size, bus_size_t alignment);
    166 static int pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc,
    167     struct pvscsi_dma *dma, uint64_t *ppn_list, uint32_t num_pages);
    168 static void pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc,
    169     uint32_t hcbs_allocated);
    170 static int pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc);
    171 static void pvscsi_free_rings(struct pvscsi_softc *sc);
    172 static int pvscsi_allocate_rings(struct pvscsi_softc *sc);
    173 static void pvscsi_free_interrupts(struct pvscsi_softc *sc);
    174 static int pvscsi_setup_interrupts(struct pvscsi_softc *sc, const struct pci_attach_args *);
    175 static void pvscsi_free_all(struct pvscsi_softc *sc);
    176 
    177 static void pvscsi_attach(device_t, device_t, void *);
    178 static int pvscsi_detach(device_t, int);
    179 static int pvscsi_probe(device_t, cfdata_t, void *);
    180 
    181 #define pvscsi_get_tunable(_sc, _name, _value)	(_value)
    182 
    183 #ifdef PVSCSI_DEBUG_LOGGING
    184 static int pvscsi_log_level = 1;
    185 #endif
    186 
    187 #define TUNABLE_INT(__x, __d)					\
    188 	err = sysctl_createv(clog, 0, &rnode, &cnode,		\
    189 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT,	\
    190 	    #__x, SYSCTL_DESCR(__d),				\
    191 	    NULL, 0, &(pvscsi_ ## __x), sizeof(pvscsi_ ## __x), \
    192 	    CTL_CREATE,	CTL_EOL);				\
    193 	if (err)						\
    194 		goto fail;
    195 
    196 static int pvscsi_request_ring_pages = 0;
    197 static int pvscsi_use_msg = 1;
    198 static int pvscsi_use_msi = 1;
    199 static int pvscsi_use_msix = 1;
    200 static int pvscsi_use_req_call_threshold = 0;
    201 static int pvscsi_max_queue_depth = 0;
    202 
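         /*
          * The tunables above surface as sysctl variables under hw.pvscsi,
          * e.g.:
          *
          *	sysctl -w hw.pvscsi.max_queue_depth=64
          *
          * Apart from log_level, they are only consulted while the driver
          * attaches, so they must be set before the device is configured.
          */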
    203 SYSCTL_SETUP(sysctl_hw_pvscsi_setup, "sysctl hw.pvscsi setup")
    204 {
    205 	int err;
    206 	const struct sysctlnode *rnode;
    207 	const struct sysctlnode *cnode;
    208 
    209 	err = sysctl_createv(clog, 0, NULL, &rnode,
    210 	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "pvscsi",
    211 	    SYSCTL_DESCR("pvscsi global controls"),
    212 	    NULL, 0, NULL, 0, CTL_HW, CTL_CREATE, CTL_EOL);
    213 
    214 	if (err)
    215 		goto fail;
    216 
    217 #ifdef PVSCSI_DEBUG_LOGGING
    218 	TUNABLE_INT(log_level, "Enable debugging output");
    219 #endif
    220 
    221 	TUNABLE_INT(request_ring_pages, "No. of pages for the request ring");
    222 	TUNABLE_INT(use_msg, "Use message passing");
    223 	TUNABLE_INT(use_msi, "Use MSI interrupt");
     224 	TUNABLE_INT(use_msix, "Use MSI-X interrupt");
     225 	TUNABLE_INT(use_req_call_threshold, "Use request call threshold");
    226 	TUNABLE_INT(max_queue_depth, "Maximum size of request queue");
    227 
    228 	return;
    229 fail:
    230 	aprint_error("%s: sysctl_createv failed (err = %d)\n", __func__, err);
    231 }
    232 
    233 struct pvscsi_sg_list {
    234 	struct pvscsi_sg_element sge[PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT];
    235 };
    236 
    237 #define	PVSCSI_ABORT_TIMEOUT	2
    238 #define	PVSCSI_RESET_TIMEOUT	10
    239 
    240 #define	PVSCSI_HCB_NONE		0
    241 #define	PVSCSI_HCB_ABORT	1
    242 #define	PVSCSI_HCB_DEVICE_RESET	2
    243 #define	PVSCSI_HCB_BUS_RESET	3
    244 
    245 struct pvscsi_hcb {
    246 	struct scsipi_xfer 		*xs;
    247 	struct pvscsi_softc		*sc;
    248 
    249 	struct pvscsi_ring_req_desc	*e;
    250 	int				 recovery;
    251 	SLIST_ENTRY(pvscsi_hcb)		 links;
    252 
    253 	bus_dmamap_t			 dma_map;
    254 	bus_addr_t			 dma_map_offset;
    255 	bus_size_t			 dma_map_size;
    256 	void				*sense_buffer;
    257 	bus_addr_t			 sense_buffer_paddr;
    258 	struct pvscsi_sg_list		*sg_list;
    259 	bus_addr_t			 sg_list_paddr;
    260 	bus_addr_t			 sg_list_offset;
    261 };
    262 
    263 struct pvscsi_dma {
    264 	bus_dmamap_t		 map;
    265 	void		        *vaddr;
    266 	bus_addr_t	 	 paddr;
    267 	bus_size_t	 	 size;
    268 	bus_dma_segment_t	 seg[1];
    269 };
    270 
    271 struct pvscsi_softc {
    272 	device_t		 dev;
    273 	kmutex_t		 lock;
    274 
    275 	device_t		 sc_scsibus_dv;
    276 	struct scsipi_adapter	 sc_adapter;
    277 	struct scsipi_channel 	 sc_channel;
    278 
    279 	struct pvscsi_rings_state	*rings_state;
    280 	struct pvscsi_ring_req_desc	*req_ring;
    281 	struct pvscsi_ring_cmp_desc	*cmp_ring;
    282 	struct pvscsi_ring_msg_desc	*msg_ring;
    283 	uint32_t		 hcb_cnt;
    284 	struct pvscsi_hcb	*hcbs;
    285 	SLIST_HEAD(, pvscsi_hcb) free_list;
    286 
    287 	bus_dma_tag_t		sc_dmat;
    288 	bus_space_tag_t		sc_memt;
    289 	bus_space_handle_t	sc_memh;
    290 	bus_size_t		sc_mems;
    291 
    292 	bool		 use_msg;
    293 	uint32_t	 max_targets;
    294 	int		 mm_rid;
    295 	int		 irq_id;
    296 	int		 use_req_call_threshold;
    297 
    298 	pci_chipset_tag_t	 sc_pc;
    299 	pci_intr_handle_t *	 sc_pihp;
    300 	void			*sc_ih;
    301 
    302 	uint64_t	rings_state_ppn;
    303 	uint32_t	req_ring_num_pages;
    304 	uint64_t	req_ring_ppn[PVSCSI_MAX_NUM_PAGES_REQ_RING];
    305 	uint32_t	cmp_ring_num_pages;
    306 	uint64_t	cmp_ring_ppn[PVSCSI_MAX_NUM_PAGES_CMP_RING];
    307 	uint32_t	msg_ring_num_pages;
    308 	uint64_t	msg_ring_ppn[PVSCSI_MAX_NUM_PAGES_MSG_RING];
    309 
    310 	struct	pvscsi_dma rings_state_dma;
    311 	struct	pvscsi_dma req_ring_dma;
    312 	struct	pvscsi_dma cmp_ring_dma;
    313 	struct	pvscsi_dma msg_ring_dma;
    314 
    315 	struct	pvscsi_dma sg_list_dma;
    316 	struct	pvscsi_dma sense_buffer_dma;
    317 };
    318 
    319 CFATTACH_DECL3_NEW(pvscsi, sizeof(struct pvscsi_softc),
    320     pvscsi_probe, pvscsi_attach, pvscsi_detach, NULL, NULL, NULL,
    321     DVF_DETACH_SHUTDOWN);
    322 
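         /*
          * Sync helpers for the shared-memory rings: sync a single member
          * of the rings-state page, or a single descriptor of a ring,
          * rather than the whole DMA area, so that producer/consumer
          * updates stay correct on systems without coherent DMA.
          */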
    323 #define	PVSCSI_DMA_SYNC_STATE(sc, dma, structptr, member, ops)		      \
    324 	bus_dmamap_sync((sc)->sc_dmat, (dma)->map,			      \
    325 	    /*offset*/offsetof(__typeof__(*(structptr)), member),	      \
    326 	    /*length*/sizeof((structptr)->member),			      \
    327 	    (ops))
    328 
    329 #define	PVSCSI_DMA_SYNC_RING(sc, dma, ring, idx, ops)			      \
    330 	bus_dmamap_sync((sc)->sc_dmat, (dma)->map,			      \
    331 	    /*offset*/sizeof(*(ring)) * (idx),				      \
    332 	    /*length*/sizeof(*(ring)),					      \
    333 	    (ops))
    334 
    335 static inline uint32_t
    336 pvscsi_reg_read(struct pvscsi_softc *sc, uint32_t offset)
    337 {
    338 
    339 	return (bus_space_read_4(sc->sc_memt, sc->sc_memh, offset));
    340 }
    341 
    342 static inline void
    343 pvscsi_reg_write(struct pvscsi_softc *sc, uint32_t offset, uint32_t val)
    344 {
    345 
    346 	bus_space_write_4(sc->sc_memt, sc->sc_memh, offset, val);
    347 }
    348 
    349 static inline uint32_t
    350 pvscsi_read_intr_status(struct pvscsi_softc *sc)
    351 {
    352 
    353 	return (pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_INTR_STATUS));
    354 }
    355 
    356 static inline void
    357 pvscsi_write_intr_status(struct pvscsi_softc *sc, uint32_t val)
    358 {
    359 
    360 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_STATUS, val);
    361 }
    362 
    363 static inline void
    364 pvscsi_intr_enable(struct pvscsi_softc *sc)
    365 {
    366 	uint32_t mask;
    367 
    368 	mask = PVSCSI_INTR_CMPL_MASK;
    369 	if (sc->use_msg) {
    370 		mask |= PVSCSI_INTR_MSG_MASK;
    371 	}
    372 
    373 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, mask);
    374 }
    375 
    376 static inline void
    377 pvscsi_intr_disable(struct pvscsi_softc *sc)
    378 {
    379 
    380 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_INTR_MASK, 0);
    381 }
    382 
    383 static void
    384 pvscsi_kick_io(struct pvscsi_softc *sc, uint8_t cdb0)
    385 {
    386 	struct pvscsi_dma *s_dma;
    387 	struct pvscsi_rings_state *s;
    388 
    389 	DEBUG_PRINTF(2, sc->dev, "%s: cdb0 %#x\n", __func__, cdb0);
    390 	if (cdb0 == SCSI_READ_6_COMMAND  || cdb0 == READ_10  ||
    391 	    cdb0 == READ_12  || cdb0 == READ_16  ||
    392 	    cdb0 == SCSI_WRITE_6_COMMAND || cdb0 == WRITE_10 ||
    393 	    cdb0 == WRITE_12 || cdb0 == WRITE_16) {
    394 		s_dma = &sc->rings_state_dma;
    395 		s = sc->rings_state;
    396 
    397 		/*
    398 		 * Ensure the command has been published before we read
    399 		 * req_cons_idx to test whether we need to kick the
    400 		 * host.
    401 		 */
    402 		paravirt_membar_sync();
    403 
    404 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, req_cons_idx,
    405 		    BUS_DMASYNC_POSTREAD);
    406 		DEBUG_PRINTF(2, sc->dev, "%s req prod %d cons %d\n", __func__,
    407 		    s->req_prod_idx, s->req_cons_idx);
    408 		if (!sc->use_req_call_threshold ||
    409 		    (s->req_prod_idx - s->req_cons_idx) >=
    410 		     s->req_call_threshold) {
    411 			pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
    412 			DEBUG_PRINTF(2, sc->dev, "kicked\n");
    413 		} else {
     414 			DEBUG_PRINTF(2, sc->dev, "kick skipped (below req call threshold)\n");
    415 		}
    416 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, req_cons_idx,
    417 		    BUS_DMASYNC_PREREAD);
    418 	} else {
    419 		s = sc->rings_state;
    420 		/*
    421 		 * XXX req_cons_idx in debug log might be stale, but no
    422 		 * need for DMA sync otherwise in this branch
    423 		 */
    424 		DEBUG_PRINTF(1, sc->dev, "%s req prod %d cons %d not checked\n", __func__,
    425 		    s->req_prod_idx, s->req_cons_idx);
    426 
    427 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
    428 	}
    429 }
    430 
    431 static void
    432 pvscsi_write_cmd(struct pvscsi_softc *sc, uint32_t cmd, void *data,
    433 		 uint32_t len)
    434 {
    435 	uint32_t *data_ptr;
    436 	int i;
    437 
    438 	KASSERTMSG(len % sizeof(uint32_t) == 0,
    439 		"command size not a multiple of 4");
    440 
    441 	data_ptr = data;
    442 	len /= sizeof(uint32_t);
    443 
    444 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND, cmd);
    445 	for (i = 0; i < len; ++i) {
    446 		pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND_DATA,
    447 		   data_ptr[i]);
    448 	}
    449 }
    450 
    451 static inline uint64_t pvscsi_hcb_to_context(struct pvscsi_softc *sc,
    452     struct pvscsi_hcb *hcb)
    453 {
    454 
    455 	/* Offset by 1 because context must not be 0 */
    456 	return (hcb - sc->hcbs + 1);
    457 }
    458 
    459 static inline struct pvscsi_hcb* pvscsi_context_to_hcb(struct pvscsi_softc *sc,
    460     uint64_t context)
    461 {
    462 
    463 	return (sc->hcbs + (context - 1));
    464 }
    465 
    466 static struct pvscsi_hcb *
    467 pvscsi_hcb_get(struct pvscsi_softc *sc)
    468 {
    469 	struct pvscsi_hcb *hcb;
    470 
    471 	KASSERT(mutex_owned(&sc->lock));
    472 
    473 	hcb = SLIST_FIRST(&sc->free_list);
    474 	if (hcb) {
    475 		SLIST_REMOVE_HEAD(&sc->free_list, links);
    476 	}
    477 
    478 	return (hcb);
    479 }
    480 
    481 static void
    482 pvscsi_hcb_put(struct pvscsi_softc *sc, struct pvscsi_hcb *hcb)
    483 {
    484 
    485 	KASSERT(mutex_owned(&sc->lock));
    486 	hcb->xs = NULL;
    487 	hcb->e = NULL;
    488 	hcb->recovery = PVSCSI_HCB_NONE;
    489 	SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
    490 }
    491 
    492 static uint32_t
    493 pvscsi_get_max_targets(struct pvscsi_softc *sc)
    494 {
    495 	uint32_t max_targets;
    496 
    497 	pvscsi_write_cmd(sc, PVSCSI_CMD_GET_MAX_TARGETS, NULL, 0);
    498 
    499 	max_targets = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
    500 
    501 	if (max_targets == ~0) {
    502 		max_targets = 16;
    503 	}
    504 
    505 	return (max_targets);
    506 }
    507 
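         /*
          * Request-call threshold: when the device supports it, the guest
          * may skip the doorbell ("kick") register write while the device
          * is still draining the request ring, and only kick once the
          * number of unconsumed requests reaches req_call_threshold.
          * See pvscsi_kick_io() for the consumer side.
          */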
    508 static int pvscsi_setup_req_call(struct pvscsi_softc *sc, uint32_t enable)
    509 {
    510 	uint32_t status;
    511 	struct pvscsi_cmd_desc_setup_req_call cmd;
    512 
    513 	if (!pvscsi_get_tunable(sc, "pvscsi_use_req_call_threshold",
    514 	    pvscsi_use_req_call_threshold)) {
    515 		return (0);
    516 	}
    517 
    518 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
    519 	    PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
    520 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
    521 
    522 	if (status != -1) {
    523 		memset(&cmd, 0, sizeof(cmd));
    524 		cmd.enable = enable;
    525 		pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
    526 		    &cmd, sizeof(cmd));
    527 		status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
    528 
    529 		/*
    530 		 * After setup, sync req_call_threshold before use.
    531 		 * After this point it should be stable, so no need to
    532 		 * sync again during use.
    533 		 */
    534 		PVSCSI_DMA_SYNC_STATE(sc, &sc->rings_state_dma,
    535 		    sc->rings_state, req_call_threshold,
    536 		    BUS_DMASYNC_POSTREAD);
    537 
    538 		return (status != 0);
    539 	} else {
    540 		return (0);
    541 	}
    542 }
    543 
    544 static void
    545 pvscsi_dma_free(struct pvscsi_softc *sc, struct pvscsi_dma *dma)
    546 {
     547 
         	/*
         	 * Tolerate a never-allocated (all-zero) pvscsi_dma so that
         	 * error paths may call the free routines unconditionally.
         	 */
         	if (dma->map == NULL)
         		return;

     548 	bus_dmamap_unload(sc->sc_dmat, dma->map);
    549 	bus_dmamem_unmap(sc->sc_dmat, dma->vaddr, dma->size);
    550 	bus_dmamap_destroy(sc->sc_dmat, dma->map);
    551 	bus_dmamem_free(sc->sc_dmat, dma->seg, __arraycount(dma->seg));
    552 
    553 	memset(dma, 0, sizeof(*dma));
    554 }
    555 
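         /*
          * Allocate one physically contiguous DMA area: the usual NetBSD
          * bus_dmamem_alloc -> bus_dmamem_map -> bus_dmamap_create ->
          * bus_dmamap_load sequence, unwound in reverse order on failure.
          */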
    556 static int
    557 pvscsi_dma_alloc(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    558     bus_size_t size, bus_size_t alignment)
    559 {
    560 	int error;
    561 	int nsegs;
    562 
    563 	memset(dma, 0, sizeof(*dma));
    564 
    565 	error = bus_dmamem_alloc(sc->sc_dmat, size, alignment, 0, dma->seg,
    566 	    __arraycount(dma->seg), &nsegs, BUS_DMA_WAITOK);
    567 	if (error) {
    568 		aprint_normal_dev(sc->dev, "error allocating dma mem, error %d\n",
    569 		    error);
    570 		goto fail;
    571 	}
    572 
    573 	error = bus_dmamem_map(sc->sc_dmat, dma->seg, nsegs, size,
    574 	    &dma->vaddr, BUS_DMA_WAITOK);
    575 	if (error != 0) {
    576 		device_printf(sc->dev, "Failed to map DMA memory\n");
    577 		goto dmamemmap_fail;
    578 	}
    579 
    580 	error = bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
    581 	    BUS_DMA_WAITOK, &dma->map);
    582 	if (error != 0) {
    583 		device_printf(sc->dev, "Failed to create DMA map\n");
    584 		goto dmamapcreate_fail;
    585 	}
    586 
    587 	error = bus_dmamap_load(sc->sc_dmat, dma->map, dma->vaddr, size,
    588 	    NULL, BUS_DMA_WAITOK);
    589 	if (error) {
     590 		aprint_normal_dev(sc->dev, "error loading dma map, error %d\n",
    591 		    error);
    592 		goto dmamapload_fail;
    593 	}
    594 
    595 	dma->paddr = dma->map->dm_segs[0].ds_addr;
    596 	dma->size = size;
    597 
    598 	return 0;
    599 
    600 dmamapload_fail:
    601 	bus_dmamap_destroy(sc->sc_dmat, dma->map);
    602 dmamapcreate_fail:
     603 	bus_dmamem_unmap(sc->sc_dmat, dma->vaddr, size);
    604 dmamemmap_fail:
    605 	bus_dmamem_free(sc->sc_dmat, dma->seg, __arraycount(dma->seg));
     606 fail:
         	/* zero the descriptor so pvscsi_dma_free() sees it as unallocated */
         	memset(dma, 0, sizeof(*dma));
     607 
     608 	return (error);
    609 }
    610 
    611 static int
    612 pvscsi_dma_alloc_ppns(struct pvscsi_softc *sc, struct pvscsi_dma *dma,
    613     uint64_t *ppn_list, uint32_t num_pages)
    614 {
    615 	int error;
    616 	uint32_t i;
    617 	uint64_t ppn;
    618 
    619 	error = pvscsi_dma_alloc(sc, dma, num_pages * PAGE_SIZE, PAGE_SIZE);
    620 	if (error) {
    621 		aprint_normal_dev(sc->dev, "Error allocating pages, error %d\n",
    622 		    error);
    623 		return (error);
    624 	}
    625 
    626 	ppn = dma->paddr >> PAGE_SHIFT;
    627 	for (i = 0; i < num_pages; i++) {
    628 		ppn_list[i] = ppn + i;
    629 	}
    630 
    631 	return (0);
    632 }
    633 
    634 static void
    635 pvscsi_dma_free_per_hcb(struct pvscsi_softc *sc, uint32_t hcbs_allocated)
    636 {
    637 	int i;
    638 	struct pvscsi_hcb *hcb;
    639 
    640 	for (i = 0; i < hcbs_allocated; ++i) {
    641 		hcb = sc->hcbs + i;
    642 		bus_dmamap_destroy(sc->sc_dmat, hcb->dma_map);
     643 	}
    644 
    645 	pvscsi_dma_free(sc, &sc->sense_buffer_dma);
    646 	pvscsi_dma_free(sc, &sc->sg_list_dma);
    647 }
    648 
    649 static int
    650 pvscsi_dma_alloc_per_hcb(struct pvscsi_softc *sc)
    651 {
    652 	int i;
    653 	int error;
    654 	struct pvscsi_hcb *hcb;
    655 
    656 	i = 0;
    657 
    658 	error = pvscsi_dma_alloc(sc, &sc->sg_list_dma,
    659 	    sizeof(struct pvscsi_sg_list) * sc->hcb_cnt, 1);
    660 	if (error) {
    661 		aprint_normal_dev(sc->dev,
    662 		    "Error allocation sg list DMA memory, error %d\n", error);
    663 		goto fail;
    664 	}
    665 
    666 	error = pvscsi_dma_alloc(sc, &sc->sense_buffer_dma,
    667 				 PVSCSI_SENSE_LENGTH * sc->hcb_cnt, 1);
    668 	if (error) {
    669 		aprint_normal_dev(sc->dev,
    670 		    "Error allocation buffer DMA memory, error %d\n", error);
    671 		goto fail;
    672 	}
    673 
    674 	for (i = 0; i < sc->hcb_cnt; ++i) {
    675 		hcb = sc->hcbs + i;
    676 
    677 		error = bus_dmamap_create(sc->sc_dmat, PVSCSI_MAXPHYS,
    678 		    PVSCSI_MAXPHYS_SEGS, PVSCSI_MAXPHYS, 0,
    679 		    BUS_DMA_WAITOK, &hcb->dma_map);
    680 		if (error) {
    681 			aprint_normal_dev(sc->dev,
    682 			    "Error creating dma map for hcb %d, error %d\n",
    683 			    i, error);
    684 			goto fail;
    685 		}
    686 
    687 		hcb->sc = sc;
    688 		hcb->dma_map_offset = PVSCSI_SENSE_LENGTH * i;
    689 		hcb->dma_map_size = PVSCSI_SENSE_LENGTH;
    690 		hcb->sense_buffer =
    691 		    (void *)((char *)sc->sense_buffer_dma.vaddr +
    692 		    PVSCSI_SENSE_LENGTH * i);
    693 		hcb->sense_buffer_paddr = sc->sense_buffer_dma.paddr +
    694 		    PVSCSI_SENSE_LENGTH * i;
    695 
    696 		hcb->sg_list =
    697 		    (struct pvscsi_sg_list *)((char *)sc->sg_list_dma.vaddr +
    698 		    sizeof(struct pvscsi_sg_list) * i);
    699 		hcb->sg_list_paddr =
    700 		    sc->sg_list_dma.paddr + sizeof(struct pvscsi_sg_list) * i;
    701 		hcb->sg_list_offset = sizeof(struct pvscsi_sg_list) * i;
    702 	}
    703 
    704 	SLIST_INIT(&sc->free_list);
    705 	for (i = (sc->hcb_cnt - 1); i >= 0; --i) {
    706 		hcb = sc->hcbs + i;
    707 		SLIST_INSERT_HEAD(&sc->free_list, hcb, links);
    708 	}
    709 
    710 fail:
    711 	if (error) {
    712 		pvscsi_dma_free_per_hcb(sc, i);
    713 	}
    714 
    715 	return (error);
    716 }
    717 
    718 static void
    719 pvscsi_free_rings(struct pvscsi_softc *sc)
    720 {
    721 
    722 	pvscsi_dma_free(sc, &sc->rings_state_dma);
    723 	pvscsi_dma_free(sc, &sc->req_ring_dma);
    724 	pvscsi_dma_free(sc, &sc->cmp_ring_dma);
    725 	if (sc->use_msg) {
    726 		pvscsi_dma_free(sc, &sc->msg_ring_dma);
    727 	}
    728 }
    729 
    730 static int
    731 pvscsi_allocate_rings(struct pvscsi_softc *sc)
    732 {
    733 	int error;
    734 
    735 	error = pvscsi_dma_alloc_ppns(sc, &sc->rings_state_dma,
    736 	    &sc->rings_state_ppn, 1);
    737 	if (error) {
    738 		aprint_normal_dev(sc->dev,
    739 		    "Error allocating rings state, error = %d\n", error);
    740 		goto fail;
    741 	}
    742 	sc->rings_state = sc->rings_state_dma.vaddr;
    743 
    744 	error = pvscsi_dma_alloc_ppns(sc, &sc->req_ring_dma, sc->req_ring_ppn,
    745 	    sc->req_ring_num_pages);
    746 	if (error) {
    747 		aprint_normal_dev(sc->dev,
    748 		    "Error allocating req ring pages, error = %d\n", error);
    749 		goto fail;
    750 	}
    751 	sc->req_ring = sc->req_ring_dma.vaddr;
    752 
    753 	error = pvscsi_dma_alloc_ppns(sc, &sc->cmp_ring_dma, sc->cmp_ring_ppn,
    754 	    sc->cmp_ring_num_pages);
    755 	if (error) {
    756 		aprint_normal_dev(sc->dev,
    757 		    "Error allocating cmp ring pages, error = %d\n", error);
    758 		goto fail;
    759 	}
    760 	sc->cmp_ring = sc->cmp_ring_dma.vaddr;
    761 
    762 	sc->msg_ring = NULL;
    763 	if (sc->use_msg) {
    764 		error = pvscsi_dma_alloc_ppns(sc, &sc->msg_ring_dma,
    765 		    sc->msg_ring_ppn, sc->msg_ring_num_pages);
    766 		if (error) {
    767 			aprint_normal_dev(sc->dev,
    768 			    "Error allocating cmp ring pages, error = %d\n",
    769 			    error);
    770 			goto fail;
    771 		}
    772 		sc->msg_ring = sc->msg_ring_dma.vaddr;
    773 	}
    774 
    775 fail:
    776 	if (error) {
    777 		pvscsi_free_rings(sc);
    778 	}
    779 	return (error);
    780 }
    781 
    782 static void
    783 pvscsi_setup_rings(struct pvscsi_softc *sc)
    784 {
    785 	struct pvscsi_cmd_desc_setup_rings cmd;
    786 	uint32_t i;
    787 
    788 	memset(&cmd, 0, sizeof(cmd));
    789 
    790 	cmd.rings_state_ppn = sc->rings_state_ppn;
    791 
    792 	cmd.req_ring_num_pages = sc->req_ring_num_pages;
    793 	for (i = 0; i < sc->req_ring_num_pages; ++i) {
    794 		cmd.req_ring_ppns[i] = sc->req_ring_ppn[i];
    795 	}
    796 
    797 	cmd.cmp_ring_num_pages = sc->cmp_ring_num_pages;
    798 	for (i = 0; i < sc->cmp_ring_num_pages; ++i) {
    799 		cmd.cmp_ring_ppns[i] = sc->cmp_ring_ppn[i];
    800 	}
    801 
    802 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_RINGS, &cmd, sizeof(cmd));
    803 
    804 	/*
    805 	 * After setup, sync *_num_entries_log2 before use.  After this
    806 	 * point they should be stable, so no need to sync again during
    807 	 * use.
    808 	 */
    809 	PVSCSI_DMA_SYNC_STATE(sc, &sc->rings_state_dma,
    810 	    sc->rings_state, req_num_entries_log2,
    811 	    BUS_DMASYNC_POSTREAD);
    812 	PVSCSI_DMA_SYNC_STATE(sc, &sc->rings_state_dma,
    813 	    sc->rings_state, cmp_num_entries_log2,
    814 	    BUS_DMASYNC_POSTREAD);
    815 }
    816 
    817 static int
    818 pvscsi_hw_supports_msg(struct pvscsi_softc *sc)
    819 {
    820 	uint32_t status;
    821 
    822 	pvscsi_reg_write(sc, PVSCSI_REG_OFFSET_COMMAND,
    823 	    PVSCSI_CMD_SETUP_MSG_RING);
    824 	status = pvscsi_reg_read(sc, PVSCSI_REG_OFFSET_COMMAND_STATUS);
    825 
    826 	return (status != -1);
    827 }
    828 
    829 static void
    830 pvscsi_setup_msg_ring(struct pvscsi_softc *sc)
    831 {
    832 	struct pvscsi_cmd_desc_setup_msg_ring cmd;
    833 	uint32_t i;
    834 
    835 	KASSERTMSG(sc->use_msg, "msg is not being used");
    836 
    837 	memset(&cmd, 0, sizeof(cmd));
    838 
    839 	cmd.num_pages = sc->msg_ring_num_pages;
    840 	for (i = 0; i < sc->msg_ring_num_pages; ++i) {
    841 		cmd.ring_ppns[i] = sc->msg_ring_ppn[i];
    842 	}
    843 
    844 	pvscsi_write_cmd(sc, PVSCSI_CMD_SETUP_MSG_RING, &cmd, sizeof(cmd));
    845 
    846 	/*
    847 	 * After setup, sync msg_num_entries_log2 before use.  After
    848 	 * this point it should be stable, so no need to sync again
    849 	 * during use.
    850 	 */
    851 	PVSCSI_DMA_SYNC_STATE(sc, &sc->rings_state_dma,
    852 	    sc->rings_state, msg_num_entries_log2,
    853 	    BUS_DMASYNC_POSTREAD);
    854 }
    855 
    856 static void
    857 pvscsi_adapter_reset(struct pvscsi_softc *sc)
    858 {
    859 	aprint_normal_dev(sc->dev, "Adapter Reset\n");
    860 
    861 	pvscsi_write_cmd(sc, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
    862 #ifdef PVSCSI_DEBUG_LOGGING
    863 	uint32_t val =
    864 #endif
    865 	pvscsi_read_intr_status(sc);
    866 
    867 	DEBUG_PRINTF(2, sc->dev, "adapter reset done: %u\n", val);
    868 }
    869 
    870 static void
    871 pvscsi_bus_reset(struct pvscsi_softc *sc)
    872 {
    873 
    874 	aprint_normal_dev(sc->dev, "Bus Reset\n");
    875 
    876 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_BUS, NULL, 0);
    877 	pvscsi_process_cmp_ring(sc);
    878 
    879 	DEBUG_PRINTF(2, sc->dev, "bus reset done\n");
    880 }
    881 
    882 static void
    883 pvscsi_device_reset(struct pvscsi_softc *sc, uint32_t target)
    884 {
    885 	struct pvscsi_cmd_desc_reset_device cmd;
    886 
    887 	memset(&cmd, 0, sizeof(cmd));
    888 
    889 	cmd.target = target;
    890 
    891 	aprint_normal_dev(sc->dev, "Device reset for target %u\n", target);
    892 
    893 	pvscsi_write_cmd(sc, PVSCSI_CMD_RESET_DEVICE, &cmd, sizeof cmd);
    894 	pvscsi_process_cmp_ring(sc);
    895 
    896 	DEBUG_PRINTF(2, sc->dev, "device reset done\n");
    897 }
    898 
    899 static void
    900 pvscsi_abort(struct pvscsi_softc *sc, uint32_t target, struct pvscsi_hcb *hcb)
    901 {
    902 	struct pvscsi_cmd_desc_abort_cmd cmd;
    903 	uint64_t context;
    904 
    905 	pvscsi_process_cmp_ring(sc);
    906 
    907 	if (hcb != NULL) {
    908 		context = pvscsi_hcb_to_context(sc, hcb);
    909 
    910 		memset(&cmd, 0, sizeof cmd);
    911 		cmd.target = target;
    912 		cmd.context = context;
    913 
    914 		aprint_normal_dev(sc->dev, "Abort for target %u context %llx\n",
    915 		    target, (unsigned long long)context);
    916 
    917 		pvscsi_write_cmd(sc, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
    918 		pvscsi_process_cmp_ring(sc);
    919 
    920 		DEBUG_PRINTF(2, sc->dev, "abort done\n");
    921 	} else {
    922 		DEBUG_PRINTF(1, sc->dev,
    923 		    "Target %u hcb %p not found for abort\n", target, hcb);
    924 	}
    925 }
    926 
    927 static int
    928 pvscsi_probe(device_t dev, cfdata_t cf, void *aux)
    929 {
    930 	const struct pci_attach_args *pa = aux;
    931 
    932 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_VMWARE &&
    933 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_VMWARE_PVSCSI) {
    934 		return 1;
    935 	}
    936 	return 0;
    937 }
    938 
    939 static void
    940 pvscsi_timeout(void *arg)
    941 {
    942 	struct pvscsi_hcb *hcb = arg;
    943 	struct scsipi_xfer *xs = hcb->xs;
    944 
    945 	if (xs == NULL) {
    946 		/* Already completed */
    947 		return;
    948 	}
    949 
    950 	struct pvscsi_softc *sc = hcb->sc;
    951 
    952 	mutex_enter(&sc->lock);
    953 
    954 	scsipi_printaddr(xs->xs_periph);
    955 	printf("command timeout, CDB: ");
    956 	scsipi_print_cdb(xs->cmd);
    957 	printf("\n");
    958 
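         	/*
         	 * Escalate one recovery step per expiry:
         	 * abort -> device reset -> bus reset -> adapter reset,
         	 * re-arming the callout after each step but the last.
         	 */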
    959 	switch (hcb->recovery) {
    960 	case PVSCSI_HCB_NONE:
    961 		hcb->recovery = PVSCSI_HCB_ABORT;
    962 		pvscsi_abort(sc, hcb->e->target, hcb);
    963 		callout_reset(&xs->xs_callout,
    964 		    mstohz(PVSCSI_ABORT_TIMEOUT * 1000),
    965 		    pvscsi_timeout, hcb);
    966 		break;
    967 	case PVSCSI_HCB_ABORT:
    968 		hcb->recovery = PVSCSI_HCB_DEVICE_RESET;
    969 		pvscsi_device_reset(sc, hcb->e->target);
    970 		callout_reset(&xs->xs_callout,
    971 		    mstohz(PVSCSI_RESET_TIMEOUT * 1000),
    972 		    pvscsi_timeout, hcb);
    973 		break;
    974 	case PVSCSI_HCB_DEVICE_RESET:
    975 		hcb->recovery = PVSCSI_HCB_BUS_RESET;
    976 		pvscsi_bus_reset(sc);
    977 		callout_reset(&xs->xs_callout,
    978 		    mstohz(PVSCSI_RESET_TIMEOUT * 1000),
    979 		    pvscsi_timeout, hcb);
    980 		break;
    981 	case PVSCSI_HCB_BUS_RESET:
    982 		pvscsi_adapter_reset(sc);
    983 		break;
     984 	}
    985 	mutex_exit(&sc->lock);
    986 }
    987 
    988 static void
    989 pvscsi_process_completion(struct pvscsi_softc *sc,
    990     struct pvscsi_ring_cmp_desc *e)
    991 {
    992 	struct pvscsi_hcb *hcb;
    993 	struct scsipi_xfer *xs;
    994 	uint32_t error = XS_NOERROR;
    995 	uint32_t btstat;
    996 	uint32_t sdstat;
    997 	int op;
    998 
    999 	hcb = pvscsi_context_to_hcb(sc, e->context);
   1000 	xs = hcb->xs;
   1001 
   1002 	callout_stop(&xs->xs_callout);
   1003 
   1004 	btstat = e->host_status;
   1005 	sdstat = e->scsi_status;
   1006 
   1007 	xs->status = sdstat;
   1008 	xs->resid = xs->datalen - e->data_len;
   1009 
   1010 	DEBUG_PRINTF(3, sc->dev,
   1011 	    "command context %llx btstat %d (%#x) sdstat %d (%#x)\n",
   1012 	    (unsigned long long)e->context, btstat, btstat, sdstat, sdstat);
   1013 
    1014 	if (xs->datalen != 0) {
    1015 		if ((xs->xs_control & XS_CTL_DATA_IN) == XS_CTL_DATA_IN) {
    1016 			op = BUS_DMASYNC_POSTREAD;
    1017 		} else {
    1018 			op = BUS_DMASYNC_POSTWRITE;
    1019 		}
         		bus_dmamap_sync(sc->sc_dmat, hcb->dma_map, 0,
         		    xs->datalen, op);
         		/* done with the data map until the hcb is reused */
         		bus_dmamap_unload(sc->sc_dmat, hcb->dma_map);
         	}
         	/* the sense slice for this hcb is written by the device */
    1020 	bus_dmamap_sync(sc->sc_dmat, sc->sense_buffer_dma.map,
         	    hcb->dma_map_offset, hcb->dma_map_size, BUS_DMASYNC_POSTREAD);
   1021 
   1022 	if (btstat == BTSTAT_SUCCESS && sdstat == SCSI_OK) {
   1023 		DEBUG_PRINTF(3, sc->dev,
   1024 		    "completing command context %llx success\n",
   1025 		    (unsigned long long)e->context);
   1026 		xs->resid = 0;
   1027 	} else {
   1028 		switch (btstat) {
   1029 		case BTSTAT_SUCCESS:
   1030 		case BTSTAT_LINKED_COMMAND_COMPLETED:
   1031 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
   1032 			switch (sdstat) {
   1033 			case SCSI_OK:
   1034 				xs->resid = 0;
   1035 				error = XS_NOERROR;
   1036 				break;
   1037 			case SCSI_CHECK:
   1038 				error = XS_SENSE;
   1039 				xs->resid = 0;
   1040 
   1041 				memset(&xs->sense, 0, sizeof(xs->sense));
   1042 				memcpy(&xs->sense, hcb->sense_buffer,
   1043 				    MIN(sizeof(xs->sense), e->sense_len));
   1044 				break;
   1045 			case SCSI_BUSY:
   1046 			case SCSI_QUEUE_FULL:
   1047 				error = XS_NOERROR;
   1048 				break;
   1049 			case SCSI_TERMINATED:
   1050 // 			case SCSI_STATUS_TASK_ABORTED:
   1051 				DEBUG_PRINTF(1, sc->dev,
   1052 				    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1053 				error = XS_DRIVER_STUFFUP;
   1054 				break;
   1055 			default:
   1056 				DEBUG_PRINTF(1, sc->dev,
   1057 				    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1058 				error = XS_DRIVER_STUFFUP;
   1059 				break;
   1060 			}
   1061 			break;
   1062 		case BTSTAT_SELTIMEO:
   1063 			error = XS_SELTIMEOUT;
   1064 			break;
   1065 		case BTSTAT_DATARUN:
   1066 		case BTSTAT_DATA_UNDERRUN:
   1067 //			xs->resid = xs->datalen - c->data_len;
   1068 			error = XS_NOERROR;
   1069 			break;
   1070 		case BTSTAT_ABORTQUEUE:
   1071 		case BTSTAT_HATIMEOUT:
   1072 			error = XS_NOERROR;
   1073 			break;
   1074 		case BTSTAT_NORESPONSE:
   1075 		case BTSTAT_SENTRST:
   1076 		case BTSTAT_RECVRST:
   1077 		case BTSTAT_BUSRESET:
   1078 			error = XS_RESET;
   1079 			break;
   1080 		case BTSTAT_SCSIPARITY:
   1081 			error = XS_DRIVER_STUFFUP;
   1082 			DEBUG_PRINTF(1, sc->dev,
   1083 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1084 			break;
   1085 		case BTSTAT_BUSFREE:
   1086 			error = XS_DRIVER_STUFFUP;
   1087 			DEBUG_PRINTF(1, sc->dev,
   1088 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1089 			break;
   1090 		case BTSTAT_INVPHASE:
   1091 			error = XS_DRIVER_STUFFUP;
   1092 			DEBUG_PRINTF(1, sc->dev,
   1093 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1094 			break;
   1095 		case BTSTAT_SENSFAILED:
   1096 			error = XS_DRIVER_STUFFUP;
   1097 			DEBUG_PRINTF(1, sc->dev,
   1098 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1099 			break;
   1100 		case BTSTAT_LUNMISMATCH:
   1101 		case BTSTAT_TAGREJECT:
   1102 		case BTSTAT_DISCONNECT:
   1103 		case BTSTAT_BADMSG:
   1104 		case BTSTAT_INVPARAM:
   1105 			error = XS_DRIVER_STUFFUP;
   1106 			DEBUG_PRINTF(1, sc->dev,
   1107 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1108 			break;
   1109 		case BTSTAT_HASOFTWARE:
   1110 		case BTSTAT_HAHARDWARE:
   1111 			error = XS_DRIVER_STUFFUP;
   1112 			DEBUG_PRINTF(1, sc->dev,
   1113 			    "xs: %p sdstat=0x%x\n", xs, sdstat);
   1114 			break;
   1115 		default:
   1116 			aprint_normal_dev(sc->dev, "unknown hba status: 0x%x\n",
   1117 			    btstat);
   1118 			error = XS_DRIVER_STUFFUP;
   1119 			break;
   1120 		}
   1121 
   1122 		DEBUG_PRINTF(3, sc->dev,
   1123 		    "completing command context %llx btstat %x sdstat %x - error %x\n",
   1124 		    (unsigned long long)e->context, btstat, sdstat, error);
   1125 	}
   1126 
   1127 	xs->error = error;
   1128 	pvscsi_hcb_put(sc, hcb);
   1129 
   1130 	mutex_exit(&sc->lock);
   1131 
   1132 	scsipi_done(xs);
   1133 
   1134 	mutex_enter(&sc->lock);
   1135 }
   1136 
   1137 static void
   1138 pvscsi_process_cmp_ring(struct pvscsi_softc *sc)
   1139 {
   1140 	struct pvscsi_dma *ring_dma;
   1141 	struct pvscsi_ring_cmp_desc *ring;
   1142 	struct pvscsi_dma *s_dma;
   1143 	struct pvscsi_rings_state *s;
   1144 	struct pvscsi_ring_cmp_desc *e;
   1145 	uint32_t mask;
   1146 
   1147 	KASSERT(mutex_owned(&sc->lock));
   1148 
   1149 	s_dma = &sc->rings_state_dma;
   1150 	s = sc->rings_state;
   1151 	ring_dma = &sc->cmp_ring_dma;
   1152 	ring = sc->cmp_ring;
   1153 	mask = MASK(s->cmp_num_entries_log2);
   1154 
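         	/*
         	 * The prod/cons indices are free-running counters: only the
         	 * low cmp_num_entries_log2 bits select a ring slot, and
         	 * equality means the ring is empty.
         	 */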
   1155 	for (;;) {
   1156 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, cmp_prod_idx,
   1157 		    BUS_DMASYNC_POSTREAD);
   1158 		size_t crpidx = s->cmp_prod_idx;
   1159 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, cmp_prod_idx,
   1160 		    BUS_DMASYNC_PREREAD);
   1161 
   1162 		if (s->cmp_cons_idx == crpidx)
   1163 			break;
   1164 
   1165 		size_t crcidx = s->cmp_cons_idx & mask;
   1166 
   1167 		PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, crcidx,
   1168 		    BUS_DMASYNC_POSTREAD);
   1169 
   1170 		e = ring + crcidx;
   1171 
   1172 		pvscsi_process_completion(sc, e);
   1173 
   1174 		/*
   1175 		 * ensure completion processing reads happen before write to
   1176 		 * (increment of) cmp_cons_idx
   1177 		 */
   1178 		PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, crcidx,
   1179 		    BUS_DMASYNC_PREREAD);
   1180 
   1181 		/*
   1182 		 * XXX Not actually sure the `device' does DMA for
   1183 		 * s->cmp_cons_idx at all -- qemu doesn't.  If not, we
   1184 		 * can skip these DMA syncs.
   1185 		 */
   1186 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, cmp_cons_idx,
   1187 		    BUS_DMASYNC_POSTWRITE);
   1188 		s->cmp_cons_idx++;
   1189 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, cmp_cons_idx,
   1190 		    BUS_DMASYNC_PREWRITE);
   1191 	}
   1192 }
   1193 
   1194 static void
   1195 pvscsi_process_msg(struct pvscsi_softc *sc, struct pvscsi_ring_msg_desc *e)
   1196 {
   1197 	struct pvscsi_ring_msg_dev_status_changed *desc;
   1198 
   1199 	switch (e->type) {
   1200 	case PVSCSI_MSG_DEV_ADDED:
   1201 	case PVSCSI_MSG_DEV_REMOVED: {
   1202 		desc = (struct pvscsi_ring_msg_dev_status_changed *)e;
   1203 		struct scsibus_softc *ssc = device_private(sc->sc_scsibus_dv);
   1204 
   1205 		aprint_normal_dev(sc->dev, "MSG: device %s at scsi%u:%u:%u\n",
   1206 		    desc->type == PVSCSI_MSG_DEV_ADDED ? "addition" : "removal",
   1207 		    desc->bus, desc->target, desc->lun[1]);
   1208 
   1209 		if (desc->type == PVSCSI_MSG_DEV_ADDED) {
   1210 			if (scsi_probe_bus(ssc,
   1211 			    desc->target, desc->lun[1]) != 0) {
   1212 				aprint_normal_dev(sc->dev,
   1213 				    "Error creating path for dev change.\n");
   1214 				break;
   1215 			}
   1216 		} else {
   1217 			if (scsipi_target_detach(ssc->sc_channel,
   1218 			    desc->target, desc->lun[1],
   1219 			    DETACH_FORCE) != 0) {
   1220 				aprint_normal_dev(sc->dev,
   1221 				    "Error detaching target %d lun %d\n",
   1222 				    desc->target, desc->lun[1]);
    1223 			}
   1224 
   1225 		}
   1226 	} break;
   1227 	default:
   1228 		aprint_normal_dev(sc->dev, "Unknown msg type 0x%x\n", e->type);
    1229 	}
   1230 }
   1231 
   1232 static void
   1233 pvscsi_process_msg_ring(struct pvscsi_softc *sc)
   1234 {
   1235 	struct pvscsi_dma *ring_dma;
   1236 	struct pvscsi_ring_msg_desc *ring;
   1237 	struct pvscsi_dma *s_dma;
   1238 	struct pvscsi_rings_state *s;
   1239 	struct pvscsi_ring_msg_desc *e;
   1240 	uint32_t mask;
   1241 
   1242 	KASSERT(mutex_owned(&sc->lock));
   1243 
   1244 	s_dma = &sc->rings_state_dma;
   1245 	s = sc->rings_state;
   1246 	ring_dma = &sc->msg_ring_dma;
   1247 	ring = sc->msg_ring;
   1248 	mask = MASK(s->msg_num_entries_log2);
   1249 
   1250 	for (;;) {
   1251 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, msg_prod_idx,
   1252 		    BUS_DMASYNC_POSTREAD);
   1253 		size_t mpidx = s->msg_prod_idx;	// dma read (device -> cpu)
   1254 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, msg_prod_idx,
   1255 		    BUS_DMASYNC_PREREAD);
   1256 
   1257 		if (s->msg_cons_idx == mpidx)
   1258 			break;
   1259 
   1260 		size_t mcidx = s->msg_cons_idx & mask;
   1261 
   1262 		PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, mcidx,
   1263 		    BUS_DMASYNC_POSTREAD);
   1264 
   1265 		e = ring + mcidx;
   1266 
   1267 		pvscsi_process_msg(sc, e);
   1268 
   1269 		/*
   1270 		 * ensure message processing reads happen before write to
   1271 		 * (increment of) msg_cons_idx
   1272 		 */
   1273 		PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, mcidx,
   1274 		    BUS_DMASYNC_PREREAD);
   1275 
   1276 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, msg_cons_idx,
   1277 		    BUS_DMASYNC_POSTWRITE);
   1278 		s->msg_cons_idx++;
   1279 		PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, msg_cons_idx,
   1280 		    BUS_DMASYNC_PREWRITE);
   1281 	}
   1282 }
   1283 
   1284 static void
   1285 pvscsi_intr_locked(struct pvscsi_softc *sc)
   1286 {
   1287 	uint32_t val;
   1288 
   1289 	KASSERT(mutex_owned(&sc->lock));
   1290 
   1291 	val = pvscsi_read_intr_status(sc);
   1292 
   1293 	if ((val & PVSCSI_INTR_ALL_SUPPORTED) != 0) {
   1294 		pvscsi_write_intr_status(sc, val & PVSCSI_INTR_ALL_SUPPORTED);
   1295 		pvscsi_process_cmp_ring(sc);
   1296 		if (sc->use_msg) {
   1297 			pvscsi_process_msg_ring(sc);
   1298 		}
   1299 	}
   1300 }
   1301 
   1302 static int
   1303 pvscsi_intr(void *xsc)
   1304 {
   1305 	struct pvscsi_softc *sc;
   1306 
   1307 	sc = xsc;
   1308 
   1309 	mutex_enter(&sc->lock);
    1310 	pvscsi_intr_locked(sc);
   1311 	mutex_exit(&sc->lock);
   1312 
   1313 	return 1;
   1314 }
   1315 
   1316 static void
   1317 pvscsi_scsipi_request(struct scsipi_channel *chan, scsipi_adapter_req_t
   1318     request, void *arg)
   1319 {
   1320 	struct pvscsi_softc *sc = device_private(chan->chan_adapter->adapt_dev);
   1321 
   1322 	if (request == ADAPTER_REQ_SET_XFER_MODE) {
   1323 		struct scsipi_xfer_mode *xm = arg;
   1324 
   1325 		xm->xm_mode = PERIPH_CAP_TQING;
   1326 		xm->xm_period = 0;
   1327 		xm->xm_offset = 0;
   1328 		scsipi_async_event(chan, ASYNC_EVENT_XFER_MODE, xm);
   1329 		return;
   1330 	} else if (request != ADAPTER_REQ_RUN_XFER) {
   1331 		DEBUG_PRINTF(1, sc->dev, "unhandled %d\n", request);
   1332 		return;
   1333 	}
   1334 
   1335 	/* request is ADAPTER_REQ_RUN_XFER */
   1336 	struct scsipi_xfer *xs = arg;
   1337 	struct scsipi_periph *periph = xs->xs_periph;
   1338 #ifdef SCSIPI_DEBUG
   1339 	periph->periph_dbflags |= SCSIPI_DEBUG_FLAGS;
   1340 #endif
   1341 
   1342 	uint32_t req_num_entries_log2;
   1343 	struct pvscsi_dma *ring_dma;
   1344 	struct pvscsi_ring_req_desc *ring;
   1345 	struct pvscsi_ring_req_desc *e;
   1346 	struct pvscsi_dma *s_dma;
   1347 	struct pvscsi_rings_state *s;
   1348 	struct pvscsi_hcb *hcb;
   1349 
   1350 	if (xs->cmdlen < 0 || xs->cmdlen > sizeof(e->cdb)) {
   1351 		DEBUG_PRINTF(1, sc->dev, "bad cmdlen %zu > %zu\n",
   1352 		    (size_t)xs->cmdlen, sizeof(e->cdb));
   1353 		/* not a temporary condition */
   1354 		xs->error = XS_DRIVER_STUFFUP;
   1355 		scsipi_done(xs);
   1356 		return;
   1357 	}
   1358 
   1359 	ring_dma = &sc->req_ring_dma;
   1360 	ring = sc->req_ring;
   1361 	s_dma = &sc->rings_state_dma;
   1362 	s = sc->rings_state;
   1363 
   1364 	hcb = NULL;
   1365 	req_num_entries_log2 = s->req_num_entries_log2;
   1366 
   1367 	/* Protect against multiple senders */
   1368 	mutex_enter(&sc->lock);
   1369 
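         	/*
         	 * req_prod_idx and cmp_cons_idx are free-running, so their
         	 * difference is the number of requests the device has not yet
         	 * completed; the rings are full once it reaches the ring size.
         	 */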
   1370 	if (s->req_prod_idx - s->cmp_cons_idx >=
   1371 	    (1 << req_num_entries_log2)) {
   1372 		aprint_normal_dev(sc->dev,
   1373 		    "Not enough room on completion ring.\n");
   1374 		xs->error = XS_RESOURCE_SHORTAGE;
   1375 		goto finish_xs;
   1376 	}
   1377 
   1378 	if (xs->cmdlen > sizeof(e->cdb)) {
   1379 		DEBUG_PRINTF(1, sc->dev, "cdb length %u too large\n",
   1380 		    xs->cmdlen);
   1381 		xs->error = XS_DRIVER_STUFFUP;
   1382 		goto finish_xs;
   1383 	}
   1384 
   1385 	hcb = pvscsi_hcb_get(sc);
   1386 	if (hcb == NULL) {
   1387 		aprint_normal_dev(sc->dev, "No free hcbs.\n");
   1388 		xs->error = XS_RESOURCE_SHORTAGE;
   1389 		goto finish_xs;
   1390 	}
   1391 
   1392 	hcb->xs = xs;
   1393 
   1394 	const size_t rridx = s->req_prod_idx & MASK(req_num_entries_log2);
   1395 	PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, rridx, BUS_DMASYNC_POSTWRITE);
   1396 	e = ring + rridx;
   1397 
   1398 	memset(e, 0, sizeof(*e));
   1399 	e->bus = 0;
   1400 	e->target = periph->periph_target;
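         	/*
         	 * lun[] looks like an 8-byte SAM LUN address; byte 1 carries
         	 * the LUN in the flat single-level case, which matches the
         	 * lun[1] usage in pvscsi_process_msg() above.
         	 */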
   1401 	e->lun[1] = periph->periph_lun;
   1402 	e->data_addr = 0;
   1403 	e->data_len = xs->datalen;
   1404 	e->vcpu_hint = cpu_index(curcpu());
   1405 	e->flags = 0;
   1406 
   1407 	e->cdb_len = xs->cmdlen;
   1408 	memcpy(e->cdb, xs->cmd, xs->cmdlen);
   1409 
   1410 	e->sense_addr = 0;
   1411 	e->sense_len = sizeof(xs->sense);
   1412 	if (e->sense_len > 0) {
   1413 		e->sense_addr = hcb->sense_buffer_paddr;
   1414 	}
   1415 	//e->tag = xs->xs_tag_type;
   1416 	e->tag = MSG_SIMPLE_Q_TAG;
   1417 
   1418 	switch (xs->xs_control & (XS_CTL_DATA_IN | XS_CTL_DATA_OUT)) {
   1419 	case XS_CTL_DATA_IN:
   1420 		e->flags |= PVSCSI_FLAG_CMD_DIR_TOHOST;
   1421 		break;
   1422 	case XS_CTL_DATA_OUT:
   1423 		e->flags |= PVSCSI_FLAG_CMD_DIR_TODEVICE;
   1424 		break;
   1425 	default:
   1426 		e->flags |= PVSCSI_FLAG_CMD_DIR_NONE;
   1427 		break;
   1428 	}
   1429 
   1430 	e->context = pvscsi_hcb_to_context(sc, hcb);
   1431 	hcb->e = e;
   1432 
   1433 	DEBUG_PRINTF(3, sc->dev,
   1434 	    " queuing command %02x context %llx\n", e->cdb[0],
   1435 	    (unsigned long long)e->context);
   1436 
   1437 	int flags;
   1438 	flags  = (xs->xs_control & XS_CTL_DATA_IN) ? BUS_DMA_READ : BUS_DMA_WRITE;
   1439 	flags |= (xs->xs_control & XS_CTL_NOSLEEP) ? BUS_DMA_NOWAIT : BUS_DMA_WAITOK;
   1440 
   1441 	int error = bus_dmamap_load(sc->sc_dmat, hcb->dma_map,
   1442 	    xs->data, xs->datalen, NULL, flags);
   1443 
   1444 	if (error) {
   1445 		if (error == ENOMEM || error == EAGAIN) {
   1446 			xs->error = XS_RESOURCE_SHORTAGE;
   1447 		} else {
   1448 			xs->error = XS_DRIVER_STUFFUP;
   1449 		}
   1450 		DEBUG_PRINTF(1, sc->dev,
   1451 		    "xs: %p load error %d data %p len %d",
   1452                     xs, error, xs->data, xs->datalen);
   1453 		goto error_load;
   1454 	}
   1455 
   1456 	int op = (xs->xs_control & XS_CTL_DATA_IN) ? BUS_DMASYNC_PREREAD :
   1457 	    BUS_DMASYNC_PREWRITE;
   1458 	int nseg = hcb->dma_map->dm_nsegs;
   1459 	bus_dma_segment_t *segs = hcb->dma_map->dm_segs;
   1460 	if (nseg != 0) {
   1461 		if (nseg > 1) {
   1462 			struct pvscsi_sg_element *sge;
   1463 
   1464 			KASSERTMSG(nseg <= PVSCSI_MAX_SG_ENTRIES_PER_SEGMENT,
   1465 			    "too many sg segments");
   1466 
   1467 			sge = hcb->sg_list->sge;
   1468 			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
   1469 
   1470 			for (size_t i = 0; i < nseg; ++i) {
   1471 				sge[i].addr = segs[i].ds_addr;
   1472 				sge[i].length = segs[i].ds_len;
   1473 				sge[i].flags = 0;
   1474 			}
   1475 
   1476 			e->data_addr = hcb->sg_list_paddr;
   1477 
   1478 			bus_dmamap_sync(sc->sc_dmat,
   1479 			    sc->sg_list_dma.map, hcb->sg_list_offset,
   1480 			    sizeof(*sge) * nseg, BUS_DMASYNC_PREWRITE);
   1481 		} else {
   1482 			e->data_addr = segs->ds_addr;
   1483 		}
   1484 
   1485 		bus_dmamap_sync(sc->sc_dmat, hcb->dma_map, 0,
   1486 		    xs->datalen, op);
   1487 	} else {
   1488 		e->data_addr = 0;
   1489 	}
   1490 
   1491 	/*
   1492 	 * Ensure request record writes happen before write to (increment of)
   1493 	 * req_prod_idx.
   1494 	 */
   1495 	PVSCSI_DMA_SYNC_RING(sc, ring_dma, ring, rridx, BUS_DMASYNC_PREWRITE);
   1496 
   1497 	uint8_t cdb0 = e->cdb[0];
   1498 
   1499 	/* handle timeout */
   1500 	if ((xs->xs_control & XS_CTL_POLL) == 0) {
   1501 		int timeout = mstohz(xs->timeout);
   1502 		/* start expire timer */
   1503 		if (timeout == 0)
   1504 			timeout = 1;
   1505 		callout_reset(&xs->xs_callout, timeout, pvscsi_timeout, hcb);
   1506 	}
   1507 
   1508 	PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, req_prod_idx,
   1509 	    BUS_DMASYNC_POSTWRITE);
   1510 	s->req_prod_idx++;
   1511 
   1512 	/*
   1513 	 * Ensure req_prod_idx write (increment) happens before
   1514 	 * IO is kicked (via a write).
   1515 	 */
   1516 	PVSCSI_DMA_SYNC_STATE(sc, s_dma, s, req_prod_idx,
   1517 	    BUS_DMASYNC_PREWRITE);
   1518 
   1519 	pvscsi_kick_io(sc, cdb0);
   1520 	mutex_exit(&sc->lock);
   1521 
   1522 	return;
   1523 
   1524 error_load:
   1525 	pvscsi_hcb_put(sc, hcb);
   1526 
   1527 finish_xs:
   1528 	mutex_exit(&sc->lock);
   1529 	scsipi_done(xs);
   1530 }
   1531 
   1532 static void
   1533 pvscsi_free_interrupts(struct pvscsi_softc *sc)
   1534 {
   1535 
   1536 	if (sc->sc_ih != NULL) {
   1537 		pci_intr_disestablish(sc->sc_pc, sc->sc_ih);
   1538 		sc->sc_ih = NULL;
   1539 	}
   1540 	if (sc->sc_pihp != NULL) {
   1541 		pci_intr_release(sc->sc_pc, sc->sc_pihp, 1);
   1542 		sc->sc_pihp = NULL;
   1543 	}
   1544 }
   1545 
   1546 static int
   1547 pvscsi_setup_interrupts(struct pvscsi_softc *sc, const struct pci_attach_args *pa)
   1548 {
   1549 	int use_msix;
   1550 	int use_msi;
   1551 	int counts[PCI_INTR_TYPE_SIZE];
   1552 
   1553 	for (size_t i = 0; i < PCI_INTR_TYPE_SIZE; i++) {
   1554 		counts[i] = 1;
   1555 	}
   1556 
   1557 	use_msix = pvscsi_get_tunable(sc, "use_msix", pvscsi_use_msix);
   1558 	use_msi = pvscsi_get_tunable(sc, "use_msi", pvscsi_use_msi);
   1559 
   1560 	if (!use_msix) {
   1561 		counts[PCI_INTR_TYPE_MSIX] = 0;
   1562 	}
   1563 	if (!use_msi) {
   1564 		counts[PCI_INTR_TYPE_MSI] = 0;
   1565 	}
   1566 
   1567 	/* Allocate and establish the interrupt. */
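         	/*
         	 * pci_intr_alloc() tries MSI-X first, then MSI, then INTx;
         	 * zeroing an entry of counts[] excludes that interrupt type.
         	 */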
   1568 	if (pci_intr_alloc(pa, &sc->sc_pihp, counts, PCI_INTR_TYPE_MSIX)) {
   1569 		aprint_error_dev(sc->dev, "can't allocate handler\n");
   1570 		goto fail;
   1571 	}
   1572 
   1573 	char intrbuf[PCI_INTRSTR_LEN];
   1574 	const pci_chipset_tag_t pc = pa->pa_pc;
   1575 	char const *intrstr = pci_intr_string(pc, sc->sc_pihp[0], intrbuf,
   1576 	    sizeof(intrbuf));
   1577 
         	/* Set the MPSAFE attribute before establishing the handler. */
    1578 	pci_intr_setattr(pc, sc->sc_pihp, PCI_INTR_MPSAFE, true);
    1579 	sc->sc_ih = pci_intr_establish_xname(pc, sc->sc_pihp[0], IPL_BIO,
    1580 	    pvscsi_intr, sc, device_xname(sc->dev));
    1581 	if (sc->sc_ih == NULL) {
    1582 		pci_intr_release(pc, sc->sc_pihp, 1);
    1583 		sc->sc_pihp = NULL;
    1584 		aprint_error_dev(sc->dev, "couldn't establish interrupt");
    1585 		if (intrstr != NULL)
    1586 			aprint_error(" at %s", intrstr);
    1587 		aprint_error("\n");
    1588 		goto fail;
    1589 	}
   1590 
   1591 	aprint_normal_dev(sc->dev, "interrupting at %s\n", intrstr);
   1592 
   1593 	return (0);
   1594 
   1595 fail:
   1596 	if (sc->sc_ih != NULL) {
   1597 		pci_intr_disestablish(sc->sc_pc, sc->sc_ih);
   1598 		sc->sc_ih = NULL;
   1599 	}
   1600 	if (sc->sc_pihp != NULL) {
   1601 		pci_intr_release(sc->sc_pc, sc->sc_pihp, 1);
   1602 		sc->sc_pihp = NULL;
   1603 	}
   1604 	if (sc->sc_mems) {
   1605 		bus_space_unmap(sc->sc_memt, sc->sc_memh, sc->sc_mems);
   1606 		sc->sc_mems = 0;
   1607 	}
   1608 
   1609 	return 1;
   1610 }
   1611 
   1612 static void
   1613 pvscsi_free_all(struct pvscsi_softc *sc)
   1614 {
   1615 
   1616 	pvscsi_dma_free_per_hcb(sc, sc->hcb_cnt);
   1617 
   1618 	if (sc->hcbs) {
   1619 		kmem_free(sc->hcbs, sc->hcb_cnt * sizeof(*sc->hcbs));
   1620 	}
   1621 
   1622 	pvscsi_free_rings(sc);
   1623 
   1624 	pvscsi_free_interrupts(sc);
   1625 
   1626 	if (sc->sc_mems) {
   1627 		bus_space_unmap(sc->sc_memt, sc->sc_memh, sc->sc_mems);
   1628 		sc->sc_mems = 0;
   1629 	}
   1630 }
   1631 
   1632 static inline void
   1633 pci_enable_busmaster(device_t dev, const pci_chipset_tag_t pc,
   1634     const pcitag_t tag)
   1635 {
   1636 	pcireg_t pci_cmd_word;
   1637 
   1638 	pci_cmd_word = pci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG);
   1639 	if (!(pci_cmd_word & PCI_COMMAND_MASTER_ENABLE)) {
   1640 		pci_cmd_word |= PCI_COMMAND_MASTER_ENABLE;
   1641 		pci_conf_write(pc, tag, PCI_COMMAND_STATUS_REG, pci_cmd_word);
   1642 	}
   1643 }
   1644 
   1645 static void
   1646 pvscsi_attach(device_t parent, device_t dev, void *aux)
   1647 {
   1648 	const struct pci_attach_args *pa = aux;
   1649 	struct pvscsi_softc *sc;
   1650 	int rid;
   1651 	int error;
   1652 	int max_queue_depth;
   1653 	int adapter_queue_size;
   1654 
   1655 	sc = device_private(dev);
   1656 	sc->dev = dev;
   1657 
   1658 	struct scsipi_adapter *adapt = &sc->sc_adapter;
   1659 	struct scsipi_channel *chan = &sc->sc_channel;
   1660 
   1661 	mutex_init(&sc->lock, MUTEX_DEFAULT, IPL_BIO);
   1662 
   1663 	sc->sc_pc = pa->pa_pc;
   1664 	pci_enable_busmaster(dev, pa->pa_pc, pa->pa_tag);
   1665 
   1666 	pci_aprint_devinfo_fancy(pa, "virtual disk controller",
   1667 	    VMWARE_PVSCSI_DEVSTR, true);
   1668 
   1669 	/*
    1670 	 * Map the device.  All devices support memory-mapped access.
   1671 	 */
   1672 	bool memh_valid;
   1673 	bus_space_tag_t memt;
   1674 	bus_space_handle_t memh;
   1675 	bus_size_t mems;
   1676 	pcireg_t regt;
   1677 
   1678 	for (rid = PCI_MAPREG_START; rid < PCI_MAPREG_END; rid += sizeof(regt)) {
   1679 		regt = pci_mapreg_type(pa->pa_pc, pa->pa_tag, rid);
   1680 		if (PCI_MAPREG_TYPE(regt) == PCI_MAPREG_TYPE_MEM)
   1681 			break;
   1682 	}
   1683 
    1684 	if (rid >= PCI_MAPREG_END) {
    1685 		aprint_error_dev(dev,
    1686 		    "unable to locate device registers\n");
         		return;
    1687 	}
   1688 
   1689 	memh_valid = (pci_mapreg_map(pa, rid, regt, 0, &memt, &memh,
   1690 	    NULL, &mems) == 0);
   1691 	if (!memh_valid) {
   1692 		aprint_error_dev(dev,
   1693 		    "unable to map device registers\n");
   1694 		return;
   1695 	}
   1696 	sc->sc_memt = memt;
   1697 	sc->sc_memh = memh;
   1698 	sc->sc_mems = mems;
   1699 
   1700 	if (pci_dma64_available(pa)) {
   1701 		sc->sc_dmat = pa->pa_dmat64;
   1702 		aprint_verbose_dev(sc->dev, "64-bit DMA\n");
   1703 	} else {
   1704 		aprint_verbose_dev(sc->dev, "32-bit DMA\n");
   1705 		sc->sc_dmat = pa->pa_dmat;
   1706 	}
   1707 
   1708 	error = pvscsi_setup_interrupts(sc, pa);
   1709 	if (error) {
   1710 		aprint_normal_dev(dev, "Interrupt setup failed\n");
   1711 		pvscsi_free_all(sc);
   1712 		return;
   1713 	}
   1714 
   1715 	sc->max_targets = pvscsi_get_max_targets(sc);
   1716 
   1717 	sc->use_msg = pvscsi_get_tunable(sc, "use_msg", pvscsi_use_msg) &&
   1718 	    pvscsi_hw_supports_msg(sc);
   1719 	sc->msg_ring_num_pages = sc->use_msg ? 1 : 0;
   1720 
   1721 	sc->req_ring_num_pages = pvscsi_get_tunable(sc, "request_ring_pages",
   1722 	    pvscsi_request_ring_pages);
   1723 	if (sc->req_ring_num_pages <= 0) {
   1724 		if (sc->max_targets <= 16) {
   1725 			sc->req_ring_num_pages =
   1726 			    PVSCSI_DEFAULT_NUM_PAGES_REQ_RING;
   1727 		} else {
   1728 			sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
   1729 		}
   1730 	} else if (sc->req_ring_num_pages > PVSCSI_MAX_NUM_PAGES_REQ_RING) {
   1731 		sc->req_ring_num_pages = PVSCSI_MAX_NUM_PAGES_REQ_RING;
   1732 	}
   1733 	sc->cmp_ring_num_pages = sc->req_ring_num_pages;
   1734 
   1735 	max_queue_depth = pvscsi_get_tunable(sc, "max_queue_depth",
   1736 	    pvscsi_max_queue_depth);
   1737 
   1738 	adapter_queue_size = (sc->req_ring_num_pages * PAGE_SIZE) /
   1739 	    sizeof(struct pvscsi_ring_req_desc);
   1740 	if (max_queue_depth > 0) {
   1741 		adapter_queue_size = MIN(adapter_queue_size, max_queue_depth);
   1742 	}
   1743 	adapter_queue_size = MIN(adapter_queue_size,
   1744 	    PVSCSI_MAX_REQ_QUEUE_DEPTH);
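         	/*
         	 * adapter_queue_size is the number of request descriptors the
         	 * ring can hold; one hcb is allocated per slot below, so this
         	 * is also the driver's queue depth.
         	 */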
   1745 
   1746 	aprint_normal_dev(sc->dev, "Use Msg: %d\n", sc->use_msg);
   1747 	aprint_normal_dev(sc->dev, "Max targets: %d\n", sc->max_targets);
   1748 	aprint_normal_dev(sc->dev, "REQ num pages: %d\n", sc->req_ring_num_pages);
   1749 	aprint_normal_dev(sc->dev, "CMP num pages: %d\n", sc->cmp_ring_num_pages);
   1750 	aprint_normal_dev(sc->dev, "MSG num pages: %d\n", sc->msg_ring_num_pages);
   1751 	aprint_normal_dev(sc->dev, "Queue size: %d\n", adapter_queue_size);
   1752 
   1753 	if (pvscsi_allocate_rings(sc)) {
   1754 		aprint_normal_dev(dev, "ring allocation failed\n");
   1755 		pvscsi_free_all(sc);
   1756 		return;
   1757 	}
   1758 
   1759 	sc->hcb_cnt = adapter_queue_size;
   1760 	sc->hcbs = kmem_zalloc(sc->hcb_cnt * sizeof(*sc->hcbs), KM_SLEEP);
   1761 
   1762 	if (pvscsi_dma_alloc_per_hcb(sc)) {
   1763 		aprint_normal_dev(dev, "error allocating per hcb dma memory\n");
   1764 		pvscsi_free_all(sc);
   1765 		return;
   1766 	}
   1767 
   1768 	pvscsi_adapter_reset(sc);
   1769 
   1770 	/*
   1771 	 * Fill in the scsipi_adapter.
   1772 	 */
   1773 	memset(adapt, 0, sizeof(*adapt));
   1774 	adapt->adapt_dev = sc->dev;
   1775 	adapt->adapt_nchannels = 1;
   1776 	adapt->adapt_openings = MIN(adapter_queue_size, PVSCSI_CMD_PER_LUN);
   1777 	adapt->adapt_max_periph = adapt->adapt_openings;
   1778 	adapt->adapt_request = pvscsi_scsipi_request;
   1779 	adapt->adapt_minphys = minphys;
   1780 
   1781 	/*
   1782 	 * Fill in the scsipi_channel.
   1783 	 */
   1784 	memset(chan, 0, sizeof(*chan));
   1785 	chan->chan_adapter = adapt;
   1786 	chan->chan_bustype = &scsi_bustype;
   1787 	chan->chan_channel = 0;
   1788 	chan->chan_ntargets = MIN(PVSCSI_MAX_TARGET, 16);	/* cap reasonably */
   1789 	chan->chan_nluns = MIN(PVSCSI_MAX_LUN, 1024);		/* cap reasonably */
   1790 	chan->chan_id = PVSCSI_MAX_TARGET;
   1791 	chan->chan_flags = SCSIPI_CHAN_NOSETTLE;
   1792 
   1793 	pvscsi_setup_rings(sc);
   1794 	if (sc->use_msg) {
   1795 		pvscsi_setup_msg_ring(sc);
   1796 	}
   1797 
   1798 	sc->use_req_call_threshold = pvscsi_setup_req_call(sc, 1);
   1799 
   1800 	pvscsi_intr_enable(sc);
   1801 
   1802 	sc->sc_scsibus_dv = config_found(sc->dev, &sc->sc_channel, scsiprint,
   1803 	    CFARGS_NONE);
   1804 
   1805 	return;
   1806 }
   1807 
   1808 static int
   1809 pvscsi_detach(device_t dev, int flags)
   1810 {
   1811 	struct pvscsi_softc *sc;
   1812 
   1813 	sc = device_private(dev);
   1814 
   1815 	pvscsi_intr_disable(sc);
   1816 	pvscsi_adapter_reset(sc);
   1817 
   1818 	pvscsi_free_all(sc);
   1819 
   1820 	mutex_destroy(&sc->lock);
   1821 
   1822 	return (0);
   1823 }
   1824