Home | History | Annotate | Line # | Download | only in pci
nvme_pci.c revision 1.10
      1 /*	$NetBSD: nvme_pci.c,v 1.10 2016/09/17 12:58:51 jdolecek Exp $	*/
      2 /*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
      3 
      4 /*
      5  * Copyright (c) 2014 David Gwynne <dlg (at) openbsd.org>
      6  *
      7  * Permission to use, copy, modify, and distribute this software for any
      8  * purpose with or without fee is hereby granted, provided that the above
      9  * copyright notice and this permission notice appear in all copies.
     10  *
     11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     18  */
     19 
     20 /*-
     21  * Copyright (C) 2016 NONAKA Kimihiro <nonaka (at) netbsd.org>
     22  * All rights reserved.
     23  *
     24  * Redistribution and use in source and binary forms, with or without
     25  * modification, are permitted provided that the following conditions
     26  * are met:
     27  * 1. Redistributions of source code must retain the above copyright
     28  *    notice, this list of conditions and the following disclaimer.
     29  * 2. Redistributions in binary form must reproduce the above copyright
     30  *    notice, this list of conditions and the following disclaimer in the
     31  *    documentation and/or other materials provided with the distribution.
     32  *
     33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     36  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     37  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     38  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     39  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     41  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     42  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     43  */
     44 
     45 #include <sys/cdefs.h>
     46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.10 2016/09/17 12:58:51 jdolecek Exp $");
     47 
     48 #include <sys/param.h>
     49 #include <sys/systm.h>
     50 #include <sys/kernel.h>
     51 #include <sys/device.h>
     52 #include <sys/bitops.h>
     53 #include <sys/bus.h>
     54 #include <sys/cpu.h>
     55 #include <sys/interrupt.h>
     56 #include <sys/kmem.h>
     57 #include <sys/pmf.h>
     58 #include <sys/module.h>
     59 
     60 #include <dev/pci/pcireg.h>
     61 #include <dev/pci/pcivar.h>
     62 
     63 #include <dev/ic/nvmereg.h>
     64 #include <dev/ic/nvmevar.h>
     65 
/* Non-zero: skip MSI-X/MSI probing and allocate a single INTx interrupt. */
int nvme_pci_force_intx = 0;

/* Non-zero: flag interrupt handles PCI_INTR_MPSAFE before establishing them. */
int nvme_pci_mpsafe = 0;

/*
 * Non-zero: allow more than one I/O queue.  Only takes effect together
 * with nvme_pci_mpsafe; otherwise MSI/MSI-X requests are limited to the
 * admin vector plus one I/O vector.
 * INTx: ioq=1, MSI/MSI-X: ioq=ncpu
 */
int nvme_pci_mq = 1;

/* Configuration-space offset of the BAR that holds the controller registers. */
#define NVME_PCI_BAR		0x10
     71 
     72 struct nvme_pci_softc {
     73 	struct nvme_softc	psc_nvme;
     74 
     75 	pci_chipset_tag_t	psc_pc;
     76 	pci_intr_handle_t	*psc_intrs;
     77 	int			psc_nintrs;
     78 };
     79 
     80 static int	nvme_pci_match(device_t, cfdata_t, void *);
     81 static void	nvme_pci_attach(device_t, device_t, void *);
     82 static int	nvme_pci_detach(device_t, int);
     83 
     84 CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
     85     nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
     86     nvme_childdet, DVF_DETACH_SHUTDOWN);
     87 
     88 static int	nvme_pci_intr_establish(struct nvme_softc *,
     89 		    uint16_t, struct nvme_queue *);
     90 static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
     91 static int	nvme_pci_setup_intr(struct pci_attach_args *,
     92 		    struct nvme_pci_softc *);
     93 
     94 static int
     95 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
     96 {
     97 	struct pci_attach_args *pa = aux;
     98 
     99 	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
    100 	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
    101 	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
    102 		return 1;
    103 
    104 	return 0;
    105 }
    106 
    107 static void
    108 nvme_pci_attach(device_t parent, device_t self, void *aux)
    109 {
    110 	struct nvme_pci_softc *psc = device_private(self);
    111 	struct nvme_softc *sc = &psc->psc_nvme;
    112 	struct pci_attach_args *pa = aux;
    113 	pcireg_t memtype, reg;
    114 	bus_addr_t memaddr;
    115 	int flags, msixoff;
    116 	int error;
    117 
    118 	sc->sc_dev = self;
    119 	psc->psc_pc = pa->pa_pc;
    120 	if (pci_dma64_available(pa))
    121 		sc->sc_dmat = pa->pa_dmat64;
    122 	else
    123 		sc->sc_dmat = pa->pa_dmat;
    124 
    125 	pci_aprint_devinfo(pa, NULL);
    126 
    127 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
    128 	if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
    129 		reg |= PCI_COMMAND_MASTER_ENABLE;
    130         	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg);
    131 	}
    132 
    133 	/* Map registers */
    134 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
    135 	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
    136 		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
    137 		return;
    138 	}
    139 	sc->sc_iot = pa->pa_memt;
    140 	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
    141 	    memtype, &memaddr, &sc->sc_ios, &flags);
    142 	if (error) {
    143 		aprint_error_dev(self, "can't get map info\n");
    144 		return;
    145 	}
    146 	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
    147 	    NULL)) {
    148 		pcireg_t msixtbl;
    149 		uint32_t table_offset;
    150 		int bir;
    151 
    152 		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
    153 		    msixoff + PCI_MSIX_TBLOFFSET);
    154 		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
    155 		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
    156 		if (bir == 0) {
    157 			sc->sc_ios = table_offset;
    158 		}
    159 	}
    160 	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
    161 	    &sc->sc_ioh);
    162 	if (error != 0) {
    163 		aprint_error_dev(self, "can't map mem space (error=%d)\n",
    164 		    error);
    165 		return;
    166 	}
    167 
    168 	/* Establish interrupts */
    169 	if (nvme_pci_setup_intr(pa, psc) != 0) {
    170 		aprint_error_dev(self, "unable to allocate interrupt\n");
    171 		goto unmap;
    172 	}
    173 	sc->sc_intr_establish = nvme_pci_intr_establish;
    174 	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
    175 
    176 	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
    177 	if (sc->sc_ih == NULL) {
    178 		aprint_error_dev(self, "unable to allocate ih memory\n");
    179 		goto intr_release;
    180 	}
    181 
    182 	if (nvme_attach(sc) != 0) {
    183 		/* error printed by nvme_attach() */
    184 		goto intr_free;
    185 	}
    186 
    187 	if (!pmf_device_register(self, NULL, NULL))
    188 		aprint_error_dev(self, "couldn't establish power handler\n");
    189 
    190 	SET(sc->sc_flags, NVME_F_ATTACHED);
    191 	return;
    192 
    193 intr_free:
    194 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
    195 	sc->sc_nq = 0;
    196 intr_release:
    197 	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
    198 	psc->psc_nintrs = 0;
    199 unmap:
    200 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
    201 	sc->sc_ios = 0;
    202 }
    203 
    204 static int
    205 nvme_pci_detach(device_t self, int flags)
    206 {
    207 	struct nvme_pci_softc *psc = device_private(self);
    208 	struct nvme_softc *sc = &psc->psc_nvme;
    209 	int error;
    210 
    211 	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
    212 		return 0;
    213 
    214 	error = nvme_detach(sc, flags);
    215 	if (error)
    216 		return error;
    217 
    218 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
    219 	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
    220 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
    221 	return 0;
    222 }
    223 
    224 static int
    225 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    226     struct nvme_queue *q)
    227 {
    228 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
    229 	char intr_xname[INTRDEVNAMEBUF];
    230 	char intrbuf[PCI_INTRSTR_LEN];
    231 	const char *intrstr = NULL;
    232 	int (*ih_func)(void *);
    233 	void *ih_arg;
    234 	kcpuset_t *affinity;
    235 	cpuid_t affinity_to;
    236 	int error;
    237 
    238 	if (!sc->sc_use_mq && qid > 0)
    239 		return 0;
    240 
    241 	KASSERT(sc->sc_ih[qid] == NULL);
    242 
    243 	if (nvme_pci_mpsafe) {
    244 		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
    245 		    PCI_INTR_MPSAFE, true);
    246 	}
    247 	if (!sc->sc_use_mq) {
    248 		snprintf(intr_xname, sizeof(intr_xname), "%s",
    249 		    device_xname(sc->sc_dev));
    250 		ih_arg = sc;
    251 		ih_func = nvme_intr;
    252 	} else {
    253 		if (qid == 0) {
    254 			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
    255 			    device_xname(sc->sc_dev));
    256 		} else {
    257 			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
    258 			    device_xname(sc->sc_dev), qid);
    259 		}
    260 		ih_arg = q;
    261 		if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
    262 		    == PCI_INTR_TYPE_MSIX)
    263 			ih_func = nvme_mq_msix_intr;
    264 		else
    265 			ih_func = nvme_mq_msi_intr;
    266 	}
    267 	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
    268 	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
    269 	if (sc->sc_ih[qid] == NULL) {
    270 		aprint_error_dev(sc->sc_dev,
    271 		    "unable to establish %s interrupt\n", intr_xname);
    272 		return 1;
    273 	}
    274 	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
    275 	    sizeof(intrbuf));
    276 	if (!sc->sc_use_mq) {
    277 		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
    278 	} else if (qid == NVME_ADMIN_Q) {
    279 		aprint_normal_dev(sc->sc_dev,
    280 		    "for admin queue interrupting at %s\n", intrstr);
    281 	} else if (!nvme_pci_mpsafe) {
    282 		aprint_normal_dev(sc->sc_dev,
    283 		    "for io queue %d interrupting at %s\n", qid, intrstr);
    284 	} else {
    285 		kcpuset_create(&affinity, true);
    286 		affinity_to = (qid - 1) % ncpu;
    287 		kcpuset_set(affinity, affinity_to);
    288 		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
    289 		kcpuset_destroy(affinity);
    290 		aprint_normal_dev(sc->sc_dev,
    291 		    "for io queue %d interrupting at %s", qid, intrstr);
    292 		if (error == 0)
    293 			aprint_normal(" affinity to cpu%lu", affinity_to);
    294 		aprint_normal("\n");
    295 	}
    296 	return 0;
    297 }
    298 
    299 static int
    300 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
    301 {
    302 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
    303 
    304 	if (!sc->sc_use_mq && qid > 0)
    305 		return 0;
    306 
    307 	KASSERT(sc->sc_ih[qid] != NULL);
    308 
    309 	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
    310 	sc->sc_ih[qid] = NULL;
    311 
    312 	return 0;
    313 }
    314 
    315 static int
    316 nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
    317 {
    318 	struct nvme_softc *sc = &psc->psc_nvme;
    319 	pci_intr_handle_t *ihps;
    320 	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
    321 	int max_type, intr_type;
    322 	int error;
    323 
    324 	if (nvme_pci_force_intx) {
    325 		max_type = PCI_INTR_TYPE_INTX;
    326 		goto force_intx;
    327 	}
    328 
    329 	/* MSI-X */
    330 	max_type = PCI_INTR_TYPE_MSIX;
    331 	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
    332 	    ncpu + 1);
    333 	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
    334 		memset(alloced_counts, 0, sizeof(alloced_counts));
    335 		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
    336 		if (pci_intr_alloc(pa, &ihps, alloced_counts,
    337 		    PCI_INTR_TYPE_MSIX)) {
    338 			counts[PCI_INTR_TYPE_MSIX] = 0;
    339 		} else {
    340 			counts[PCI_INTR_TYPE_MSIX] =
    341 			    alloced_counts[PCI_INTR_TYPE_MSIX];
    342 			pci_intr_release(pa->pa_pc, ihps,
    343 			    alloced_counts[PCI_INTR_TYPE_MSIX]);
    344 		}
    345 	}
    346 	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
    347 		counts[PCI_INTR_TYPE_MSIX] = 0;
    348 		max_type = PCI_INTR_TYPE_MSI;
    349 	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
    350 		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
    351 	}
    352 
    353 retry_msi:
    354 	/* MSI */
    355 	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
    356 	if (counts[PCI_INTR_TYPE_MSI] > 0) {
    357 		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
    358 			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
    359 				break;
    360 			counts[PCI_INTR_TYPE_MSI] /= 2;
    361 		}
    362 		memset(alloced_counts, 0, sizeof(alloced_counts));
    363 		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
    364 		if (pci_intr_alloc(pa, &ihps, alloced_counts,
    365 		    PCI_INTR_TYPE_MSI)) {
    366 			counts[PCI_INTR_TYPE_MSI] = 0;
    367 		} else {
    368 			counts[PCI_INTR_TYPE_MSI] =
    369 			    alloced_counts[PCI_INTR_TYPE_MSI];
    370 			pci_intr_release(pa->pa_pc, ihps,
    371 			    alloced_counts[PCI_INTR_TYPE_MSI]);
    372 		}
    373 	}
    374 	if (counts[PCI_INTR_TYPE_MSI] < 1) {
    375 		counts[PCI_INTR_TYPE_MSI] = 0;
    376 		if (max_type == PCI_INTR_TYPE_MSI)
    377 			max_type = PCI_INTR_TYPE_INTX;
    378 	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
    379 		if (counts[PCI_INTR_TYPE_MSI] > 2)
    380 			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
    381 	}
    382 
    383 force_intx:
    384 	/* INTx */
    385 	counts[PCI_INTR_TYPE_INTX] = 1;
    386 
    387 	memcpy(alloced_counts, counts, sizeof(counts));
    388 	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
    389 	if (error) {
    390 		if (max_type != PCI_INTR_TYPE_INTX) {
    391 retry:
    392 			memset(counts, 0, sizeof(counts));
    393 			if (max_type == PCI_INTR_TYPE_MSIX) {
    394 				max_type = PCI_INTR_TYPE_MSI;
    395 				goto retry_msi;
    396 			} else {
    397 				max_type = PCI_INTR_TYPE_INTX;
    398 				goto force_intx;
    399 			}
    400 		}
    401 		return error;
    402 	}
    403 
    404 	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
    405 	if (alloced_counts[intr_type] < counts[intr_type]) {
    406 		if (intr_type != PCI_INTR_TYPE_INTX) {
    407 			pci_intr_release(pa->pa_pc, ihps,
    408 			    alloced_counts[intr_type]);
    409 			max_type = intr_type;
    410 			goto retry;
    411 		}
    412 		return EBUSY;
    413 	}
    414 
    415 	psc->psc_intrs = ihps;
    416 	psc->psc_nintrs = alloced_counts[intr_type];
    417 	if (intr_type == PCI_INTR_TYPE_MSI) {
    418 		if (alloced_counts[intr_type] > ncpu + 1)
    419 			alloced_counts[intr_type] = ncpu + 1;
    420 	}
    421 	sc->sc_use_mq = alloced_counts[intr_type] > 1;
    422 	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
    423 	return 0;
    424 }
    425 
    426 MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");
    427 
    428 #ifdef _MODULE
    429 #include "ioconf.c"
    430 
    431 extern const struct bdevsw ld_bdevsw;
    432 extern const struct cdevsw ld_cdevsw;
    433 #endif
    434 
    435 static int
    436 nvme_modcmd(modcmd_t cmd, void *opaque)
    437 {
    438 #ifdef _MODULE
    439 	devmajor_t cmajor, bmajor;
    440 #endif
    441 	int error = 0;
    442 
    443 	switch (cmd) {
    444 	case MODULE_CMD_INIT:
    445 #ifdef _MODULE
    446 		/* devsw must be done before configuring the actual device,
    447 		 * otherwise ldattach() fails
    448 		 */
    449 		bmajor = cmajor = NODEVMAJOR;
    450 		error = devsw_attach(ld_cd.cd_name, &ld_bdevsw, &bmajor,
    451 		    &ld_cdevsw, &cmajor);
    452 		if (error) {
    453 			aprint_error("%s: unable to register devsw\n",
    454 			    ld_cd.cd_name);
    455 			return error;
    456 		}
    457 
    458 		error = config_init_component(cfdriver_ioconf_nvme_pci,
    459 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
    460 		if (error)
    461 			return error;
    462 
    463 #endif
    464 		return error;
    465 	case MODULE_CMD_FINI:
    466 #ifdef _MODULE
    467 		error = config_fini_component(cfdriver_ioconf_nvme_pci,
    468 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
    469 		if (error)
    470 			return error;
    471 
    472 		devsw_detach(&ld_bdevsw, &ld_cdevsw);
    473 #endif
    474 		return error;
    475 	default:
    476 		return ENOTTY;
    477 	}
    478 }
    479