nvme_pci.c revision 1.13 1 /* $NetBSD: nvme_pci.c,v 1.13 2016/09/18 11:58:35 jdolecek Exp $ */
2 /* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3
4 /*
5 * Copyright (c) 2014 David Gwynne <dlg (at) openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*-
21 * Copyright (C) 2016 NONAKA Kimihiro <nonaka (at) netbsd.org>
22 * All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.13 2016/09/18 11:58:35 jdolecek Exp $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 #include <sys/module.h>
59
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62
63 #include <dev/ic/nvmereg.h>
64 #include <dev/ic/nvmevar.h>
65
/*
 * Driver tunables.
 */
int nvme_pci_force_intx = 0;	/* non-zero: skip MSI/MSI-X, use INTx only */
int nvme_pci_mpsafe = 0;	/* non-zero: mark interrupts PCI_INTR_MPSAFE */
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* Configuration-space offset of the BAR used to map the registers. */
#define	NVME_PCI_BAR		0x10
71
#ifndef __HAVE_PCI_MSI_MSIX
/*
 * Shims used when the port does not define __HAVE_PCI_MSI_MSIX.
 *
 * pci_intr_release() frees the handle array with kmem_free(); `pc' is
 * unused.  pci_intr_establish_xname() drops `xname' and forwards to
 * pci_intr_establish().
 *
 * Every argument is parenthesized so that expression arguments (e.g.
 * `a + b' for nintrs) expand with the intended precedence.
 */
#define	pci_intr_release(pc, intrs, nintrs)				\
	kmem_free((intrs), sizeof(*(intrs)) * (nintrs))
#define	pci_intr_establish_xname(pc, ih, level, intrhand, intrarg, xname) \
	pci_intr_establish((pc), (ih), (level), (intrhand), (intrarg))
#endif
78
79 struct nvme_pci_softc {
80 struct nvme_softc psc_nvme;
81
82 pci_chipset_tag_t psc_pc;
83 pci_intr_handle_t *psc_intrs;
84 int psc_nintrs;
85 };
86
87 static int nvme_pci_match(device_t, cfdata_t, void *);
88 static void nvme_pci_attach(device_t, device_t, void *);
89 static int nvme_pci_detach(device_t, int);
90
91 CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
92 nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
93 nvme_childdet, DVF_DETACH_SHUTDOWN);
94
95 static int nvme_pci_intr_establish(struct nvme_softc *,
96 uint16_t, struct nvme_queue *);
97 static int nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
98 static int nvme_pci_setup_intr(struct pci_attach_args *,
99 struct nvme_pci_softc *);
100
101 static int
102 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
103 {
104 struct pci_attach_args *pa = aux;
105
106 if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
107 PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
108 PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
109 return 1;
110
111 return 0;
112 }
113
114 static void
115 nvme_pci_attach(device_t parent, device_t self, void *aux)
116 {
117 struct nvme_pci_softc *psc = device_private(self);
118 struct nvme_softc *sc = &psc->psc_nvme;
119 struct pci_attach_args *pa = aux;
120 pcireg_t memtype, reg;
121 bus_addr_t memaddr;
122 int flags, error;
123 #ifdef __HAVE_PCI_MSI_MSIX
124 int msixoff;
125 #endif
126
127 sc->sc_dev = self;
128 psc->psc_pc = pa->pa_pc;
129 if (pci_dma64_available(pa))
130 sc->sc_dmat = pa->pa_dmat64;
131 else
132 sc->sc_dmat = pa->pa_dmat;
133
134 pci_aprint_devinfo(pa, NULL);
135
136 reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
137 if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
138 reg |= PCI_COMMAND_MASTER_ENABLE;
139 pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg);
140 }
141
142 /* Map registers */
143 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
144 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
145 aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
146 return;
147 }
148 sc->sc_iot = pa->pa_memt;
149 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
150 memtype, &memaddr, &sc->sc_ios, &flags);
151 if (error) {
152 aprint_error_dev(self, "can't get map info\n");
153 return;
154 }
155
156 #ifdef __HAVE_PCI_MSI_MSIX
157 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
158 NULL)) {
159 pcireg_t msixtbl;
160 uint32_t table_offset;
161 int bir;
162
163 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
164 msixoff + PCI_MSIX_TBLOFFSET);
165 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
166 bir = msixtbl & PCI_MSIX_PBABIR_MASK;
167 if (bir == 0) {
168 sc->sc_ios = table_offset;
169 }
170 }
171 #endif /* __HAVE_PCI_MSI_MSIX */
172
173 error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
174 &sc->sc_ioh);
175 if (error != 0) {
176 aprint_error_dev(self, "can't map mem space (error=%d)\n",
177 error);
178 return;
179 }
180
181 /* Establish interrupts */
182 if (nvme_pci_setup_intr(pa, psc) != 0) {
183 aprint_error_dev(self, "unable to allocate interrupt\n");
184 goto unmap;
185 }
186 sc->sc_intr_establish = nvme_pci_intr_establish;
187 sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
188
189 sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
190 if (sc->sc_ih == NULL) {
191 aprint_error_dev(self, "unable to allocate ih memory\n");
192 goto intr_release;
193 }
194
195 if (nvme_attach(sc) != 0) {
196 /* error printed by nvme_attach() */
197 goto intr_free;
198 }
199
200 if (!pmf_device_register(self, NULL, NULL))
201 aprint_error_dev(self, "couldn't establish power handler\n");
202
203 SET(sc->sc_flags, NVME_F_ATTACHED);
204 return;
205
206 intr_free:
207 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
208 sc->sc_nq = 0;
209 intr_release:
210 pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
211 psc->psc_nintrs = 0;
212 unmap:
213 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
214 sc->sc_ios = 0;
215 }
216
217 static int
218 nvme_pci_detach(device_t self, int flags)
219 {
220 struct nvme_pci_softc *psc = device_private(self);
221 struct nvme_softc *sc = &psc->psc_nvme;
222 int error;
223
224 if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
225 return 0;
226
227 error = nvme_detach(sc, flags);
228 if (error)
229 return error;
230
231 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
232 pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
233 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
234 return 0;
235 }
236
237 static int
238 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
239 struct nvme_queue *q)
240 {
241 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
242 char intr_xname[INTRDEVNAMEBUF];
243 char intrbuf[PCI_INTRSTR_LEN];
244 const char *intrstr = NULL;
245 int (*ih_func)(void *);
246 void *ih_arg;
247 #ifdef __HAVE_PCI_MSI_MSIX
248 int error;
249 #endif
250
251 KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
252 KASSERT(sc->sc_ih[qid] == NULL);
253
254 if (nvme_pci_mpsafe) {
255 pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
256 PCI_INTR_MPSAFE, true);
257 }
258
259 #ifdef __HAVE_PCI_MSI_MSIX
260 if (!sc->sc_use_mq) {
261 #endif
262 snprintf(intr_xname, sizeof(intr_xname), "%s",
263 device_xname(sc->sc_dev));
264 ih_arg = sc;
265 ih_func = nvme_intr;
266 #ifdef __HAVE_PCI_MSI_MSIX
267 }
268 else {
269 if (qid == NVME_ADMIN_Q) {
270 snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
271 device_xname(sc->sc_dev));
272 } else {
273 snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
274 device_xname(sc->sc_dev), qid);
275 }
276 ih_arg = q;
277 if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
278 == PCI_INTR_TYPE_MSIX)
279 ih_func = nvme_mq_msix_intr;
280 else
281 ih_func = nvme_mq_msi_intr;
282 }
283 #endif /* __HAVE_PCI_MSI_MSIX */
284 sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
285 psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
286 if (sc->sc_ih[qid] == NULL) {
287 aprint_error_dev(sc->sc_dev,
288 "unable to establish %s interrupt\n", intr_xname);
289 return 1;
290 }
291 intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
292 sizeof(intrbuf));
293 if (!sc->sc_use_mq) {
294 aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
295 }
296 #ifdef __HAVE_PCI_MSI_MSIX
297 else if (qid == NVME_ADMIN_Q) {
298 aprint_normal_dev(sc->sc_dev,
299 "for admin queue interrupting at %s\n", intrstr);
300 } else if (!nvme_pci_mpsafe) {
301 aprint_normal_dev(sc->sc_dev,
302 "for io queue %d interrupting at %s\n", qid, intrstr);
303 } else {
304 kcpuset_t *affinity;
305 cpuid_t affinity_to;
306
307 kcpuset_create(&affinity, true);
308 affinity_to = (qid - 1) % ncpu;
309 kcpuset_set(affinity, affinity_to);
310 error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
311 kcpuset_destroy(affinity);
312 aprint_normal_dev(sc->sc_dev,
313 "for io queue %d interrupting at %s", qid, intrstr);
314 if (error == 0)
315 aprint_normal(" affinity to cpu%lu", affinity_to);
316 aprint_normal("\n");
317 }
318 #endif
319 return 0;
320 }
321
322 static int
323 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
324 {
325 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
326
327 if (!sc->sc_use_mq && qid > 0)
328 return 0;
329
330 KASSERT(sc->sc_ih[qid] != NULL);
331
332 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
333 sc->sc_ih[qid] = NULL;
334
335 return 0;
336 }
337
338 static int
339 nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
340 {
341 struct nvme_softc *sc = &psc->psc_nvme;
342 #ifdef __HAVE_PCI_MSI_MSIX
343 int error;
344 int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
345 pci_intr_handle_t *ihps;
346 int max_type, intr_type;
347 #else
348 pci_intr_handle_t ih;
349 #endif /* __HAVE_PCI_MSI_MSIX */
350
351 #ifdef __HAVE_PCI_MSI_MSIX
352 if (nvme_pci_force_intx) {
353 max_type = PCI_INTR_TYPE_INTX;
354 goto force_intx;
355 }
356
357 /* MSI-X */
358 max_type = PCI_INTR_TYPE_MSIX;
359 counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
360 ncpu + 1);
361 if (counts[PCI_INTR_TYPE_MSIX] > 0) {
362 memset(alloced_counts, 0, sizeof(alloced_counts));
363 alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
364 if (pci_intr_alloc(pa, &ihps, alloced_counts,
365 PCI_INTR_TYPE_MSIX)) {
366 counts[PCI_INTR_TYPE_MSIX] = 0;
367 } else {
368 counts[PCI_INTR_TYPE_MSIX] =
369 alloced_counts[PCI_INTR_TYPE_MSIX];
370 pci_intr_release(pa->pa_pc, ihps,
371 alloced_counts[PCI_INTR_TYPE_MSIX]);
372 }
373 }
374 if (counts[PCI_INTR_TYPE_MSIX] < 2) {
375 counts[PCI_INTR_TYPE_MSIX] = 0;
376 max_type = PCI_INTR_TYPE_MSI;
377 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
378 counts[PCI_INTR_TYPE_MSIX] = 2; /* adminq + 1 ioq */
379 }
380
381 retry_msi:
382 /* MSI */
383 counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
384 if (counts[PCI_INTR_TYPE_MSI] > 0) {
385 while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
386 if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
387 break;
388 counts[PCI_INTR_TYPE_MSI] /= 2;
389 }
390 memset(alloced_counts, 0, sizeof(alloced_counts));
391 alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
392 if (pci_intr_alloc(pa, &ihps, alloced_counts,
393 PCI_INTR_TYPE_MSI)) {
394 counts[PCI_INTR_TYPE_MSI] = 0;
395 } else {
396 counts[PCI_INTR_TYPE_MSI] =
397 alloced_counts[PCI_INTR_TYPE_MSI];
398 pci_intr_release(pa->pa_pc, ihps,
399 alloced_counts[PCI_INTR_TYPE_MSI]);
400 }
401 }
402 if (counts[PCI_INTR_TYPE_MSI] < 1) {
403 counts[PCI_INTR_TYPE_MSI] = 0;
404 if (max_type == PCI_INTR_TYPE_MSI)
405 max_type = PCI_INTR_TYPE_INTX;
406 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
407 if (counts[PCI_INTR_TYPE_MSI] > 2)
408 counts[PCI_INTR_TYPE_MSI] = 2; /* adminq + 1 ioq */
409 }
410
411 force_intx:
412 /* INTx */
413 counts[PCI_INTR_TYPE_INTX] = 1;
414
415 memcpy(alloced_counts, counts, sizeof(counts));
416 error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
417 if (error) {
418 if (max_type != PCI_INTR_TYPE_INTX) {
419 retry:
420 memset(counts, 0, sizeof(counts));
421 if (max_type == PCI_INTR_TYPE_MSIX) {
422 max_type = PCI_INTR_TYPE_MSI;
423 goto retry_msi;
424 } else {
425 max_type = PCI_INTR_TYPE_INTX;
426 goto force_intx;
427 }
428 }
429 return error;
430 }
431
432 intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
433 if (alloced_counts[intr_type] < counts[intr_type]) {
434 if (intr_type != PCI_INTR_TYPE_INTX) {
435 pci_intr_release(pa->pa_pc, ihps,
436 alloced_counts[intr_type]);
437 max_type = intr_type;
438 goto retry;
439 }
440 return EBUSY;
441 }
442
443 psc->psc_intrs = ihps;
444 psc->psc_nintrs = alloced_counts[intr_type];
445 if (intr_type == PCI_INTR_TYPE_MSI) {
446 if (alloced_counts[intr_type] > ncpu + 1)
447 alloced_counts[intr_type] = ncpu + 1;
448 }
449 sc->sc_use_mq = alloced_counts[intr_type] > 1;
450 sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
451
452 #else /* !__HAVE_PCI_MSI_MSIX */
453 if (pci_intr_map(pa, &ih)) {
454 aprint_error_dev(sc->sc_dev, "couldn't map interrupt\n");
455 return EBUSY;
456 }
457
458 psc->psc_intrs = kmem_zalloc(sizeof(ih), KM_SLEEP);
459 psc->psc_intrs[0] = ih;
460 psc->psc_nintrs = 1;
461 sc->sc_use_mq = 0;
462 sc->sc_nq = 1;
463 #endif /* __HAVE_PCI_MSI_MSIX */
464
465 return 0;
466 }
467
468 MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");
469
470 #ifdef _MODULE
471 #include "ioconf.c"
472 #endif
473
474 static int
475 nvme_modcmd(modcmd_t cmd, void *opaque)
476 {
477 #ifdef _MODULE
478 devmajor_t cmajor, bmajor;
479 extern const struct bdevsw ld_bdevsw;
480 extern const struct cdevsw ld_cdevsw;
481 extern const struct cdevsw nvme_cdevsw;
482 #endif
483 int error = 0;
484
485 switch (cmd) {
486 case MODULE_CMD_INIT:
487 #ifdef _MODULE
488 /* devsw must be done before configuring the actual device,
489 * otherwise ldattach() fails
490 */
491 bmajor = cmajor = NODEVMAJOR;
492 error = devsw_attach(ld_cd.cd_name, &ld_bdevsw, &bmajor,
493 &ld_cdevsw, &cmajor);
494 if (error) {
495 aprint_error("%s: unable to register devsw\n",
496 ld_cd.cd_name);
497 return error;
498 }
499
500 error = config_init_component(cfdriver_ioconf_nvme_pci,
501 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
502 if (error)
503 return error;
504
505 bmajor = cmajor = NODEVMAJOR;
506 error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
507 &nvme_cdevsw, &cmajor);
508 if (error) {
509 aprint_error("%s: unable to register devsw\n",
510 nvme_cd.cd_name);
511 /* do not abort, just /dev/nvme* will not work */
512 }
513 #endif
514 return error;
515 case MODULE_CMD_FINI:
516 #ifdef _MODULE
517 devsw_detach(NULL, &nvme_cdevsw);
518
519 error = config_fini_component(cfdriver_ioconf_nvme_pci,
520 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
521 if (error)
522 return error;
523
524 devsw_detach(&ld_bdevsw, &ld_cdevsw);
525 #endif
526 return error;
527 default:
528 return ENOTTY;
529 }
530 }
531