/*	$NetBSD: nvme_pci.c,v 1.2.2.4 2016/10/05 20:55:43 skrll Exp $	*/
/*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $	*/

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*-
 * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.2.2.4 2016/10/05 20:55:43 skrll Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/bitops.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/interrupt.h>
#include <sys/kmem.h>
#include <sys/pmf.h>
#include <sys/module.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>

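/*
 * Driver tunables (patchable globals):
 *
 *	nvme_pci_force_intx	skip MSI/MSI-X and use a legacy INTx line
 *	nvme_pci_mpsafe		establish MPSAFE interrupt handlers
 *	nvme_pci_mq		with MSI/MSI-X, use one I/O queue per CPU
 */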
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 1;
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

#define NVME_PCI_BAR		0x10

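/*
 * Platforms without __HAVE_PCI_MSI_MSIX only offer legacy INTx, so provide
 * minimal stand-ins for the MSI/MSI-X interrupt API used below.
 */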
#ifndef __HAVE_PCI_MSI_MSIX
#define pci_intr_release(pc, intrs, nintrs)	\
	kmem_free(intrs, sizeof(*intrs) * nintrs)
#define pci_intr_establish_xname(pc, ih, level, intrhand, intrarg, xname) \
	pci_intr_establish(pc, ih, level, intrhand, intrarg)
#endif

struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;

	pci_chipset_tag_t	psc_pc;
	pci_intr_handle_t	*psc_intrs;
	int			psc_nintrs;
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);
static int	nvme_pci_rescan(device_t, const char *, const int *);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);

static int
nvme_pci_match(device_t parent, cfdata_t match, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
		return 1;

	return 0;
}

static void
nvme_pci_attach(device_t parent, device_t self, void *aux)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	struct pci_attach_args *pa = aux;
	pcireg_t memtype, reg;
	bus_addr_t memaddr;
	int flags, error;
#ifdef __HAVE_PCI_MSI_MSIX
	int msixoff;
#endif

	sc->sc_dev = self;
	psc->psc_pc = pa->pa_pc;
	if (pci_dma64_available(pa))
		sc->sc_dmat = pa->pa_dmat64;
	else
		sc->sc_dmat = pa->pa_dmat;

	pci_aprint_devinfo(pa, NULL);

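	/* Enable bus mastering so the controller can DMA. */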
	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
	if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
		reg |= PCI_COMMAND_MASTER_ENABLE;
		pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
		    reg);
	}

	/* Map registers */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
		return;
	}
	sc->sc_iot = pa->pa_memt;
	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
	    memtype, &memaddr, &sc->sc_ios, &flags);
	if (error) {
		aprint_error_dev(self, "can't get map info\n");
		return;
	}

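	/*
	 * If the MSI-X table lives in the same BAR as the controller
	 * registers (BIR 0), restrict the register mapping so it stops
	 * below the table and the two mappings do not overlap.
	 */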
#ifdef __HAVE_PCI_MSI_MSIX
	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
	    NULL)) {
		pcireg_t msixtbl;
		uint32_t table_offset;
		int bir;

		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
		    msixoff + PCI_MSIX_TBLOFFSET);
		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
		if (bir == 0) {
			sc->sc_ios = table_offset;
		}
	}
#endif	/* __HAVE_PCI_MSI_MSIX */

	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
	    &sc->sc_ioh);
	if (error != 0) {
		aprint_error_dev(self, "can't map mem space (error=%d)\n",
		    error);
		return;
	}

	/* Establish interrupts */
	if (nvme_pci_setup_intr(pa, psc) != 0) {
		aprint_error_dev(self, "unable to allocate interrupt\n");
		goto unmap;
	}
	sc->sc_intr_establish = nvme_pci_intr_establish;
	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;

	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
	if (sc->sc_ih == NULL) {
		aprint_error_dev(self, "unable to allocate ih memory\n");
		goto intr_release;
	}

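	/*
	 * With per-queue (MSI/MSI-X) vectors, completion processing is
	 * deferred from the hard interrupt handler to a per-queue soft
	 * interrupt (nvme_softintr_msi), so allocate one handle per vector.
	 */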
	if (sc->sc_use_mq) {
		sc->sc_softih = kmem_zalloc(
		    sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);
		if (sc->sc_softih == NULL) {
			aprint_error_dev(self,
			    "unable to allocate softih memory\n");
			goto intr_free;
		}
	}

	if (nvme_attach(sc) != 0) {
		/* error printed by nvme_attach() */
		goto softintr_free;
	}

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, NVME_F_ATTACHED);
	return;

softintr_free:
	if (sc->sc_softih) {
		kmem_free(sc->sc_softih,
		    sizeof(*sc->sc_softih) * psc->psc_nintrs);
	}
intr_free:
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	sc->sc_nq = 0;
intr_release:
	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
	psc->psc_nintrs = 0;
unmap:
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	sc->sc_ios = 0;
}

static int
nvme_pci_rescan(device_t self, const char *attr, const int *flags)
{

	return nvme_rescan(self, attr, flags);
}

static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;

	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	error = nvme_detach(sc, flags);
	if (error)
		return error;

	if (sc->sc_softih) {
		kmem_free(sc->sc_softih,
		    sizeof(*sc->sc_softih) * psc->psc_nintrs);
		sc->sc_softih = NULL;
	}
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	return 0;
}

static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void *ih_arg;
#ifdef __HAVE_PCI_MSI_MSIX
	int error;
#endif

	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}

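	/*
	 * With a single (INTx) vector, nvme_intr() scans every queue and
	 * therefore takes the softc as its argument; with MSI/MSI-X each
	 * queue has its own vector and nvme_intr_msi() handles only the
	 * queue passed in.
	 */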
#ifdef __HAVE_PCI_MSI_MSIX
	if (!sc->sc_use_mq) {
#endif
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
#ifdef __HAVE_PCI_MSI_MSIX
	} else {
		if (qid == NVME_ADMIN_Q) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		ih_func = nvme_intr_msi;
	}
#endif	/* __HAVE_PCI_MSI_MSIX */

	/* establish hardware interrupt */
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}

	/* if MSI, establish also the software interrupt */
	if (sc->sc_softih) {
		sc->sc_softih[qid] = softint_establish(
		    SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
		    nvme_softintr_msi, q);
		if (sc->sc_softih[qid] == NULL) {
			pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
			sc->sc_ih[qid] = NULL;

			aprint_error_dev(sc->sc_dev,
			    "unable to establish %s soft interrupt\n",
			    intr_xname);
			return 1;
		}
	}

	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	}
#ifdef __HAVE_PCI_MSI_MSIX
	else if (qid == NVME_ADMIN_Q) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		kcpuset_t *affinity;
		cpuid_t affinity_to;

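		/*
		 * Bind I/O queue qid's vector to CPU (qid - 1) % ncpu,
		 * spreading the queues round-robin across all CPUs.
		 */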
		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
#endif
	return 0;
}

static int
nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;

	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
	KASSERT(sc->sc_ih[qid] != NULL);

	if (sc->sc_softih) {
		softint_disestablish(sc->sc_softih[qid]);
		sc->sc_softih[qid] = NULL;
	}

	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
	sc->sc_ih[qid] = NULL;

	return 0;
}

static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
#ifdef __HAVE_PCI_MSI_MSIX
	int error;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	pci_intr_handle_t *ihps;
	int max_type, intr_type;
#else
	pci_intr_handle_t ih;
#endif	/* __HAVE_PCI_MSI_MSIX */

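	/*
	 * Vector allocation strategy: prefer MSI-X with up to ncpu + 1
	 * vectors (one for the admin queue plus one per I/O queue), fall
	 * back to MSI, and finally to a single INTx line.  Each type is
	 * probed with a trial allocation first so the real allocation
	 * below requests only what the hardware can actually deliver.
	 */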
#ifdef __HAVE_PCI_MSI_MSIX
	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
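		/*
		 * MSI vector counts come in powers of two; trim the request
		 * so it does not exceed ncpu + 1 (the admin queue plus one
		 * I/O queue per CPU) by more than necessary.
		 */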
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
retry:
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
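	/*
	 * If fewer vectors were granted than requested, fall back to the
	 * next weaker interrupt type (MSI-X -> MSI -> INTx).
	 */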
	if (alloced_counts[intr_type] < counts[intr_type]) {
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
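	/*
	 * MSI may have granted more vectors than requested (power-of-two
	 * rounding); never use more than ncpu + 1 of them for queues.
	 */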
	if (intr_type == PCI_INTR_TYPE_MSI) {
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;

#else	/* !__HAVE_PCI_MSI_MSIX */
	if (pci_intr_map(pa, &ih)) {
		aprint_error_dev(sc->sc_dev, "couldn't map interrupt\n");
		return EBUSY;
	}

	psc->psc_intrs = kmem_zalloc(sizeof(ih), KM_SLEEP);
	psc->psc_intrs[0] = ih;
	psc->psc_nintrs = 1;
	sc->sc_use_mq = 0;
	sc->sc_nq = 1;
#endif	/* __HAVE_PCI_MSI_MSIX */

	return 0;
}

MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");

#ifdef _MODULE
#include "ioconf.c"
#endif

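/*
 * Module glue: when built as a loadable module, wire up the autoconf
 * tables generated in ioconf.c and register the nvme character devsw.
 */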
static int
nvme_modcmd(modcmd_t cmd, void *opaque)
{
#ifdef _MODULE
	devmajor_t cmajor, bmajor;
	extern const struct cdevsw nvme_cdevsw;
#endif
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		if (error)
			break;

		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
		    &nvme_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvme_cd.cd_name);
			/* do not abort, just /dev/nvme* will not work */
		}
		break;
	case MODULE_CMD_FINI:
		devsw_detach(NULL, &nvme_cdevsw);

		error = config_fini_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		break;
	default:
		break;
	}
#endif
	return error;
}