/*	$NetBSD: nvme_pci.c,v 1.10 2016/09/17 12:58:51 jdolecek Exp $	*/
/*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $	*/

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*-
 * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.10 2016/09/17 12:58:51 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/bitops.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/interrupt.h>
#include <sys/kmem.h>
#include <sys/pmf.h>
#include <sys/module.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>

#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>

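/*
 * Tunables (plain globals, patchable e.g. from ddb): nvme_pci_force_intx
 * forces legacy INTx even when MSI/MSI-X is available, nvme_pci_mpsafe
 * marks the interrupt handlers MPSAFE and binds I/O queue interrupts to
 * CPUs, and nvme_pci_mq allows one I/O queue per CPU when enough vectors
 * are available.
 */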
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 0;
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

#define NVME_PCI_BAR		0x10

struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;

	pci_chipset_tag_t	psc_pc;
	pci_intr_handle_t	*psc_intrs;
	int			psc_nintrs;
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);

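/*
 * NVMe controllers are matched purely by PCI class code (mass storage,
 * NVM subclass, NVMe programming interface); no vendor/device table is
 * needed.
 */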
static int
nvme_pci_match(device_t parent, cfdata_t match, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
		return 1;

	return 0;
}

static void
nvme_pci_attach(device_t parent, device_t self, void *aux)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	struct pci_attach_args *pa = aux;
	pcireg_t memtype, reg;
	bus_addr_t memaddr;
	int flags, msixoff;
	int error;

	sc->sc_dev = self;
	psc->psc_pc = pa->pa_pc;
	if (pci_dma64_available(pa))
		sc->sc_dmat = pa->pa_dmat64;
	else
		sc->sc_dmat = pa->pa_dmat;

	pci_aprint_devinfo(pa, NULL);

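	/* Make sure bus mastering is enabled so the controller can DMA. */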
	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
	if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
		reg |= PCI_COMMAND_MASTER_ENABLE;
		pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg);
	}

	/* Map registers */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
		return;
	}
	sc->sc_iot = pa->pa_memt;
	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
	    memtype, &memaddr, &sc->sc_ios, &flags);
	if (error) {
		aprint_error_dev(self, "can't get map info\n");
		return;
	}
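	/*
	 * If the MSI-X table lives in the register BAR (BIR 0), shrink the
	 * mapping below so it stops short of the table; the table itself is
	 * mapped separately when MSI-X is set up.
	 */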
	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
	    NULL)) {
		pcireg_t msixtbl;
		uint32_t table_offset;
		int bir;

		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
		    msixoff + PCI_MSIX_TBLOFFSET);
		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
		if (bir == 0) {
			sc->sc_ios = table_offset;
		}
	}
	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
	    &sc->sc_ioh);
	if (error != 0) {
		aprint_error_dev(self, "can't map mem space (error=%d)\n",
		    error);
		return;
	}

	/* Establish interrupts */
	if (nvme_pci_setup_intr(pa, psc) != 0) {
		aprint_error_dev(self, "unable to allocate interrupt\n");
		goto unmap;
	}
	sc->sc_intr_establish = nvme_pci_intr_establish;
	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;

	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
	if (sc->sc_ih == NULL) {
		aprint_error_dev(self, "unable to allocate ih memory\n");
		goto intr_release;
	}

	if (nvme_attach(sc) != 0) {
		/* error printed by nvme_attach() */
		goto intr_free;
	}

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, NVME_F_ATTACHED);
	return;

intr_free:
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	sc->sc_nq = 0;
intr_release:
	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
	psc->psc_nintrs = 0;
unmap:
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	sc->sc_ios = 0;
}

static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;

	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	error = nvme_detach(sc, flags);
	if (error)
		return error;

	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	return 0;
}

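/*
 * Hook up the interrupt handler for one queue.  With a single vector
 * (INTx, or a lone MSI) only qid 0 is established and nvme_intr() serves
 * all queues; with multiple vectors each queue gets its own handler and,
 * when nvme_pci_mpsafe is set, the I/O queue vectors are spread across
 * the CPUs.
 */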
static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void *ih_arg;
	kcpuset_t *affinity;
	cpuid_t affinity_to;
	int error;

	if (!sc->sc_use_mq && qid > 0)
		return 0;

	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}
	if (!sc->sc_use_mq) {
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
	} else {
		if (qid == 0) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
		    == PCI_INTR_TYPE_MSIX)
			ih_func = nvme_mq_msix_intr;
		else
			ih_func = nvme_mq_msi_intr;
	}
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}
	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	} else if (qid == NVME_ADMIN_Q) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
	return 0;
}

static int
nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;

	if (!sc->sc_use_mq && qid > 0)
		return 0;

	KASSERT(sc->sc_ih[qid] != NULL);

	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
	sc->sc_ih[qid] = NULL;

	return 0;
}

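/*
 * Pick an interrupt type and vector count: MSI-X is preferred, then MSI,
 * then INTx.  With MSI/MSI-X the goal is one vector for the admin queue
 * plus one per CPU for the I/O queues, capped at two vectors unless both
 * nvme_pci_mq and nvme_pci_mpsafe are set; INTx always uses a single
 * shared vector.
 */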
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	pci_intr_handle_t *ihps;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	int max_type, intr_type;
	int error;

	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
retry:
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

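	/*
	 * pci_intr_alloc() may hand back fewer vectors than requested.
	 * If so, release them and retry with the next weaker interrupt
	 * type; with INTx there is nothing left to fall back to.
	 */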
	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
	return 0;
}

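/*
 * Loadable module glue.  "pci,dk_subr" lists the modules this one depends
 * on; when loaded as a module the ld devsw has to be attached before the
 * device is configured, otherwise ldattach() fails (see nvme_modcmd()
 * below).
 */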
MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");

#ifdef _MODULE
#include "ioconf.c"

extern const struct bdevsw ld_bdevsw;
extern const struct cdevsw ld_cdevsw;
#endif

static int
nvme_modcmd(modcmd_t cmd, void *opaque)
{
#ifdef _MODULE
	devmajor_t cmajor, bmajor;
#endif
	int error = 0;

	switch (cmd) {
	case MODULE_CMD_INIT:
#ifdef _MODULE
		/*
		 * devsw must be done before configuring the actual device,
		 * otherwise ldattach() fails.
		 */
		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach(ld_cd.cd_name, &ld_bdevsw, &bmajor,
		    &ld_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    ld_cd.cd_name);
			return error;
		}

		error = config_init_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		if (error)
			return error;

#endif
		return error;
	case MODULE_CMD_FINI:
#ifdef _MODULE
		error = config_fini_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		if (error)
			return error;

		devsw_detach(&ld_bdevsw, &ld_cdevsw);
#endif
		return error;
	default:
		return ENOTTY;
	}
}