nvme_pci.c revision 1.6 1 /* $NetBSD: nvme_pci.c,v 1.6 2016/09/16 11:35:07 jdolecek Exp $ */
2 /* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3
4 /*
5 * Copyright (c) 2014 David Gwynne <dlg (at) openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*-
21 * Copyright (C) 2016 NONAKA Kimihiro <nonaka (at) netbsd.org>
22 * All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.6 2016/09/16 11:35:07 jdolecek Exp $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 #ifdef _MODULE
59 #include <sys/module.h>
60 #endif
61
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64
65 #include <dev/ic/nvmereg.h>
66 #include <dev/ic/nvmevar.h>
67
/*
 * Tunables (patchable at build/boot time):
 *  nvme_pci_force_intx  - skip MSI/MSI-X and use a single INTx vector.
 *  nvme_pci_mpsafe      - establish MP-safe handlers and set CPU affinity.
 *  nvme_pci_mq          - allow multiple I/O queues when MSI/MSI-X is used.
 */
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 0;
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* BAR0/1: NVMe controller register BAR (64-bit memory BAR). */
#define NVME_PCI_BAR		0x10

/*
 * PCI-specific softc.  The generic nvme_softc MUST be first so the
 * generic code can cast struct nvme_softc * back to nvme_pci_softc *.
 */
struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;	/* common nvme(4) state */

	pci_chipset_tag_t	psc_pc;		/* chipset tag for intr ops */
	pci_intr_handle_t	*psc_intrs;	/* allocated intr handles */
	int			psc_nintrs;	/* number of handles above */
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

/* Backends for sc_intr_establish/sc_intr_disestablish in nvme_softc. */
static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);
95
96 static int
97 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
98 {
99 struct pci_attach_args *pa = aux;
100
101 if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
102 PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
103 PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
104 return 1;
105
106 return 0;
107 }
108
109 static void
110 nvme_pci_attach(device_t parent, device_t self, void *aux)
111 {
112 struct nvme_pci_softc *psc = device_private(self);
113 struct nvme_softc *sc = &psc->psc_nvme;
114 struct pci_attach_args *pa = aux;
115 pcireg_t memtype;
116 bus_addr_t memaddr;
117 int flags, msixoff;
118 int error;
119
120 sc->sc_dev = self;
121 psc->psc_pc = pa->pa_pc;
122 if (pci_dma64_available(pa))
123 sc->sc_dmat = pa->pa_dmat64;
124 else
125 sc->sc_dmat = pa->pa_dmat;
126
127 pci_aprint_devinfo(pa, NULL);
128
129 /* Map registers */
130 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
131 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
132 aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
133 return;
134 }
135 sc->sc_iot = pa->pa_memt;
136 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
137 memtype, &memaddr, &sc->sc_ios, &flags);
138 if (error) {
139 aprint_error_dev(self, "can't get map info\n");
140 return;
141 }
142 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
143 NULL)) {
144 pcireg_t msixtbl;
145 uint32_t table_offset;
146 int bir;
147
148 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
149 msixoff + PCI_MSIX_TBLOFFSET);
150 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
151 bir = msixtbl & PCI_MSIX_PBABIR_MASK;
152 if (bir == 0) {
153 sc->sc_ios = table_offset;
154 }
155 }
156 error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
157 &sc->sc_ioh);
158 if (error != 0) {
159 aprint_error_dev(self, "can't map mem space (error=%d)\n",
160 error);
161 return;
162 }
163
164 /* Establish interrupts */
165 if (nvme_pci_setup_intr(pa, psc) != 0) {
166 aprint_error_dev(self, "unable to allocate interrupt\n");
167 goto unmap;
168 }
169 sc->sc_intr_establish = nvme_pci_intr_establish;
170 sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
171
172 sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
173 if (sc->sc_ih == NULL) {
174 aprint_error_dev(self, "unable to allocate ih memory\n");
175 goto intr_release;
176 }
177
178 if (nvme_attach(sc) != 0) {
179 /* error printed by nvme_attach() */
180 goto intr_free;
181 }
182
183 if (!pmf_device_register(self, NULL, NULL))
184 aprint_error_dev(self, "couldn't establish power handler\n");
185
186 SET(sc->sc_flags, NVME_F_ATTACHED);
187 return;
188
189 intr_free:
190 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
191 sc->sc_nq = 0;
192 intr_release:
193 pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
194 psc->psc_nintrs = 0;
195 unmap:
196 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
197 sc->sc_ios = 0;
198 }
199
200 static int
201 nvme_pci_detach(device_t self, int flags)
202 {
203 struct nvme_pci_softc *psc = device_private(self);
204 struct nvme_softc *sc = &psc->psc_nvme;
205 int error;
206
207 if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
208 return 0;
209
210 error = nvme_detach(sc, flags);
211 if (error)
212 return error;
213
214 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
215 pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
216 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
217 return 0;
218 }
219
220 static int
221 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
222 struct nvme_queue *q)
223 {
224 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
225 char intr_xname[INTRDEVNAMEBUF];
226 char intrbuf[PCI_INTRSTR_LEN];
227 const char *intrstr = NULL;
228 int (*ih_func)(void *);
229 void *ih_arg;
230 kcpuset_t *affinity;
231 cpuid_t affinity_to;
232 int error;
233
234 if (!sc->sc_use_mq && qid > 0)
235 return 0;
236
237 KASSERT(sc->sc_ih[qid] == NULL);
238
239 if (nvme_pci_mpsafe) {
240 pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
241 PCI_INTR_MPSAFE, true);
242 }
243 if (!sc->sc_use_mq) {
244 snprintf(intr_xname, sizeof(intr_xname), "%s",
245 device_xname(sc->sc_dev));
246 ih_arg = sc;
247 ih_func = nvme_intr;
248 } else {
249 if (qid == 0) {
250 snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
251 device_xname(sc->sc_dev));
252 } else {
253 snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
254 device_xname(sc->sc_dev), qid);
255 }
256 ih_arg = q;
257 if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
258 == PCI_INTR_TYPE_MSIX)
259 ih_func = nvme_mq_msix_intr;
260 else
261 ih_func = nvme_mq_msi_intr;
262 }
263 sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
264 psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
265 if (sc->sc_ih[qid] == NULL) {
266 aprint_error_dev(sc->sc_dev,
267 "unable to establish %s interrupt\n", intr_xname);
268 return 1;
269 }
270 intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
271 sizeof(intrbuf));
272 if (!sc->sc_use_mq) {
273 aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
274 } else if (qid == NVME_ADMIN_Q) {
275 aprint_normal_dev(sc->sc_dev,
276 "for admin queue interrupting at %s\n", intrstr);
277 } else if (!nvme_pci_mpsafe) {
278 aprint_normal_dev(sc->sc_dev,
279 "for io queue %d interrupting at %s\n", qid, intrstr);
280 } else {
281 kcpuset_create(&affinity, true);
282 affinity_to = (qid - 1) % ncpu;
283 kcpuset_set(affinity, affinity_to);
284 error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
285 kcpuset_destroy(affinity);
286 aprint_normal_dev(sc->sc_dev,
287 "for io queue %d interrupting at %s", qid, intrstr);
288 if (error == 0)
289 aprint_normal(" affinity to cpu%lu", affinity_to);
290 aprint_normal("\n");
291 }
292 return 0;
293 }
294
295 static int
296 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
297 {
298 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
299
300 if (!sc->sc_use_mq && qid > 0)
301 return 0;
302
303 KASSERT(sc->sc_ih[qid] != NULL);
304
305 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
306 sc->sc_ih[qid] = NULL;
307
308 return 0;
309 }
310
/*
 * Allocate interrupt vectors for the controller, preferring MSI-X, then
 * MSI, then INTx.  For each type we first "probe" by allocating and
 * immediately releasing vectors to learn how many are actually grantable,
 * then do the real allocation.  On success psc_intrs/psc_nintrs are
 * filled in and sc_use_mq/sc_nq are derived from the vector count
 * (one vector reserved for the admin queue when multi-queue).
 * Returns 0 on success, an errno on failure.
 */
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	pci_intr_handle_t *ihps;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	int max_type, intr_type;
	int error;

	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X: want one vector per CPU plus one for the admin queue. */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		/* Trial allocation to discover the grantable count. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		/* Need at least adminq + 1 ioq to be worth MSI-X. */
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI: vector count must be a power of two; halve down to fit. */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		/* Trial allocation, as for MSI-X above. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		/* Only downgrade if MSI was already our best option. */
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx: always a single shared vector. */
	counts[PCI_INTR_TYPE_INTX] = 1;

	/* Real allocation: best type <= max_type that can satisfy counts. */
	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
retry:
			/* Fall back to the next-weaker interrupt type. */
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		/* Got fewer vectors than the probe promised: fall back. */
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		/* Cap usable MSI vectors even if more were granted. */
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	/* Multi-queue when >1 vector: one for adminq, the rest for I/O. */
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
	return 0;
}
421
/* module(9) glue: the nvme driver module depends on the "pci" module. */
MODULE(MODULE_CLASS_DRIVER, nvme, "pci");

#ifdef _MODULE
/* Autogenerated config glue (cfdriver/cfattach/cfdata tables). */
#include "ioconf.c"

/* ld(4)-at-nvme device switch entries, defined in the ld driver. */
extern const struct bdevsw ld_bdevsw;
extern const struct cdevsw ld_cdevsw;
#endif
430
431 static int
432 nvme_modcmd(modcmd_t cmd, void *opaque)
433 {
434 #ifdef _MODULE
435 devmajor_t cmajor, bmajor;
436 #endif
437 int error = 0;
438
439 switch (cmd) {
440 case MODULE_CMD_INIT:
441 #ifdef _MODULE
442 /* devsw must be done before configuring the pci device,
443 * otherwise ldattach() fails
444 */
445 bmajor = cmajor = NODEVMAJOR;
446 error = devsw_attach(ld_cd.cd_name, &ld_bdevsw, &bmajor,
447 &ld_cdevsw, &cmajor);
448 if (error && error != EEXIST) {
449 aprint_error("%s: unable to register devsw\n",
450 ld_cd.cd_name);
451 return error;
452 }
453
454 error = config_init_component(cfdriver_ioconf_nvme_pci,
455 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
456 if (error)
457 return error;
458
459 #endif
460 return error;
461 case MODULE_CMD_FINI:
462 #ifdef _MODULE
463 error = config_fini_component(cfdriver_ioconf_nvme_pci,
464 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
465 if (error)
466 return error;
467
468 /* devsw not detached, it's static data and fine to stay */
469 #endif
470 return error;
471 default:
472 return ENOTTY;
473 }
474 }
475