nvme_pci.c revision 1.2 1 /* $NetBSD: nvme_pci.c,v 1.2 2016/05/11 13:55:28 nonaka Exp $ */
2 /* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3
4 /*
5 * Copyright (c) 2014 David Gwynne <dlg (at) openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*-
21 * Copyright (C) 2016 NONAKA Kimihiro <nonaka (at) netbsd.org>
22 * All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.2 2016/05/11 13:55:28 nonaka Exp $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61
62 #include <dev/ic/nvmereg.h>
63 #include <dev/ic/nvmevar.h>
64
/*
 * Driver tunables (patchable globals):
 *
 *  nvme_pci_force_intx	nonzero forces legacy INTx even when MSI/MSI-X
 *			vectors are available.
 *  nvme_pci_mpsafe	nonzero marks interrupt handlers MPSAFE and
 *			distributes I/O queue interrupts across CPUs.
 *  nvme_pci_mq		nonzero allows one I/O queue per CPU when enough
 *			MSI/MSI-X vectors exist; otherwise a single I/O
 *			queue is used.
 */
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 0;
int nvme_pci_mq = 1;	/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* Config-space offset of the controller register BAR (BAR0). */
#define NVME_PCI_BAR		0x10

struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;	/* MI softc; must be first so
						 * (struct nvme_pci_softc *)sc
						 * casts are valid */

	pci_chipset_tag_t	psc_pc;		/* PCI chipset tag */
	pci_intr_handle_t	*psc_intrs;	/* allocated interrupt vectors */
	int			psc_nintrs;	/* count of entries in psc_intrs */
};
78
static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);

/* Autoconf glue; nvme_childdet comes from the MI nvme(4) driver. */
CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

/* Bus-specific interrupt hooks installed into the MI softc. */
static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);
92
93 static int
94 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
95 {
96 struct pci_attach_args *pa = aux;
97
98 if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
99 PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
100 PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
101 return 1;
102
103 return 0;
104 }
105
106 static void
107 nvme_pci_attach(device_t parent, device_t self, void *aux)
108 {
109 struct nvme_pci_softc *psc = device_private(self);
110 struct nvme_softc *sc = &psc->psc_nvme;
111 struct pci_attach_args *pa = aux;
112 pcireg_t memtype;
113 bus_addr_t memaddr;
114 int flags, msixoff;
115 int nq, error;
116
117 sc->sc_dev = self;
118 psc->psc_pc = pa->pa_pc;
119 if (pci_dma64_available(pa))
120 sc->sc_dmat = pa->pa_dmat64;
121 else
122 sc->sc_dmat = pa->pa_dmat;
123
124 pci_aprint_devinfo(pa, NULL);
125
126 /* Map registers */
127 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
128 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
129 aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
130 return;
131 }
132 sc->sc_iot = pa->pa_memt;
133 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
134 memtype, &memaddr, &sc->sc_ios, &flags);
135 if (error) {
136 aprint_error_dev(self, "can't get map info\n");
137 return;
138 }
139 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
140 NULL)) {
141 pcireg_t msixtbl;
142 uint32_t table_offset;
143 int bir;
144
145 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
146 msixoff + PCI_MSIX_TBLOFFSET);
147 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
148 bir = msixtbl & PCI_MSIX_PBABIR_MASK;
149 if (bir == 0) {
150 sc->sc_ios = table_offset;
151 }
152 }
153 error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
154 &sc->sc_ioh);
155 if (error != 0) {
156 aprint_error_dev(self, "can't map mem space (error=%d)\n",
157 error);
158 return;
159 }
160
161 /* Establish interrupts */
162 if (nvme_pci_setup_intr(pa, psc) != 0) {
163 aprint_error_dev(self, "unable to allocate interrupt\n");
164 goto unmap;
165 }
166 sc->sc_intr_establish = nvme_pci_intr_establish;
167 sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
168
169 nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
170 sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * nq, KM_SLEEP);
171 if (sc->sc_ih == NULL) {
172 aprint_error_dev(self, "unable to allocate ih memory\n");
173 goto intr_release;
174 }
175
176 if (nvme_attach(sc) != 0) {
177 /* error printed by nvme_attach() */
178 goto intr_free;
179 }
180
181 if (!pmf_device_register(self, NULL, NULL))
182 aprint_error_dev(self, "couldn't establish power handler\n");
183
184 SET(sc->sc_flags, NVME_F_ATTACHED);
185 return;
186
187 intr_free:
188 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * sc->sc_nq);
189 sc->sc_nq = 0;
190 intr_release:
191 pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
192 psc->psc_nintrs = 0;
193 unmap:
194 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
195 sc->sc_ios = 0;
196 }
197
/*
 * Detach the controller: tear down the MI driver first, then release
 * the interrupt handlers, the handler array, the interrupt vectors,
 * and the register mapping acquired by nvme_pci_attach().
 */
static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int i, nq, error;

	/* Nothing to undo if attach never completed. */
	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	error = nvme_detach(sc, flags);
	if (error)
		return error;

	/* Same sizing as attach: sc_nq I/O slots plus admin slot in MQ mode. */
	nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
	/*
	 * NOTE(review): in the !sc_use_mq case the handlers are torn down
	 * here rather than via sc_intr_disestablish — presumably
	 * nvme_detach() only calls the hook in MQ mode; confirm against
	 * the MI nvme(4) code to rule out a double disestablish of qid 0.
	 */
	if (!sc->sc_use_mq) {
		for (i = 0; i < nq; i++)
			pci_intr_disestablish(psc->psc_pc, sc->sc_ih[i]);
	}
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nq);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	return 0;
}
222
/*
 * Establish the interrupt handler for queue `qid' (0 = admin queue).
 * Installed into the MI softc as sc_intr_establish.  In non-MQ mode a
 * single handler (nvme_intr, argument sc) services everything and
 * calls for qid > 0 are silently accepted; in MQ mode each queue gets
 * its own vector with a per-queue handler (argument q).
 * Returns 0 on success, 1 on failure.
 */
static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void *ih_arg;
	kcpuset_t *affinity;
	cpuid_t affinity_to;
	int error;

	/* Single-vector mode: only qid 0 actually owns a handler. */
	if (!sc->sc_use_mq && qid > 0)
		return 0;

	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}
	/* Pick the handler, its argument, and a descriptive xname. */
	if (!sc->sc_use_mq) {
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
	} else {
		if (qid == 0) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		/* MSI-X and MSI use distinct per-queue handlers. */
		if (pci_intr_type(psc->psc_intrs[qid]) == PCI_INTR_TYPE_MSIX)
			ih_func = nvme_mq_msix_intr;
		else
			ih_func = nvme_mq_msi_intr;
	}
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}
	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	} else if (qid == 0) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		/*
		 * MPSAFE MQ: spread I/O queue interrupts round-robin
		 * over the CPUs (qid 1 -> cpu0, qid 2 -> cpu1, ...).
		 * Distribution failure is non-fatal; only the report
		 * of the affinity is suppressed.
		 */
		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
	return 0;
}
296
297 static int
298 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
299 {
300 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
301
302 if (!sc->sc_use_mq && qid > 0)
303 return 0;
304
305 KASSERT(sc->sc_ih[qid] != NULL);
306
307 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
308 sc->sc_ih[qid] = NULL;
309
310 return 0;
311 }
312
/*
 * Allocate interrupt vectors, preferring MSI-X, then MSI, then INTx.
 * Probes each type with a trial pci_intr_alloc()/pci_intr_release()
 * to learn how many vectors are actually grantable before committing.
 * On success fills in psc_intrs/psc_nintrs and derives sc_use_mq and
 * sc_nq (number of I/O queues) from the vector count.  Returns 0 on
 * success or an errno from pci_intr_alloc().
 */
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	pci_intr_handle_t *ihps;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	int max_type, intr_type;
	int error;

	/*
	 * NOTE(review): this path jumps straight to the INTx code with
	 * counts[MSI]/counts[MSIX] uninitialized; presumably
	 * pci_intr_alloc() only consults entries up to max_type
	 * (PCI_INTR_TYPE_INTX here) — confirm against pci_intr(9).
	 */
	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X: want one vector per CPU plus one for the admin queue. */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		/* Trial allocation to find out how many we can really get. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	/* Fewer than adminq + 1 ioq makes MSI-X pointless; fall to MSI. */
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/*
	 * MSI: vector counts must be a power of two, so halve until the
	 * count no longer exceeds ncpu + 1 by a whole factor of two.
	 */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		/* Trial allocation, as for MSI-X above. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx: always exactly one shared vector. */
	counts[PCI_INTR_TYPE_INTX] = 1;

	/* Commit: allocate for real with the probed counts. */
	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
			/*
			 * NOTE(review): `retry:' sits inside this if-block
			 * but is also entered from the under-allocation
			 * check below — step down one interrupt type and
			 * re-probe from there.
			 */
retry:
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	/* Got fewer vectors than asked for: retry with a lesser type. */
	intr_type = pci_intr_type(ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	/* MSI may grant more vectors than CPUs; don't use the excess. */
	if (intr_type == PCI_INTR_TYPE_MSI) {
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	/* >1 vector means MQ: one admin vector plus (count - 1) I/O queues. */
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
	return 0;
}
423