1 /*	$NetBSD: nvme.c,v 1.49 2020/07/28 15:59:19 jdolecek Exp $	*/
2 /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
3
4 /*
5 * Copyright (c) 2014 David Gwynne <dlg (at) openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <sys/cdefs.h>
21 __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.49 2020/07/28 15:59:19 jdolecek Exp $");
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/atomic.h>
27 #include <sys/bus.h>
28 #include <sys/buf.h>
29 #include <sys/conf.h>
30 #include <sys/device.h>
31 #include <sys/kmem.h>
32 #include <sys/once.h>
33 #include <sys/proc.h>
34 #include <sys/queue.h>
35 #include <sys/mutex.h>
36
37 #include <uvm/uvm_extern.h>
38
39 #include <dev/ic/nvmereg.h>
40 #include <dev/ic/nvmevar.h>
41 #include <dev/ic/nvmeio.h>
42
43 #include "ioconf.h"
44
45 #define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */
46
47 int nvme_adminq_size = 32;
48 int nvme_ioq_size = 1024;
49
50 static int nvme_print(void *, const char *);
51
52 static int nvme_ready(struct nvme_softc *, uint32_t);
53 static int nvme_enable(struct nvme_softc *, u_int);
54 static int nvme_disable(struct nvme_softc *);
55 static int nvme_shutdown(struct nvme_softc *);
56
57 #ifdef NVME_DEBUG
58 static void nvme_dumpregs(struct nvme_softc *);
59 #endif
60 static int nvme_identify(struct nvme_softc *, u_int);
61 static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *,
62 void *);
63
64 static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t);
65 static void nvme_ccbs_free(struct nvme_queue *);
66
67 static struct nvme_ccb *
68 nvme_ccb_get(struct nvme_queue *, bool);
69 static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);
70
71 static int nvme_poll(struct nvme_softc *, struct nvme_queue *,
72 struct nvme_ccb *, void (*)(struct nvme_queue *,
73 struct nvme_ccb *, void *), int);
74 static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *);
75 static void nvme_poll_done(struct nvme_queue *, struct nvme_ccb *,
76 struct nvme_cqe *);
77 static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *);
78 static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *,
79 struct nvme_cqe *);
80
81 static struct nvme_queue *
82 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int);
83 static int nvme_q_create(struct nvme_softc *, struct nvme_queue *);
84 static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
85 static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *,
86 struct nvme_ccb *, void (*)(struct nvme_queue *,
87 struct nvme_ccb *, void *));
88 static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
89 static void nvme_q_free(struct nvme_softc *, struct nvme_queue *);
90 static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *,
91 bool (*)(void *), void *);
92
93 static struct nvme_dmamem *
94 nvme_dmamem_alloc(struct nvme_softc *, size_t);
95 static void nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
96 static void nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *,
97 int);
98
99 static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *,
100 void *);
101 static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *,
102 struct nvme_cqe *);
103 static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *,
104 void *);
105 static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *,
106 struct nvme_cqe *);
107 static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *,
108 void *);
109 static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *,
110 struct nvme_cqe *);
111
112 static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *,
113 void *);
114 static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *,
115 struct nvme_cqe *);
116 static int nvme_command_passthrough(struct nvme_softc *,
117 struct nvme_pt_command *, uint16_t, struct lwp *, bool);
118
119 static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *,
120 u_int *);
121
122 #define NVME_TIMO_QOP 5 /* queue create and delete timeout */
123 #define NVME_TIMO_IDENT 10 /* probe identify timeout */
124 #define NVME_TIMO_PT -1 /* passthrough cmd timeout */
125 #define NVME_TIMO_SY 60 /* sync cache timeout */
126
127 #define nvme_read4(_s, _r) \
128 bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
129 #define nvme_write4(_s, _r, _v) \
130 bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
131 /*
132 * Some controllers, at least Apple NVMe, always require split
133 * transfers, so don't use bus_space_{read,write}_8() on LP64.
134 */
135 static inline uint64_t
136 nvme_read8(struct nvme_softc *sc, bus_size_t r)
137 {
138 uint64_t v;
139 uint32_t *a = (uint32_t *)&v;
140
141 #if _BYTE_ORDER == _LITTLE_ENDIAN
142 a[0] = nvme_read4(sc, r);
143 a[1] = nvme_read4(sc, r + 4);
144 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
145 a[1] = nvme_read4(sc, r);
146 a[0] = nvme_read4(sc, r + 4);
147 #endif
148
149 return v;
150 }
151
152 static inline void
153 nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v)
154 {
155 uint32_t *a = (uint32_t *)&v;
156
157 #if _BYTE_ORDER == _LITTLE_ENDIAN
158 nvme_write4(sc, r, a[0]);
159 nvme_write4(sc, r + 4, a[1]);
160 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
161 nvme_write4(sc, r, a[1]);
162 nvme_write4(sc, r + 4, a[0]);
163 #endif
164 }
165 #define nvme_barrier(_s, _r, _l, _f) \
166 bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))
167
168 #ifdef NVME_DEBUG
169 static __used void
170 nvme_dumpregs(struct nvme_softc *sc)
171 {
172 uint64_t r8;
173 uint32_t r4;
174
175 #define DEVNAME(_sc) device_xname((_sc)->sc_dev)
176 r8 = nvme_read8(sc, NVME_CAP);
177 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
178 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc),
179 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
180 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc),
181 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
182 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8));
183 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
184 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
185 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
186 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8));
187 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8));
188 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8));
189
190 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));
191
192 r4 = nvme_read4(sc, NVME_CC);
193 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4);
194 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4),
195 (1 << NVME_CC_IOCQES_R(r4)));
196 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4),
197 (1 << NVME_CC_IOSQES_R(r4)));
198 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
199 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
200 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4),
201 (1 << NVME_CC_MPS_R(r4)));
202 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
203 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 1 : 0);
204
205 r4 = nvme_read4(sc, NVME_CSTS);
206 printf("%s: csts 0x%08x\n", DEVNAME(sc), r4);
207 printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY);
208 printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS);
209 printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK);
210
211 r4 = nvme_read4(sc, NVME_AQA);
212 printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4);
213 printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4));
214 printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4));
215
216 printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
217 printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
218 #undef DEVNAME
219 }
220 #endif /* NVME_DEBUG */
221
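/*
 * Wait for CSTS.RDY to reach the requested value.  The register is
 * polled roughly once per millisecond until it matches, or until the
 * sc_rdy_to timeout (derived from CAP.TO at attach time) expires, in
 * which case ENXIO is returned.
 */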
222 static int
223 nvme_ready(struct nvme_softc *sc, uint32_t rdy)
224 {
225 u_int i = 0;
226
227 while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
228 if (i++ > sc->sc_rdy_to)
229 return ENXIO;
230
231 delay(1000);
232 nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
233 }
234
235 return 0;
236 }
237
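/*
 * Bring the controller up: program the admin queue base addresses and
 * sizes (ASQ/ACQ/AQA), set the queue entry sizes, arbitration method,
 * command set and memory page size in CC, then set CC.EN and wait for
 * CSTS.RDY.  If the controller is already enabled, only wait for it to
 * become ready.
 */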
238 static int
239 nvme_enable(struct nvme_softc *sc, u_int mps)
240 {
241 uint32_t cc, csts;
242 int error;
243
244 cc = nvme_read4(sc, NVME_CC);
245 csts = nvme_read4(sc, NVME_CSTS);
246
247 /*
248 * See note in nvme_disable. Short circuit if we're already enabled.
249 */
250 if (ISSET(cc, NVME_CC_EN)) {
251 if (ISSET(csts, NVME_CSTS_RDY))
252 return 0;
253
254 goto waitready;
255 } else {
256 /* EN == 0 already, wait for RDY == 0 or fail */
257 error = nvme_ready(sc, 0);
258 if (error)
259 return error;
260 }
261
262 nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
263 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
264 delay(5000);
265 nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
266 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
267 delay(5000);
268
269 nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
270 NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
271 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
272 delay(5000);
273
274 CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
275 NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
276 SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
277 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
278 SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
279 SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
280 SET(cc, NVME_CC_MPS(mps));
281 SET(cc, NVME_CC_EN);
282
283 nvme_write4(sc, NVME_CC, cc);
284 nvme_barrier(sc, 0, sc->sc_ios,
285 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
286
287 waitready:
288 return nvme_ready(sc, NVME_CSTS_RDY);
289 }
290
291 static int
292 nvme_disable(struct nvme_softc *sc)
293 {
294 uint32_t cc, csts;
295 int error;
296
297 cc = nvme_read4(sc, NVME_CC);
298 csts = nvme_read4(sc, NVME_CSTS);
299
300 /*
301 * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1
302 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when
303 * CSTS.RDY is 0 "has undefined results". So make sure that CSTS.RDY
304 * isn't the desired value. Short circuit if we're already disabled.
305 */
306 if (ISSET(cc, NVME_CC_EN)) {
307 if (!ISSET(csts, NVME_CSTS_RDY)) {
308 /* EN == 1, wait for RDY == 1 or fail */
309 error = nvme_ready(sc, NVME_CSTS_RDY);
310 if (error)
311 return error;
312 }
313 } else {
314 /* EN == 0 already, wait for RDY == 0 */
315 if (!ISSET(csts, NVME_CSTS_RDY))
316 return 0;
317
318 goto waitready;
319 }
320
321 CLR(cc, NVME_CC_EN);
322 nvme_write4(sc, NVME_CC, cc);
323 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ);
324
325 /*
326 * Some drives have issues with accessing the mmio after we disable,
327 * so delay for a bit after we write the bit to cope with these issues.
328 */
329 if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY))
330 delay(B4_CHK_RDY_DELAY_MS * 1000);	/* constant is in ms, delay() takes us */
331
332 waitready:
333 return nvme_ready(sc, 0);
334 }
335
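/*
 * Common controller attachment, called by the bus frontend once the
 * registers are mapped: read VS/CAP, disable the controller, set up the
 * admin queue and its interrupt, enable the controller, identify it,
 * negotiate the number of I/O queues (multi-queue case only), create
 * the I/O queues and finally attach the namespace children.
 */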
336 int
337 nvme_attach(struct nvme_softc *sc)
338 {
339 uint64_t cap;
340 uint32_t reg;
341 u_int dstrd;
342 u_int mps = PAGE_SHIFT;
343 u_int ncq, nsq;
344 uint16_t adminq_entries = nvme_adminq_size;
345 uint16_t ioq_entries = nvme_ioq_size;
346 int i;
347
348 reg = nvme_read4(sc, NVME_VS);
349 if (reg == 0xffffffff) {
350 aprint_error_dev(sc->sc_dev, "invalid mapping\n");
351 return 1;
352 }
353
354 if (NVME_VS_TER(reg) == 0)
355 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg),
356 NVME_VS_MNR(reg));
357 else
358 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg),
359 NVME_VS_MNR(reg), NVME_VS_TER(reg));
360
361 cap = nvme_read8(sc, NVME_CAP);
362 dstrd = NVME_CAP_DSTRD(cap);
363 if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
364 aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u "
365 "is greater than CPU page size %u\n",
366 1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
367 return 1;
368 }
369 if (NVME_CAP_MPSMAX(cap) < mps)
370 mps = NVME_CAP_MPSMAX(cap);
371 if (ioq_entries > NVME_CAP_MQES(cap))
372 ioq_entries = NVME_CAP_MQES(cap);
373
374 /* set initial values to be used for admin queue during probe */
375 sc->sc_rdy_to = NVME_CAP_TO(cap);
376 sc->sc_mps = 1 << mps;
377 sc->sc_mdts = MAXPHYS;
378 sc->sc_max_sgl = btoc(round_page(sc->sc_mdts));
379
380 if (nvme_disable(sc) != 0) {
381 aprint_error_dev(sc->sc_dev, "unable to disable controller\n");
382 return 1;
383 }
384
385 sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd);
386 if (sc->sc_admin_q == NULL) {
387 aprint_error_dev(sc->sc_dev,
388 "unable to allocate admin queue\n");
389 return 1;
390 }
391 if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q))
392 goto free_admin_q;
393
394 if (nvme_enable(sc, mps) != 0) {
395 aprint_error_dev(sc->sc_dev, "unable to enable controller\n");
396 goto disestablish_admin_q;
397 }
398
399 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
400 aprint_error_dev(sc->sc_dev, "unable to identify controller\n");
401 goto disable;
402 }
403 if (sc->sc_nn == 0) {
404 aprint_error_dev(sc->sc_dev, "namespace not found\n");
405 goto disable;
406 }
407
408 /* we know how big things are now */
409 sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps;
410
411 /* reallocate ccbs of admin queue with new max sgl. */
412 nvme_ccbs_free(sc->sc_admin_q);
413 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries);
414
415 if (sc->sc_use_mq) {
416 /* Limit the number of queues to the number allocated in HW */
417 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) {
418 aprint_error_dev(sc->sc_dev,
419 "unable to get number of queues\n");
420 goto disable;
421 }
422 if (sc->sc_nq > ncq)
423 sc->sc_nq = ncq;
424 if (sc->sc_nq > nsq)
425 sc->sc_nq = nsq;
426 }
427
428 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP);
429 for (i = 0; i < sc->sc_nq; i++) {
430 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd);
431 if (sc->sc_q[i] == NULL) {
432 aprint_error_dev(sc->sc_dev,
433 "unable to allocate io queue\n");
434 goto free_q;
435 }
436 if (nvme_q_create(sc, sc->sc_q[i]) != 0) {
437 aprint_error_dev(sc->sc_dev,
438 "unable to create io queue\n");
439 nvme_q_free(sc, sc->sc_q[i]);
440 goto free_q;
441 }
442 }
443
444 if (!sc->sc_use_mq)
445 nvme_write4(sc, NVME_INTMC, 1);
446
447 /* probe subdevices */
448 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn,
449 KM_SLEEP);
450 nvme_rescan(sc->sc_dev, "nvme", &i);
451
452 return 0;
453
454 free_q:
455 while (--i >= 0) {
456 nvme_q_delete(sc, sc->sc_q[i]);
457 nvme_q_free(sc, sc->sc_q[i]);
458 }
459 disable:
460 nvme_disable(sc);
461 disestablish_admin_q:
462 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
463 free_admin_q:
464 nvme_q_free(sc, sc->sc_admin_q);
465
466 return 1;
467 }
468
469 int
470 nvme_rescan(device_t self, const char *attr, const int *flags)
471 {
472 struct nvme_softc *sc = device_private(self);
473 struct nvme_attach_args naa;
474 uint64_t cap;
475 int ioq_entries = nvme_ioq_size;
476 int i;
477
478 cap = nvme_read8(sc, NVME_CAP);
479 if (ioq_entries > NVME_CAP_MQES(cap))
480 ioq_entries = NVME_CAP_MQES(cap);
481
482 for (i = 0; i < sc->sc_nn; i++) {
483 if (sc->sc_namespaces[i].dev)
484 continue;
485 memset(&naa, 0, sizeof(naa));
486 naa.naa_nsid = i + 1;
487 naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq;
488 naa.naa_maxphys = sc->sc_mdts;
489 naa.naa_typename = sc->sc_modelname;
490 sc->sc_namespaces[i].dev = config_found(sc->sc_dev, &naa,
491 nvme_print);
492 }
493 return 0;
494 }
495
496 static int
497 nvme_print(void *aux, const char *pnp)
498 {
499 struct nvme_attach_args *naa = aux;
500
501 if (pnp)
502 aprint_normal("ld at %s", pnp);
503
504 if (naa->naa_nsid > 0)
505 aprint_normal(" nsid %d", naa->naa_nsid);
506
507 return UNCONF;
508 }
509
510 int
511 nvme_detach(struct nvme_softc *sc, int flags)
512 {
513 int i, error;
514
515 error = config_detach_children(sc->sc_dev, flags);
516 if (error)
517 return error;
518
519 error = nvme_shutdown(sc);
520 if (error)
521 return error;
522
523 /* from now on we are committed to detach; the following will never fail */
524 for (i = 0; i < sc->sc_nq; i++)
525 nvme_q_free(sc, sc->sc_q[i]);
526 kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
527 nvme_q_free(sc, sc->sc_admin_q);
528
529 return 0;
530 }
531
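/*
 * Orderly shutdown: delete the I/O queues, request a normal shutdown
 * through CC.SHN and wait up to ~4 seconds for CSTS.SHST to signal
 * completion.  If any step fails, fall back to simply disabling the
 * controller.
 */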
532 static int
533 nvme_shutdown(struct nvme_softc *sc)
534 {
535 uint32_t cc, csts;
536 bool disabled = false;
537 int i;
538
539 if (!sc->sc_use_mq)
540 nvme_write4(sc, NVME_INTMS, 1);
541
542 for (i = 0; i < sc->sc_nq; i++) {
543 if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
544 aprint_error_dev(sc->sc_dev,
545 "unable to delete io queue %d, disabling\n", i + 1);
546 disabled = true;
547 }
548 }
549 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
550 if (disabled)
551 goto disable;
552
553 cc = nvme_read4(sc, NVME_CC);
554 CLR(cc, NVME_CC_SHN_MASK);
555 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
556 nvme_write4(sc, NVME_CC, cc);
557
558 for (i = 0; i < 4000; i++) {
559 nvme_barrier(sc, 0, sc->sc_ios,
560 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
561 csts = nvme_read4(sc, NVME_CSTS);
562 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
563 return 0;
564
565 delay(1000);
566 }
567
568 aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");
569
570 disable:
571 nvme_disable(sc);
572 return 0;
573 }
574
575 void
576 nvme_childdet(device_t self, device_t child)
577 {
578 struct nvme_softc *sc = device_private(self);
579 int i;
580
581 for (i = 0; i < sc->sc_nn; i++) {
582 if (sc->sc_namespaces[i].dev == child) {
583 /* Already freed ns->ident. */
584 sc->sc_namespaces[i].dev = NULL;
585 break;
586 }
587 }
588 }
589
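/*
 * Issue IDENTIFY NAMESPACE for the given nsid on the admin queue and
 * store the byte-swapped result in the namespace's ident field.
 */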
590 int
591 nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
592 {
593 struct nvme_sqe sqe;
594 struct nvm_identify_namespace *identify;
595 struct nvme_dmamem *mem;
596 struct nvme_ccb *ccb;
597 struct nvme_namespace *ns;
598 int rv;
599
600 KASSERT(nsid > 0);
601
602 ccb = nvme_ccb_get(sc->sc_admin_q, false);
603 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
604
605 mem = nvme_dmamem_alloc(sc, sizeof(*identify));
606 if (mem == NULL) {
607 nvme_ccb_put(sc->sc_admin_q, ccb);
608 return ENOMEM;
609 }
610
611 memset(&sqe, 0, sizeof(sqe));
612 sqe.opcode = NVM_ADMIN_IDENTIFY;
613 htolem32(&sqe.nsid, nsid);
614 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
615 htolem32(&sqe.cdw10, 0);
616
617 ccb->ccb_done = nvme_empty_done;
618 ccb->ccb_cookie = &sqe;
619
620 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
621 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
622 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
623
624 nvme_ccb_put(sc->sc_admin_q, ccb);
625
626 if (rv != 0) {
627 rv = EIO;
628 goto done;
629 }
630
631 /* commit */
632
633 identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
634 *identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem));
635
636 /* Convert data to host endian */
637 nvme_identify_namespace_swapbytes(identify);
638
639 ns = nvme_ns_get(sc, nsid);
640 KASSERT(ns);
641 KASSERT(ns->ident == NULL);
642 ns->ident = identify;
643
644 done:
645 nvme_dmamem_free(sc, mem);
646
647 return rv;
648 }
649
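/*
 * Start a read or write on a namespace.  The data buffer is DMA-mapped
 * here; transfers that need more than two segments get a PRP list
 * built in the ccb's slot of the per-queue PRP area.  With
 * NVME_NS_CTX_F_POLL the command is polled to completion, otherwise it
 * finishes via nvme_ns_io_done() and the caller's nnc_done callback.
 */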
650 int
651 nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie,
652 struct buf *bp, void *data, size_t datasize,
653 int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
654 {
655 struct nvme_queue *q = nvme_get_q(sc, bp, false);
656 struct nvme_ccb *ccb;
657 bus_dmamap_t dmap;
658 int i, error;
659
660 ccb = nvme_ccb_get(q, false);
661 if (ccb == NULL)
662 return EAGAIN;
663
664 ccb->ccb_done = nvme_ns_io_done;
665 ccb->ccb_cookie = cookie;
666
667 /* namespace context */
668 ccb->nnc_nsid = nsid;
669 ccb->nnc_flags = flags;
670 ccb->nnc_buf = bp;
671 ccb->nnc_datasize = datasize;
672 ccb->nnc_secsize = secsize;
673 ccb->nnc_blkno = blkno;
674 ccb->nnc_done = nnc_done;
675
676 dmap = ccb->ccb_dmamap;
677 error = bus_dmamap_load(sc->sc_dmat, dmap, data,
678 datasize, NULL,
679 (ISSET(flags, NVME_NS_CTX_F_POLL) ?
680 BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
681 (ISSET(flags, NVME_NS_CTX_F_READ) ?
682 BUS_DMA_READ : BUS_DMA_WRITE));
683 if (error) {
684 nvme_ccb_put(q, ccb);
685 return error;
686 }
687
688 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
689 ISSET(flags, NVME_NS_CTX_F_READ) ?
690 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
691
692 if (dmap->dm_nsegs > 2) {
693 for (i = 1; i < dmap->dm_nsegs; i++) {
694 htolem64(&ccb->ccb_prpl[i - 1],
695 dmap->dm_segs[i].ds_addr);
696 }
697 bus_dmamap_sync(sc->sc_dmat,
698 NVME_DMA_MAP(q->q_ccb_prpls),
699 ccb->ccb_prpl_off,
700 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
701 BUS_DMASYNC_PREWRITE);
702 }
703
704 if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
705 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0)
706 return EIO;
707 return 0;
708 }
709
710 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill);
711 return 0;
712 }
713
714 static void
715 nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
716 {
717 struct nvme_sqe_io *sqe = slot;
718 bus_dmamap_t dmap = ccb->ccb_dmamap;
719
720 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
721 NVM_CMD_READ : NVM_CMD_WRITE;
722 htolem32(&sqe->nsid, ccb->nnc_nsid);
723
724 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
725 switch (dmap->dm_nsegs) {
726 case 1:
727 break;
728 case 2:
729 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
730 break;
731 default:
732 /* the prp list is already set up and synced */
733 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
734 break;
735 }
736
737 htolem64(&sqe->slba, ccb->nnc_blkno);
738
739 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA))
740 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA);
741
742 /* guaranteed by upper layers, but check just in case */
743 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0);
744 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1);
745 }
746
747 static void
748 nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb,
749 struct nvme_cqe *cqe)
750 {
751 struct nvme_softc *sc = q->q_sc;
752 bus_dmamap_t dmap = ccb->ccb_dmamap;
753 void *nnc_cookie = ccb->ccb_cookie;
754 nvme_nnc_done nnc_done = ccb->nnc_done;
755 struct buf *bp = ccb->nnc_buf;
756
757 if (dmap->dm_nsegs > 2) {
758 bus_dmamap_sync(sc->sc_dmat,
759 NVME_DMA_MAP(q->q_ccb_prpls),
760 ccb->ccb_prpl_off,
761 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
762 BUS_DMASYNC_POSTWRITE);
763 }
764
765 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
766 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
767 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
768
769 bus_dmamap_unload(sc->sc_dmat, dmap);
770 nvme_ccb_put(q, ccb);
771
772 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
773 }
774
775 /*
776 * If there is no volatile write cache, it makes no sense to issue
777 * flush commands or query for the status.
778 */
779 static bool
780 nvme_has_volatile_write_cache(struct nvme_softc *sc)
781 {
782 /* sc_identify is filled during attachment */
783 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0);
784 }
785
786 static bool
787 nvme_ns_sync_finished(void *cookie)
788 {
789 int *result = cookie;
790
791 return (*result != 0);
792 }
793
794 int
795 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags)
796 {
797 struct nvme_queue *q = nvme_get_q(sc, NULL, true);
798 struct nvme_ccb *ccb;
799 int result = 0;
800
801 if (!nvme_has_volatile_write_cache(sc)) {
802 /* cache not present, no value in trying to flush it */
803 return 0;
804 }
805
806 ccb = nvme_ccb_get(q, true);
807 KASSERT(ccb != NULL);
808
809 ccb->ccb_done = nvme_ns_sync_done;
810 ccb->ccb_cookie = &result;
811
812 /* namespace context */
813 ccb->nnc_nsid = nsid;
814 ccb->nnc_flags = flags;
815 ccb->nnc_done = NULL;
816
817 if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
818 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0)
819 return EIO;
820 return 0;
821 }
822
823 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);
824
825 /* wait for completion */
826 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result);
827 KASSERT(result != 0);
828
829 return (result > 0) ? 0 : EIO;
830 }
831
832 static void
833 nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
834 {
835 struct nvme_sqe *sqe = slot;
836
837 sqe->opcode = NVM_CMD_FLUSH;
838 htolem32(&sqe->nsid, ccb->nnc_nsid);
839 }
840
841 static void
842 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
843 struct nvme_cqe *cqe)
844 {
845 int *result = ccb->ccb_cookie;
846 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
847
848 if (status == NVME_CQE_SC_SUCCESS)
849 *result = 1;
850 else
851 *result = -1;
852
853 nvme_ccb_put(q, ccb);
854 }
855
856 static bool
857 nvme_getcache_finished(void *xc)
858 {
859 int *addr = xc;
860
861 return (*addr != 0);
862 }
863
864 /*
865 * Get status of volatile write cache. Always asynchronous.
866 */
867 int
868 nvme_admin_getcache(struct nvme_softc *sc, int *addr)
869 {
870 struct nvme_ccb *ccb;
871 struct nvme_queue *q = sc->sc_admin_q;
872 int result = 0, error;
873
874 if (!nvme_has_volatile_write_cache(sc)) {
875 /* cache simply not present */
876 *addr = 0;
877 return 0;
878 }
879
880 ccb = nvme_ccb_get(q, true);
881 KASSERT(ccb != NULL);
882
883 ccb->ccb_done = nvme_getcache_done;
884 ccb->ccb_cookie = &result;
885
886 /* namespace context */
887 ccb->nnc_flags = 0;
888 ccb->nnc_done = NULL;
889
890 nvme_q_submit(sc, q, ccb, nvme_getcache_fill);
891
892 /* wait for completion */
893 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result);
894 KASSERT(result != 0);
895
896 if (result > 0) {
897 *addr = result;
898 error = 0;
899 } else
900 error = EINVAL;
901
902 return error;
903 }
904
905 static void
906 nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
907 {
908 struct nvme_sqe *sqe = slot;
909
910 sqe->opcode = NVM_ADMIN_GET_FEATURES;
911 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE);
912 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE);
913 }
914
915 static void
916 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb,
917 struct nvme_cqe *cqe)
918 {
919 int *addr = ccb->ccb_cookie;
920 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
921 uint32_t cdw0 = lemtoh32(&cqe->cdw0);
922 int result;
923
924 if (status == NVME_CQE_SC_SUCCESS) {
925 result = 0;
926
927 /*
928 * DPO not supported, Dataset Management (DSM) field doesn't
929 * specify the same semantics. FUA is always supported.
930 */
931 result = DKCACHE_FUA;
932
933 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE)
934 result |= DKCACHE_WRITE;
935
936 /*
937 * If volatile write cache is present, the flag shall also be
938 * settable.
939 */
940 result |= DKCACHE_WCHANGE;
941
942 /*
943 * The ONCS field indicates whether the optional SAVE is also
944 * supported for Set Features. According to spec v1.3, the
945 * Volatile Write Cache feature however doesn't support persistence
946 * across power cycle/reset.
947 */
948
949 } else {
950 result = -1;
951 }
952
953 *addr = result;
954
955 nvme_ccb_put(q, ccb);
956 }
957
958 struct nvme_setcache_state {
959 int dkcache;
960 int result;
961 };
962
963 static bool
964 nvme_setcache_finished(void *xc)
965 {
966 struct nvme_setcache_state *st = xc;
967
968 return (st->result != 0);
969 }
970
971 static void
972 nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
973 {
974 struct nvme_sqe *sqe = slot;
975 struct nvme_setcache_state *st = ccb->ccb_cookie;
976
977 sqe->opcode = NVM_ADMIN_SET_FEATURES;
978 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE);
979 if (st->dkcache & DKCACHE_WRITE)
980 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE);
981 }
982
983 static void
984 nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb,
985 struct nvme_cqe *cqe)
986 {
987 struct nvme_setcache_state *st = ccb->ccb_cookie;
988 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
989
990 if (status == NVME_CQE_SC_SUCCESS) {
991 st->result = 1;
992 } else {
993 st->result = -1;
994 }
995
996 nvme_ccb_put(q, ccb);
997 }
998
999 /*
1000 * Set status of volatile write cache. Always asynchronous.
1001 */
1002 int
1003 nvme_admin_setcache(struct nvme_softc *sc, int dkcache)
1004 {
1005 struct nvme_ccb *ccb;
1006 struct nvme_queue *q = sc->sc_admin_q;
1007 int error;
1008 struct nvme_setcache_state st;
1009
1010 if (!nvme_has_volatile_write_cache(sc)) {
1011 /* cache simply not present */
1012 return EOPNOTSUPP;
1013 }
1014
1015 if (dkcache & ~(DKCACHE_WRITE)) {
1016 /* unsupported parameters */
1017 return EOPNOTSUPP;
1018 }
1019
1020 ccb = nvme_ccb_get(q, true);
1021 KASSERT(ccb != NULL);
1022
1023 memset(&st, 0, sizeof(st));
1024 st.dkcache = dkcache;
1025
1026 ccb->ccb_done = nvme_setcache_done;
1027 ccb->ccb_cookie = &st;
1028
1029 /* namespace context */
1030 ccb->nnc_flags = 0;
1031 ccb->nnc_done = NULL;
1032
1033 nvme_q_submit(sc, q, ccb, nvme_setcache_fill);
1034
1035 /* wait for completion */
1036 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st);
1037 KASSERT(st.result != 0);
1038
1039 if (st.result > 0)
1040 error = 0;
1041 else
1042 error = EINVAL;
1043
1044 return error;
1045 }
1046
1047 void
1048 nvme_ns_free(struct nvme_softc *sc, uint16_t nsid)
1049 {
1050 struct nvme_namespace *ns;
1051 struct nvm_identify_namespace *identify;
1052
1053 ns = nvme_ns_get(sc, nsid);
1054 KASSERT(ns);
1055
1056 identify = ns->ident;
1057 ns->ident = NULL;
1058 if (identify != NULL)
1059 kmem_free(identify, sizeof(*identify));
1060 }
1061
1062 struct nvme_pt_state {
1063 struct nvme_pt_command *pt;
1064 bool finished;
1065 };
1066
1067 static void
1068 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1069 {
1070 struct nvme_softc *sc = q->q_sc;
1071 struct nvme_sqe *sqe = slot;
1072 struct nvme_pt_state *state = ccb->ccb_cookie;
1073 struct nvme_pt_command *pt = state->pt;
1074 bus_dmamap_t dmap = ccb->ccb_dmamap;
1075 int i;
1076
1077 sqe->opcode = pt->cmd.opcode;
1078 htolem32(&sqe->nsid, pt->cmd.nsid);
1079
1080 if (pt->buf != NULL && pt->len > 0) {
1081 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
1082 switch (dmap->dm_nsegs) {
1083 case 1:
1084 break;
1085 case 2:
1086 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
1087 break;
1088 default:
1089 for (i = 1; i < dmap->dm_nsegs; i++) {
1090 htolem64(&ccb->ccb_prpl[i - 1],
1091 dmap->dm_segs[i].ds_addr);
1092 }
1093 bus_dmamap_sync(sc->sc_dmat,
1094 NVME_DMA_MAP(q->q_ccb_prpls),
1095 ccb->ccb_prpl_off,
1096 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
1097 BUS_DMASYNC_PREWRITE);
1098 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
1099 break;
1100 }
1101 }
1102
1103 htolem32(&sqe->cdw10, pt->cmd.cdw10);
1104 htolem32(&sqe->cdw11, pt->cmd.cdw11);
1105 htolem32(&sqe->cdw12, pt->cmd.cdw12);
1106 htolem32(&sqe->cdw13, pt->cmd.cdw13);
1107 htolem32(&sqe->cdw14, pt->cmd.cdw14);
1108 htolem32(&sqe->cdw15, pt->cmd.cdw15);
1109 }
1110
1111 static void
1112 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
1113 {
1114 struct nvme_softc *sc = q->q_sc;
1115 struct nvme_pt_state *state = ccb->ccb_cookie;
1116 struct nvme_pt_command *pt = state->pt;
1117 bus_dmamap_t dmap = ccb->ccb_dmamap;
1118
1119 if (pt->buf != NULL && pt->len > 0) {
1120 if (dmap->dm_nsegs > 2) {
1121 bus_dmamap_sync(sc->sc_dmat,
1122 NVME_DMA_MAP(q->q_ccb_prpls),
1123 ccb->ccb_prpl_off,
1124 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
1125 BUS_DMASYNC_POSTWRITE);
1126 }
1127
1128 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
1129 pt->is_read ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
1130 bus_dmamap_unload(sc->sc_dmat, dmap);
1131 }
1132
1133 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0);
1134 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE;
1135
1136 state->finished = true;
1137
1138 nvme_ccb_put(q, ccb);
1139 }
1140
1141 static bool
1142 nvme_pt_finished(void *cookie)
1143 {
1144 struct nvme_pt_state *state = cookie;
1145
1146 return state->finished;
1147 }
1148
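/*
 * Execute a passthrough command on behalf of the ioctl handler.  The
 * user buffer, if any, is copied into a kernel buffer and DMA-mapped,
 * the command is submitted and waited for, and data for read-style
 * commands is copied back out to userland.
 */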
1149 static int
1150 nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt,
1151 uint16_t nsid, struct lwp *l, bool is_adminq)
1152 {
1153 struct nvme_queue *q;
1154 struct nvme_ccb *ccb;
1155 void *buf = NULL;
1156 struct nvme_pt_state state;
1157 int error;
1158
1159 /* limit command size to maximum data transfer size */
1160 if ((pt->buf == NULL && pt->len > 0) ||
1161 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts)))
1162 return EINVAL;
1163
1164 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc, NULL, true);
1165 ccb = nvme_ccb_get(q, true);
1166 KASSERT(ccb != NULL);
1167
1168 if (pt->buf != NULL) {
1169 KASSERT(pt->len > 0);
1170 buf = kmem_alloc(pt->len, KM_SLEEP);
1171 if (!pt->is_read) {
1172 error = copyin(pt->buf, buf, pt->len);
1173 if (error)
1174 goto kmem_free;
1175 }
1176 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf,
1177 pt->len, NULL,
1178 BUS_DMA_WAITOK |
1179 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE));
1180 if (error)
1181 goto kmem_free;
1182 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap,
1183 0, ccb->ccb_dmamap->dm_mapsize,
1184 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
1185 }
1186
1187 memset(&state, 0, sizeof(state));
1188 state.pt = pt;
1189 state.finished = false;
1190
1191 ccb->ccb_done = nvme_pt_done;
1192 ccb->ccb_cookie = &state;
1193
1194 pt->cmd.nsid = nsid;
1195
1196 nvme_q_submit(sc, q, ccb, nvme_pt_fill);
1197
1198 /* wait for completion */
1199 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state);
1200 KASSERT(state.finished);
1201
1202 error = 0;
1203
1204 if (buf != NULL) {
1205 if (error == 0 && pt->is_read)
1206 error = copyout(buf, pt->buf, pt->len);
1207 kmem_free:
1208 kmem_free(buf, pt->len);
1209 }
1210
1211 return error;
1212 }
1213
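/*
 * Claim the next submission queue slot, let the fill callback build the
 * SQE in place, stamp it with the ccb id and ring the SQ tail doorbell.
 */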
1214 static void
1215 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
1216 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
1217 {
1218 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
1219 uint32_t tail;
1220
1221 mutex_enter(&q->q_sq_mtx);
1222 tail = q->q_sq_tail;
1223 if (++q->q_sq_tail >= q->q_entries)
1224 q->q_sq_tail = 0;
1225
1226 sqe += tail;
1227
1228 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1229 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
1230 memset(sqe, 0, sizeof(*sqe));
1231 (*fill)(q, ccb, sqe);
1232 htolem16(&sqe->cid, ccb->ccb_id);
1233 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1234 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
1235
1236 nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
1237 mutex_exit(&q->q_sq_mtx);
1238 }
1239
1240 struct nvme_poll_state {
1241 struct nvme_sqe s;
1242 struct nvme_cqe c;
1243 void *cookie;
1244 void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
1245 };
1246
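/*
 * Run a command synchronously: submit it and spin on nvme_q_complete()
 * until the completion shows up or the timeout (in seconds, negative
 * meaning no timeout) expires.  On completion the CQE status flags
 * (minus the phase bit) are returned, so zero indicates success.
 */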
1247 static int
1248 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
1249 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
1250 {
1251 struct nvme_poll_state state;
1252 uint16_t flags;
1253 int step = 10;
1254 int maxloop = timo_sec * 1000000 / step;
1255 int error = 0;
1256
1257 memset(&state, 0, sizeof(state));
1258 (*fill)(q, ccb, &state.s);
1259
1260 state.done = ccb->ccb_done;
1261 state.cookie = ccb->ccb_cookie;
1262
1263 ccb->ccb_done = nvme_poll_done;
1264 ccb->ccb_cookie = &state;
1265
1266 nvme_q_submit(sc, q, ccb, nvme_poll_fill);
1267 while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
1268 if (nvme_q_complete(sc, q) == 0)
1269 delay(step);
1270
1271 if (timo_sec >= 0 && --maxloop <= 0) {
1272 error = ETIMEDOUT;
1273 break;
1274 }
1275 }
1276
1277 if (error == 0) {
1278 flags = lemtoh16(&state.c.flags);
1279 return flags & ~NVME_CQE_PHASE;
1280 } else {
1281 /*
1282 * If it succeeds later, it would hit a ccb which will have
1283 * already been reused for something else. Not good. Cross
1284 * fingers and hope for the best. XXX do controller reset?
1285 */
1286 aprint_error_dev(sc->sc_dev, "polled command timed out\n");
1287
1288 /* Invoke the callback to clean state anyway */
1289 struct nvme_cqe cqe;
1290 memset(&cqe, 0, sizeof(cqe));
1291 ccb->ccb_done(q, ccb, &cqe);
1292
1293 return 1;
1294 }
1295 }
1296
1297 static void
1298 nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1299 {
1300 struct nvme_sqe *sqe = slot;
1301 struct nvme_poll_state *state = ccb->ccb_cookie;
1302
1303 *sqe = state->s;
1304 }
1305
1306 static void
1307 nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
1308 struct nvme_cqe *cqe)
1309 {
1310 struct nvme_poll_state *state = ccb->ccb_cookie;
1311
1312 state->c = *cqe;
1313 SET(state->c.flags, htole16(NVME_CQE_PHASE));
1314
1315 ccb->ccb_cookie = state->cookie;
1316 state->done(q, ccb, &state->c);
1317 }
1318
1319 static void
1320 nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1321 {
1322 struct nvme_sqe *src = ccb->ccb_cookie;
1323 struct nvme_sqe *dst = slot;
1324
1325 *dst = *src;
1326 }
1327
1328 static void
1329 nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
1330 struct nvme_cqe *cqe)
1331 {
1332 }
1333
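/*
 * Reap completions: consume CQEs whose phase bit matches the queue's
 * current phase, call each ccb's done callback with the CQ mutex
 * dropped, and finally update the CQ head doorbell.  Returns the
 * number of entries processed.
 */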
1334 static int
1335 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
1336 {
1337 struct nvme_ccb *ccb;
1338 struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
1339 uint16_t flags;
1340 int rv = 0;
1341
1342 mutex_enter(&q->q_cq_mtx);
1343
1344 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1345 for (;;) {
1346 cqe = &ring[q->q_cq_head];
1347 flags = lemtoh16(&cqe->flags);
1348 if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
1349 break;
1350
1351 ccb = &q->q_ccbs[cqe->cid];
1352
1353 if (++q->q_cq_head >= q->q_entries) {
1354 q->q_cq_head = 0;
1355 q->q_cq_phase ^= NVME_CQE_PHASE;
1356 }
1357
1358 #ifdef DEBUG
1359 /*
1360 * If we get a spurious completion notification, something
1361 * is seriously hosed up. Very likely a DMA to some random
1362 * memory location happened, so just bail out.
1363 */
1364 if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) {
1365 panic("%s: invalid ccb detected",
1366 device_xname(sc->sc_dev));
1367 /* NOTREACHED */
1368 }
1369 #endif
1370
1371 rv++;
1372
1373 /*
1374 * Unlock the mutex before calling the ccb_done callback
1375 * and re-lock afterwards. The callback triggers lddone()
1376 * which schedules another i/o, and also calls nvme_ccb_put().
1377 * Unlock/relock avoids the possibility of deadlock.
1378 */
1379 mutex_exit(&q->q_cq_mtx);
1380 ccb->ccb_done(q, ccb, cqe);
1381 mutex_enter(&q->q_cq_mtx);
1382 }
1383 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1384
1385 if (rv)
1386 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
1387
1388 mutex_exit(&q->q_cq_mtx);
1389
1390 return rv;
1391 }
1392
1393 static void
1394 nvme_q_wait_complete(struct nvme_softc *sc,
1395 struct nvme_queue *q, bool (*finished)(void *), void *cookie)
1396 {
1397 mutex_enter(&q->q_ccb_mtx);
1398 if (finished(cookie))
1399 goto out;
1400
1401 for(;;) {
1402 q->q_ccb_waiting = true;
1403 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
1404
1405 if (finished(cookie))
1406 break;
1407 }
1408
1409 out:
1410 mutex_exit(&q->q_ccb_mtx);
1411 }
1412
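/*
 * IDENTIFY CONTROLLER: fetch the controller data structure, print the
 * model, firmware and serial strings, and derive sc_mdts (maximum data
 * transfer size) and sc_nn (number of namespaces) from it.
 */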
1413 static int
1414 nvme_identify(struct nvme_softc *sc, u_int mps)
1415 {
1416 char sn[41], mn[81], fr[17];
1417 struct nvm_identify_controller *identify;
1418 struct nvme_dmamem *mem;
1419 struct nvme_ccb *ccb;
1420 u_int mdts;
1421 int rv = 1;
1422
1423 ccb = nvme_ccb_get(sc->sc_admin_q, false);
1424 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
1425
1426 mem = nvme_dmamem_alloc(sc, sizeof(*identify));
1427 if (mem == NULL)
1428 return 1;
1429
1430 ccb->ccb_done = nvme_empty_done;
1431 ccb->ccb_cookie = mem;
1432
1433 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
1434 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify,
1435 NVME_TIMO_IDENT);
1436 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
1437
1438 nvme_ccb_put(sc->sc_admin_q, ccb);
1439
1440 if (rv != 0)
1441 goto done;
1442
1443 identify = NVME_DMA_KVA(mem);
1444 sc->sc_identify = *identify;
1445 identify = NULL;
1446
1447 /* Convert data to host endian */
1448 nvme_identify_controller_swapbytes(&sc->sc_identify);
1449
1450 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn,
1451 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
1452 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn,
1453 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
1454 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr,
1455 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
1456 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr,
1457 sn);
1458
1459 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname));
1460
1461 if (sc->sc_identify.mdts > 0) {
1462 mdts = (1 << sc->sc_identify.mdts) * (1 << mps);
1463 if (mdts < sc->sc_mdts)
1464 sc->sc_mdts = mdts;
1465 }
1466
1467 sc->sc_nn = sc->sc_identify.nn;
1468
1469 done:
1470 nvme_dmamem_free(sc, mem);
1471
1472 return rv;
1473 }
1474
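/*
 * Create an I/O queue pair on the controller: establish the per-queue
 * interrupt when multiple queues are in use, then issue the create
 * I/O completion queue and create I/O submission queue admin commands.
 */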
1475 static int
1476 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
1477 {
1478 struct nvme_sqe_q sqe;
1479 struct nvme_ccb *ccb;
1480 int rv;
1481
1482 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
1483 return 1;
1484
1485 ccb = nvme_ccb_get(sc->sc_admin_q, false);
1486 KASSERT(ccb != NULL);
1487
1488 ccb->ccb_done = nvme_empty_done;
1489 ccb->ccb_cookie = &sqe;
1490
1491 memset(&sqe, 0, sizeof(sqe));
1492 sqe.opcode = NVM_ADMIN_ADD_IOCQ;
1493 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
1494 htolem16(&sqe.qsize, q->q_entries - 1);
1495 htolem16(&sqe.qid, q->q_id);
1496 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1497 if (sc->sc_use_mq)
1498 htolem16(&sqe.cqid, q->q_id); /* qid == vector */
1499
1500 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
1501 if (rv != 0)
1502 goto fail;
1503
1504 ccb->ccb_done = nvme_empty_done;
1505 ccb->ccb_cookie = &sqe;
1506
1507 memset(&sqe, 0, sizeof(sqe));
1508 sqe.opcode = NVM_ADMIN_ADD_IOSQ;
1509 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
1510 htolem16(&sqe.qsize, q->q_entries - 1);
1511 htolem16(&sqe.qid, q->q_id);
1512 htolem16(&sqe.cqid, q->q_id);
1513 sqe.qflags = NVM_SQE_Q_PC;
1514
1515 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
1516 if (rv != 0)
1517 goto fail;
1518
1519 nvme_ccb_put(sc->sc_admin_q, ccb);
1520 return 0;
1521
1522 fail:
1523 if (sc->sc_use_mq)
1524 sc->sc_intr_disestablish(sc, q->q_id);
1525
1526 nvme_ccb_put(sc->sc_admin_q, ccb);
1527 return rv;
1528 }
1529
1530 static int
1531 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
1532 {
1533 struct nvme_sqe_q sqe;
1534 struct nvme_ccb *ccb;
1535 int rv;
1536
1537 ccb = nvme_ccb_get(sc->sc_admin_q, false);
1538 KASSERT(ccb != NULL);
1539
1540 ccb->ccb_done = nvme_empty_done;
1541 ccb->ccb_cookie = &sqe;
1542
1543 memset(&sqe, 0, sizeof(sqe));
1544 sqe.opcode = NVM_ADMIN_DEL_IOSQ;
1545 htolem16(&sqe.qid, q->q_id);
1546
1547 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
1548 if (rv != 0)
1549 goto fail;
1550
1551 ccb->ccb_done = nvme_empty_done;
1552 ccb->ccb_cookie = &sqe;
1553
1554 memset(&sqe, 0, sizeof(sqe));
1555 sqe.opcode = NVM_ADMIN_DEL_IOCQ;
1556 htolem16(&sqe.qid, q->q_id);
1557
1558 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
1559 if (rv != 0)
1560 goto fail;
1561
1562 fail:
1563 nvme_ccb_put(sc->sc_admin_q, ccb);
1564
1565 if (rv == 0 && sc->sc_use_mq) {
1566 if (sc->sc_intr_disestablish(sc, q->q_id))
1567 rv = 1;
1568 }
1569
1570 return rv;
1571 }
1572
1573 static void
1574 nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1575 {
1576 struct nvme_sqe *sqe = slot;
1577 struct nvme_dmamem *mem = ccb->ccb_cookie;
1578
1579 sqe->opcode = NVM_ADMIN_IDENTIFY;
1580 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
1581 htolem32(&sqe->cdw10, 1);
1582 }
1583
1584 static int
1585 nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa,
1586 u_int *nsqa)
1587 {
1588 struct nvme_pt_state state;
1589 struct nvme_pt_command pt;
1590 struct nvme_ccb *ccb;
1591 int rv;
1592
1593 ccb = nvme_ccb_get(sc->sc_admin_q, false);
1594 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
1595
1596 memset(&pt, 0, sizeof(pt));
1597 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES;
1598 htolem32(&pt.cmd.cdw10, NVM_FEATURE_NUMBER_OF_QUEUES);
1599 htolem32(&pt.cmd.cdw11, ((nq - 1) << 16) | (nq - 1));
1600
1601 memset(&state, 0, sizeof(state));
1602 state.pt = &pt;
1603 state.finished = false;
1604
1605 ccb->ccb_done = nvme_pt_done;
1606 ccb->ccb_cookie = &state;
1607
1608 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP);
1609
1610 if (rv != 0) {
1611 *ncqa = *nsqa = 0;
1612 return EIO;
1613 }
1614
1615 *ncqa = (pt.cpl.cdw0 >> 16) + 1;
1616 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1;
1617
1618 return 0;
1619 }
1620
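/*
 * Allocate the per-queue command tracking structures (ccbs): a DMA map
 * for each ccb plus a slice of the shared PRP list DMA memory, all
 * linked onto the queue's free list.
 */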
1621 static int
1622 nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs)
1623 {
1624 struct nvme_softc *sc = q->q_sc;
1625 struct nvme_ccb *ccb;
1626 bus_addr_t off;
1627 uint64_t *prpl;
1628 u_int i;
1629
1630 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
1631 cv_init(&q->q_ccb_wait, "nvmeqw");
1632 q->q_ccb_waiting = false;
1633 SIMPLEQ_INIT(&q->q_ccb_list);
1634
1635 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);
1636
1637 q->q_nccbs = nccbs;
1638 q->q_ccb_prpls = nvme_dmamem_alloc(sc,
1639 sizeof(*prpl) * sc->sc_max_sgl * nccbs);
1640
1641 prpl = NVME_DMA_KVA(q->q_ccb_prpls);
1642 off = 0;
1643
1644 for (i = 0; i < nccbs; i++) {
1645 ccb = &q->q_ccbs[i];
1646
1647 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
1648 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */,
1649 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
1650 &ccb->ccb_dmamap) != 0)
1651 goto free_maps;
1652
1653 ccb->ccb_id = i;
1654 ccb->ccb_prpl = prpl;
1655 ccb->ccb_prpl_off = off;
1656 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off;
1657
1658 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry);
1659
1660 prpl += sc->sc_max_sgl;
1661 off += sizeof(*prpl) * sc->sc_max_sgl;
1662 }
1663
1664 return 0;
1665
1666 free_maps:
1667 nvme_ccbs_free(q);
1668 return 1;
1669 }
1670
1671 static struct nvme_ccb *
1672 nvme_ccb_get(struct nvme_queue *q, bool wait)
1673 {
1674 struct nvme_ccb *ccb = NULL;
1675
1676 mutex_enter(&q->q_ccb_mtx);
1677 again:
1678 ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
1679 if (ccb != NULL) {
1680 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
1681 #ifdef DEBUG
1682 ccb->ccb_cookie = NULL;
1683 #endif
1684 } else {
1685 if (__predict_false(wait)) {
1686 q->q_ccb_waiting = true;
1687 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
1688 goto again;
1689 }
1690 }
1691 mutex_exit(&q->q_ccb_mtx);
1692
1693 return ccb;
1694 }
1695
1696 static void
1697 nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
1698 {
1699
1700 mutex_enter(&q->q_ccb_mtx);
1701 #ifdef DEBUG
1702 ccb->ccb_cookie = (void *)NVME_CCB_FREE;
1703 #endif
1704 SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);
1705
1706 /* It's unlikely there are any waiters, as this path is not used for regular I/O */
1707 if (__predict_false(q->q_ccb_waiting)) {
1708 q->q_ccb_waiting = false;
1709 cv_broadcast(&q->q_ccb_wait);
1710 }
1711
1712 mutex_exit(&q->q_ccb_mtx);
1713 }
1714
1715 static void
1716 nvme_ccbs_free(struct nvme_queue *q)
1717 {
1718 struct nvme_softc *sc = q->q_sc;
1719 struct nvme_ccb *ccb;
1720
1721 mutex_enter(&q->q_ccb_mtx);
1722 while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) {
1723 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
1724 /*
1725 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter()
1726 * internally. Don't hold the spin mutex while calling it.
1727 */
1728 mutex_exit(&q->q_ccb_mtx);
1729 bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
1730 mutex_enter(&q->q_ccb_mtx);
1731 }
1732 mutex_exit(&q->q_ccb_mtx);
1733
1734 nvme_dmamem_free(sc, q->q_ccb_prpls);
1735 kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
1736 q->q_ccbs = NULL;
1737 cv_destroy(&q->q_ccb_wait);
1738 mutex_destroy(&q->q_ccb_mtx);
1739 }
1740
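/*
 * Allocate a queue pair: DMA memory for the submission and completion
 * rings, the doorbell offsets for the given queue id and doorbell
 * stride, plus the ccbs (one fewer than the ring size, see the comment
 * below).
 */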
1741 static struct nvme_queue *
1742 nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
1743 {
1744 struct nvme_queue *q;
1745
1746 q = kmem_alloc(sizeof(*q), KM_SLEEP);
1747 q->q_sc = sc;
1748 q->q_sq_dmamem = nvme_dmamem_alloc(sc,
1749 sizeof(struct nvme_sqe) * entries);
1750 if (q->q_sq_dmamem == NULL)
1751 goto free;
1752
1753 q->q_cq_dmamem = nvme_dmamem_alloc(sc,
1754 sizeof(struct nvme_cqe) * entries);
1755 if (q->q_cq_dmamem == NULL)
1756 goto free_sq;
1757
1758 memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1759 memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1760
1761 mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
1762 mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
1763 q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
1764 q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
1765 q->q_id = id;
1766 q->q_entries = entries;
1767 q->q_sq_tail = 0;
1768 q->q_cq_head = 0;
1769 q->q_cq_phase = NVME_CQE_PHASE;
1770
1771 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1772 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1773
1774 /*
1775 * Due to the definition of full and empty queues (a queue is empty
1776 * when head == tail, and full when tail is one less than head),
1777 * we can actually only have (entries - 1) commands in flight.
1778 */
1779 if (nvme_ccbs_alloc(q, entries - 1) != 0) {
1780 aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
1781 goto free_cq;
1782 }
1783
1784 return q;
1785
1786 free_cq:
1787 nvme_dmamem_free(sc, q->q_cq_dmamem);
1788 free_sq:
1789 nvme_dmamem_free(sc, q->q_sq_dmamem);
1790 free:
1791 kmem_free(q, sizeof(*q));
1792
1793 return NULL;
1794 }
1795
1796 static void
1797 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
1798 {
1799 nvme_ccbs_free(q);
1800 mutex_destroy(&q->q_sq_mtx);
1801 mutex_destroy(&q->q_cq_mtx);
1802 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1803 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
1804 nvme_dmamem_free(sc, q->q_cq_dmamem);
1805 nvme_dmamem_free(sc, q->q_sq_dmamem);
1806 kmem_free(q, sizeof(*q));
1807 }
1808
1809 int
1810 nvme_intr(void *xsc)
1811 {
1812 struct nvme_softc *sc = xsc;
1813
1814 /*
1815 * INTx is level triggered; the controller deasserts the interrupt only
1816 * when we advance the completion queue head via a write to the doorbell.
1817 * Tell the controller to block the interrupts while we process
1818 * the queue(s).
1819 */
1820 nvme_write4(sc, NVME_INTMS, 1);
1821
1822 softint_schedule(sc->sc_softih[0]);
1823
1824 /* don't know, might not have been for us */
1825 return 1;
1826 }
1827
1828 void
1829 nvme_softintr_intx(void *xq)
1830 {
1831 struct nvme_queue *q = xq;
1832 struct nvme_softc *sc = q->q_sc;
1833
1834 nvme_q_complete(sc, sc->sc_admin_q);
1835 if (sc->sc_q != NULL)
1836 nvme_q_complete(sc, sc->sc_q[0]);
1837
1838 /*
1839 * Processing done, tell controller to issue interrupts again. There
1840 * is no race, as NVMe spec requires the controller to maintain state,
1841 * and assert the interrupt whenever there are unacknowledged
1842 * completion queue entries.
1843 */
1844 nvme_write4(sc, NVME_INTMC, 1);
1845 }
1846
1847 int
1848 nvme_intr_msi(void *xq)
1849 {
1850 struct nvme_queue *q = xq;
1851
1852 KASSERT(q && q->q_sc && q->q_sc->sc_softih
1853 && q->q_sc->sc_softih[q->q_id]);
1854
1855 /*
1856 * MSI/MSI-X are edge triggered, so we can hand processing over to the
1857 * soft interrupt without masking the hardware interrupt.
1858 */
1859 softint_schedule(q->q_sc->sc_softih[q->q_id]);
1860
1861 return 1;
1862 }
1863
1864 void
1865 nvme_softintr_msi(void *xq)
1866 {
1867 struct nvme_queue *q = xq;
1868 struct nvme_softc *sc = q->q_sc;
1869
1870 nvme_q_complete(sc, q);
1871 }
1872
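/*
 * Allocate a single physically contiguous segment of DMA-safe memory,
 * map it into kernel virtual address space and load it into a DMA map;
 * used for the queue rings, PRP lists and identify buffers.
 */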
1873 static struct nvme_dmamem *
1874 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
1875 {
1876 struct nvme_dmamem *ndm;
1877 int nsegs;
1878
1879 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP);
1880 if (ndm == NULL)
1881 return NULL;
1882
1883 ndm->ndm_size = size;
1884
1885 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0,
1886 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0)
1887 goto ndmfree;
1888
1889 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
1890 1, &nsegs, BUS_DMA_WAITOK) != 0)
1891 goto destroy;
1892
1893 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
1894 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
1895 goto free;
1896 memset(ndm->ndm_kva, 0, size);
1897
1898 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
1899 NULL, BUS_DMA_WAITOK) != 0)
1900 goto unmap;
1901
1902 return ndm;
1903
1904 unmap:
1905 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
1906 free:
1907 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1908 destroy:
1909 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1910 ndmfree:
1911 kmem_free(ndm, sizeof(*ndm));
1912 return NULL;
1913 }
1914
1915 static void
1916 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
1917 {
1918 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
1919 0, NVME_DMA_LEN(mem), ops);
1920 }
1921
1922 void
1923 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
1924 {
1925 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
1926 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
1927 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1928 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1929 kmem_free(ndm, sizeof(*ndm));
1930 }
1931
1932 /*
1933 * ioctl
1934 */
1935
1936 dev_type_open(nvmeopen);
1937 dev_type_close(nvmeclose);
1938 dev_type_ioctl(nvmeioctl);
1939
1940 const struct cdevsw nvme_cdevsw = {
1941 .d_open = nvmeopen,
1942 .d_close = nvmeclose,
1943 .d_read = noread,
1944 .d_write = nowrite,
1945 .d_ioctl = nvmeioctl,
1946 .d_stop = nostop,
1947 .d_tty = notty,
1948 .d_poll = nopoll,
1949 .d_mmap = nommap,
1950 .d_kqfilter = nokqfilter,
1951 .d_discard = nodiscard,
1952 .d_flag = D_OTHER,
1953 };
1954
1955 /*
1956 * Accept an open operation on the control device.
1957 */
1958 int
1959 nvmeopen(dev_t dev, int flag, int mode, struct lwp *l)
1960 {
1961 struct nvme_softc *sc;
1962 int unit = minor(dev) / 0x10000;
1963 int nsid = minor(dev) & 0xffff;
1964 int nsidx;
1965
1966 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL)
1967 return ENXIO;
1968 if ((sc->sc_flags & NVME_F_ATTACHED) == 0)
1969 return ENXIO;
1970
1971 if (nsid == 0) {
1972 /* controller */
1973 if (ISSET(sc->sc_flags, NVME_F_OPEN))
1974 return EBUSY;
1975 SET(sc->sc_flags, NVME_F_OPEN);
1976 } else {
1977 /* namespace */
1978 nsidx = nsid - 1;
1979 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL)
1980 return ENXIO;
1981 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN))
1982 return EBUSY;
1983 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
1984 }
1985 return 0;
1986 }
1987
1988 /*
1989 * Accept the last close on the control device.
1990 */
1991 int
1992 nvmeclose(dev_t dev, int flag, int mode, struct lwp *l)
1993 {
1994 struct nvme_softc *sc;
1995 int unit = minor(dev) / 0x10000;
1996 int nsid = minor(dev) & 0xffff;
1997 int nsidx;
1998
1999 sc = device_lookup_private(&nvme_cd, unit);
2000 if (sc == NULL)
2001 return ENXIO;
2002
2003 if (nsid == 0) {
2004 /* controller */
2005 CLR(sc->sc_flags, NVME_F_OPEN);
2006 } else {
2007 /* namespace */
2008 nsidx = nsid - 1;
2009 if (nsidx >= sc->sc_nn)
2010 return ENXIO;
2011 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
2012 }
2013
2014 return 0;
2015 }
2016
2017 /*
2018 * Handle control operations.
2019 */
2020 int
2021 nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
2022 {
2023 struct nvme_softc *sc;
2024 int unit = minor(dev) / 0x10000;
2025 int nsid = minor(dev) & 0xffff;
2026 struct nvme_pt_command *pt;
2027
2028 sc = device_lookup_private(&nvme_cd, unit);
2029 if (sc == NULL)
2030 return ENXIO;
2031
2032 switch (cmd) {
2033 case NVME_PASSTHROUGH_CMD:
2034 pt = data;
2035 return nvme_command_passthrough(sc, data,
2036 nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0);
2037 }
2038
2039 return ENOTTY;
2040 }
2041