/*-
 * BSD LICENSE
 *
 * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#if 0
__FBSDID("$FreeBSD: head/sys/dev/ena/ena.c 333456 2018-05-10 09:37:54Z mw $");
#endif
__KERNEL_RCSID(0, "$NetBSD: if_ena.c,v 1.17 2019/10/18 04:09:02 msaitoh Exp $");
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bus.h>
39 #include <sys/endian.h>
40 #include <sys/kernel.h>
41 #include <sys/kthread.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/sysctl.h>
48 #include <sys/time.h>
49 #include <sys/workqueue.h>
50 #include <sys/callout.h>
51 #include <sys/interrupt.h>
52 #include <sys/cpu.h>
53
54 #include <net/if_ether.h>
55 #include <net/if_vlanvar.h>
56
57 #include <dev/pci/if_enavar.h>
58
59 /*********************************************************
60 * Function prototypes
61 *********************************************************/
62 static int ena_probe(device_t, cfdata_t, void *);
63 static int ena_intr_msix_mgmnt(void *);
64 static int ena_allocate_pci_resources(struct pci_attach_args *,
65 struct ena_adapter *);
66 static void ena_free_pci_resources(struct ena_adapter *);
67 static int ena_change_mtu(struct ifnet *, int);
68 static void ena_init_io_rings_common(struct ena_adapter *,
69 struct ena_ring *, uint16_t);
70 static void ena_init_io_rings(struct ena_adapter *);
71 static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
72 static void ena_free_all_io_rings_resources(struct ena_adapter *);
73 #if 0
74 static int ena_setup_tx_dma_tag(struct ena_adapter *);
75 static int ena_free_tx_dma_tag(struct ena_adapter *);
76 static int ena_setup_rx_dma_tag(struct ena_adapter *);
77 static int ena_free_rx_dma_tag(struct ena_adapter *);
78 #endif
79 static int ena_setup_tx_resources(struct ena_adapter *, int);
80 static void ena_free_tx_resources(struct ena_adapter *, int);
81 static int ena_setup_all_tx_resources(struct ena_adapter *);
82 static void ena_free_all_tx_resources(struct ena_adapter *);
83 static inline int validate_rx_req_id(struct ena_ring *, uint16_t);
84 static int ena_setup_rx_resources(struct ena_adapter *, unsigned int);
85 static void ena_free_rx_resources(struct ena_adapter *, unsigned int);
86 static int ena_setup_all_rx_resources(struct ena_adapter *);
87 static void ena_free_all_rx_resources(struct ena_adapter *);
88 static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
89 struct ena_rx_buffer *);
90 static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
91 struct ena_rx_buffer *);
92 static int ena_refill_rx_bufs(struct ena_ring *, uint32_t);
93 static void ena_free_rx_bufs(struct ena_adapter *, unsigned int);
94 static void ena_refill_all_rx_bufs(struct ena_adapter *);
95 static void ena_free_all_rx_bufs(struct ena_adapter *);
96 static void ena_free_tx_bufs(struct ena_adapter *, unsigned int);
97 static void ena_free_all_tx_bufs(struct ena_adapter *);
98 static void ena_destroy_all_tx_queues(struct ena_adapter *);
99 static void ena_destroy_all_rx_queues(struct ena_adapter *);
100 static void ena_destroy_all_io_queues(struct ena_adapter *);
101 static int ena_create_io_queues(struct ena_adapter *);
102 static int ena_tx_cleanup(struct ena_ring *);
103 static void ena_deferred_rx_cleanup(struct work *, void *);
104 static int ena_rx_cleanup(struct ena_ring *);
105 static inline int validate_tx_req_id(struct ena_ring *, uint16_t);
106 #if 0
107 static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
108 struct mbuf *);
109 #endif
110 static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
111 struct ena_com_rx_ctx *, uint16_t *);
112 static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
113 struct mbuf *);
114 static int ena_handle_msix(void *);
115 static int ena_enable_msix(struct ena_adapter *);
116 static int ena_request_mgmnt_irq(struct ena_adapter *);
117 static int ena_request_io_irq(struct ena_adapter *);
118 static void ena_free_mgmnt_irq(struct ena_adapter *);
119 static void ena_free_io_irq(struct ena_adapter *);
120 static void ena_free_irqs(struct ena_adapter*);
121 static void ena_disable_msix(struct ena_adapter *);
122 static void ena_unmask_all_io_irqs(struct ena_adapter *);
123 static int ena_rss_configure(struct ena_adapter *);
124 static int ena_up_complete(struct ena_adapter *);
125 static int ena_up(struct ena_adapter *);
126 static void ena_down(struct ena_adapter *);
127 #if 0
128 static uint64_t ena_get_counter(struct ifnet *, ift_counter);
129 #endif
130 static int ena_media_change(struct ifnet *);
131 static void ena_media_status(struct ifnet *, struct ifmediareq *);
132 static int ena_init(struct ifnet *);
133 static int ena_ioctl(struct ifnet *, u_long, void *);
134 static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
135 static void ena_update_host_info(struct ena_admin_host_info *, struct ifnet *);
136 static void ena_update_hwassist(struct ena_adapter *);
137 static int ena_setup_ifnet(device_t, struct ena_adapter *,
138 struct ena_com_dev_get_features_ctx *);
139 static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
140 static int ena_check_and_collapse_mbuf(struct ena_ring *tx_ring,
141 struct mbuf **mbuf);
142 static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **);
143 static void ena_start_xmit(struct ena_ring *);
144 static int ena_mq_start(struct ifnet *, struct mbuf *);
145 static void ena_deferred_mq_start(struct work *, void *);
146 #if 0
147 static void ena_qflush(struct ifnet *);
148 #endif
149 static int ena_calc_io_queue_num(struct pci_attach_args *,
150 struct ena_adapter *, struct ena_com_dev_get_features_ctx *);
151 static int ena_calc_queue_size(struct ena_adapter *, uint16_t *,
152 uint16_t *, struct ena_com_dev_get_features_ctx *);
153 #if 0
154 static int ena_rss_init_default(struct ena_adapter *);
155 static void ena_rss_init_default_deferred(void *);
156 #endif
157 static void ena_config_host_info(struct ena_com_dev *);
158 static void ena_attach(device_t, device_t, void *);
159 static int ena_detach(device_t, int);
160 static int ena_device_init(struct ena_adapter *, device_t,
161 struct ena_com_dev_get_features_ctx *, int *);
162 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
163 int);
164 static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
165 static void unimplemented_aenq_handler(void *,
166 struct ena_admin_aenq_entry *);
167 static void ena_timer_service(void *);
168
169 static const char ena_version[] =
170 DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
171
172 #if 0
173 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
174 #endif
175
176 /*
177 * Tuneable number of buffers in the buf-ring (drbr)
178 */
179 static int ena_buf_ring_size = 4096;
180 #if 0
181 SYSCTL_INT(_hw_ena, OID_AUTO, buf_ring_size, CTLFLAG_RWTUN,
182 &ena_buf_ring_size, 0, "Size of the bufring");
183 #endif
184
185 /*
186 * Logging level for changing verbosity of the output
187 */
188 int ena_log_level = ENA_ALERT | ENA_WARNING;
189 #if 0
190 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
191 &ena_log_level, 0, "Logging level indicating verbosity of the logs");
192 #endif
193
194 static const ena_vendor_info_t ena_vendor_info_array[] = {
195 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
196 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
197 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
198 { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
199 /* Last entry */
200 { 0, 0, 0 }
201 };
202
/*
 * Contains pointers to event handlers, e.g., link state change.
 */
206 static struct ena_aenq_handlers aenq_handlers;
207
208 int
209 ena_dma_alloc(device_t dmadev, bus_size_t size,
    ena_mem_handle_t *dma, int mapflags)
211 {
212 struct ena_adapter *adapter = device_private(dmadev);
213 uint32_t maxsize;
214 bus_dma_segment_t seg;
215 int error, nsegs;
216
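	/* Round the allocation size up to a whole number of pages. */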
217 maxsize = ((size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
218
219 #if 0
220 /* XXX what is this needed for ? */
221 dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
222 if (unlikely(dma_space_addr == 0))
223 dma_space_addr = BUS_SPACE_MAXADDR;
224 #endif
225
226 dma->tag = adapter->sc_dmat;
227
228 if ((error = bus_dmamap_create(dma->tag, maxsize, 1, maxsize, 0,
229 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->map)) != 0) {
230 ena_trace(ENA_ALERT, "bus_dmamap_create(%ju) failed: %d\n",
231 (uintmax_t)maxsize, error);
232 goto fail_create;
233 }
234
235 error = bus_dmamem_alloc(dma->tag, maxsize, 8, 0, &seg, 1, &nsegs,
236 BUS_DMA_ALLOCNOW);
237 if (error) {
238 ena_trace(ENA_ALERT, "bus_dmamem_alloc(%ju) failed: %d\n",
239 (uintmax_t)maxsize, error);
240 goto fail_alloc;
241 }
242
243 error = bus_dmamem_map(dma->tag, &seg, nsegs, maxsize,
244 &dma->vaddr, BUS_DMA_COHERENT);
245 if (error) {
246 ena_trace(ENA_ALERT, "bus_dmamem_map(%ju) failed: %d\n",
247 (uintmax_t)maxsize, error);
248 goto fail_map;
249 }
250 memset(dma->vaddr, 0, maxsize);
251
252 error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
253 maxsize, NULL, mapflags);
254 if (error) {
255 ena_trace(ENA_ALERT, ": bus_dmamap_load failed: %d\n", error);
256 goto fail_load;
257 }
258 dma->paddr = dma->map->dm_segs[0].ds_addr;
259
260 return (0);
261
262 fail_load:
263 bus_dmamem_unmap(dma->tag, dma->vaddr, maxsize);
264 fail_map:
265 bus_dmamem_free(dma->tag, &seg, nsegs);
266 fail_alloc:
267 bus_dmamap_destroy(adapter->sc_dmat, dma->map);
268 fail_create:
269 return (error);
270 }
271
272 static int
273 ena_allocate_pci_resources(struct pci_attach_args *pa,
274 struct ena_adapter *adapter)
275 {
276 pcireg_t memtype, reg;
277 bus_addr_t memaddr;
278 bus_size_t mapsize;
279 int flags, error;
280 int msixoff;
281
282 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, ENA_REG_BAR);
283 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
284 aprint_error_dev(adapter->pdev, "invalid type (type=0x%x)\n",
285 memtype);
286 return ENXIO;
287 }
288 reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
289 if (((reg & PCI_COMMAND_MASTER_ENABLE) == 0) ||
290 ((reg & PCI_COMMAND_MEM_ENABLE) == 0)) {
291 /*
292 * Enable address decoding for memory range in case BIOS or
293 * UEFI didn't set it.
294 */
295 reg |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
296 pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
297 reg);
298 }
299
300 adapter->sc_btag = pa->pa_memt;
301 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, ENA_REG_BAR,
302 memtype, &memaddr, &mapsize, &flags);
303 if (error) {
304 aprint_error_dev(adapter->pdev, "can't get map info\n");
305 return ENXIO;
306 }
307
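	/*
	 * If the MSI-X table lives in the same BAR as the registers, map only
	 * the space below the table so that the table region can be mapped
	 * separately by the interrupt code.
	 */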
308 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
309 NULL)) {
310 pcireg_t msixtbl;
311 uint32_t table_offset;
312 int bir;
313
314 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
315 msixoff + PCI_MSIX_TBLOFFSET);
316 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
317 bir = msixtbl & PCI_MSIX_PBABIR_MASK;
318 if (bir == PCI_MAPREG_NUM(ENA_REG_BAR))
319 mapsize = table_offset;
320 }
321
322 error = bus_space_map(adapter->sc_btag, memaddr, mapsize, flags,
323 &adapter->sc_bhandle);
324 if (error != 0) {
325 aprint_error_dev(adapter->pdev,
326 "can't map mem space (error=%d)\n", error);
327 return ENXIO;
328 }
329
330 return (0);
331 }
332
333 static void
334 ena_free_pci_resources(struct ena_adapter *adapter)
335 {
336 /* Nothing to do */
337 }
338
339 static int
340 ena_probe(device_t parent, cfdata_t match, void *aux)
341 {
342 struct pci_attach_args *pa = aux;
343 const ena_vendor_info_t *ent;
344
345 for (int i = 0; i < __arraycount(ena_vendor_info_array); i++) {
346 ent = &ena_vendor_info_array[i];
347
348 if ((PCI_VENDOR(pa->pa_id) == ent->vendor_id) &&
349 (PCI_PRODUCT(pa->pa_id) == ent->device_id)) {
350 return 1;
351 }
352 }
353
354 return 0;
355 }
356
357 static int
358 ena_change_mtu(struct ifnet *ifp, int new_mtu)
359 {
360 struct ena_adapter *adapter = if_getsoftc(ifp);
361 int rc;
362
363 if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
364 device_printf(adapter->pdev, "Invalid MTU setting. "
365 "new_mtu: %d max mtu: %d min mtu: %d\n",
366 new_mtu, adapter->max_mtu, ENA_MIN_MTU);
367 return (EINVAL);
368 }
369
370 rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
371 if (likely(rc == 0)) {
372 ena_trace(ENA_DBG, "set MTU to %d\n", new_mtu);
373 if_setmtu(ifp, new_mtu);
374 } else {
375 device_printf(adapter->pdev, "Failed to set MTU to %d\n",
376 new_mtu);
377 }
378
379 return (rc);
380 }
381
382 #define EVCNT_INIT(st, f) \
383 do { \
384 evcnt_attach_dynamic(&st->f, EVCNT_TYPE_MISC, NULL, \
385 st->name, #f); \
386 } while (0)
387
388 static inline void
389 ena_alloc_counters_rx(struct ena_stats_rx *st, int queue)
390 {
391 snprintf(st->name, sizeof(st->name), "ena rxq%d", queue);
392
393 EVCNT_INIT(st, cnt);
394 EVCNT_INIT(st, bytes);
395 EVCNT_INIT(st, refil_partial);
396 EVCNT_INIT(st, bad_csum);
397 EVCNT_INIT(st, mjum_alloc_fail);
398 EVCNT_INIT(st, mbuf_alloc_fail);
399 EVCNT_INIT(st, dma_mapping_err);
400 EVCNT_INIT(st, bad_desc_num);
401 EVCNT_INIT(st, bad_req_id);
402 EVCNT_INIT(st, empty_rx_ring);
403
	/* Make sure all code is updated when new fields are added */
405 CTASSERT(offsetof(struct ena_stats_rx, empty_rx_ring)
406 + sizeof(st->empty_rx_ring) == sizeof(*st));
407 }
408
409 static inline void
410 ena_alloc_counters_tx(struct ena_stats_tx *st, int queue)
411 {
412 snprintf(st->name, sizeof(st->name), "ena txq%d", queue);
413
414 EVCNT_INIT(st, cnt);
415 EVCNT_INIT(st, bytes);
416 EVCNT_INIT(st, prepare_ctx_err);
417 EVCNT_INIT(st, dma_mapping_err);
418 EVCNT_INIT(st, doorbells);
419 EVCNT_INIT(st, missing_tx_comp);
420 EVCNT_INIT(st, bad_req_id);
421 EVCNT_INIT(st, collapse);
422 EVCNT_INIT(st, collapse_err);
423
	/* Make sure all code is updated when new fields are added */
425 CTASSERT(offsetof(struct ena_stats_tx, collapse_err)
426 + sizeof(st->collapse_err) == sizeof(*st));
427 }
428
429 static inline void
430 ena_alloc_counters_dev(struct ena_stats_dev *st, int queue)
431 {
432 snprintf(st->name, sizeof(st->name), "ena dev ioq%d", queue);
433
434 EVCNT_INIT(st, wd_expired);
435 EVCNT_INIT(st, interface_up);
436 EVCNT_INIT(st, interface_down);
437 EVCNT_INIT(st, admin_q_pause);
438
	/* Make sure all code is updated when new fields are added */
440 CTASSERT(offsetof(struct ena_stats_dev, admin_q_pause)
441 + sizeof(st->admin_q_pause) == sizeof(*st));
442 }
443
444 static inline void
445 ena_alloc_counters_hwstats(struct ena_hw_stats *st, int queue)
446 {
447 snprintf(st->name, sizeof(st->name), "ena hw ioq%d", queue);
448
449 EVCNT_INIT(st, rx_packets);
450 EVCNT_INIT(st, tx_packets);
451 EVCNT_INIT(st, rx_bytes);
452 EVCNT_INIT(st, tx_bytes);
453 EVCNT_INIT(st, rx_drops);
454
	/* Make sure all code is updated when new fields are added */
456 CTASSERT(offsetof(struct ena_hw_stats, rx_drops)
457 + sizeof(st->rx_drops) == sizeof(*st));
}

static inline void
460 ena_free_counters(struct evcnt *begin, int size)
461 {
462 struct evcnt *end = (struct evcnt *)((char *)begin + size);
463
464 for (; begin < end; ++begin)
465 counter_u64_free(*begin);
466 }
467
468 static inline void
469 ena_reset_counters(struct evcnt *begin, int size)
470 {
471 struct evcnt *end = (struct evcnt *)((char *)begin + size);
472
473 for (; begin < end; ++begin)
474 counter_u64_zero(*begin);
475 }
476
477 static void
478 ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
479 uint16_t qid)
480 {
481
482 ring->qid = qid;
483 ring->adapter = adapter;
484 ring->ena_dev = adapter->ena_dev;
485 }
486
487 static void
488 ena_init_io_rings(struct ena_adapter *adapter)
489 {
490 struct ena_com_dev *ena_dev;
491 struct ena_ring *txr, *rxr;
492 struct ena_que *que;
493 int i;
494
495 ena_dev = adapter->ena_dev;
496
497 for (i = 0; i < adapter->num_queues; i++) {
498 txr = &adapter->tx_ring[i];
499 rxr = &adapter->rx_ring[i];
500
501 /* TX/RX common ring state */
502 ena_init_io_rings_common(adapter, txr, i);
503 ena_init_io_rings_common(adapter, rxr, i);
504
505 /* TX specific ring state */
506 txr->ring_size = adapter->tx_ring_size;
507 txr->tx_max_header_size = ena_dev->tx_max_header_size;
508 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
509 txr->smoothed_interval =
510 ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
511
512 /* Allocate a buf ring */
513 txr->br = buf_ring_alloc(ena_buf_ring_size, M_DEVBUF,
514 M_WAITOK, &txr->ring_mtx);
515
516 /* Alloc TX statistics. */
517 ena_alloc_counters_tx(&txr->tx_stats, i);
518
519 /* RX specific ring state */
520 rxr->ring_size = adapter->rx_ring_size;
521 rxr->smoothed_interval =
522 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
523
524 /* Alloc RX statistics. */
525 ena_alloc_counters_rx(&rxr->rx_stats, i);
526
527 /* Initialize locks */
528 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
529 device_xname(adapter->pdev), i);
530 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
531 device_xname(adapter->pdev), i);
532
533 mutex_init(&txr->ring_mtx, MUTEX_DEFAULT, IPL_NET);
534 mutex_init(&rxr->ring_mtx, MUTEX_DEFAULT, IPL_NET);
535
536 que = &adapter->que[i];
537 que->adapter = adapter;
538 que->id = i;
539 que->tx_ring = txr;
540 que->rx_ring = rxr;
541
542 txr->que = que;
543 rxr->que = que;
544
545 rxr->empty_rx_queue = 0;
546 }
547 }
548
549 static void
550 ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
551 {
552 struct ena_ring *txr = &adapter->tx_ring[qid];
553 struct ena_ring *rxr = &adapter->rx_ring[qid];
554
555 ena_free_counters((struct evcnt *)&txr->tx_stats,
556 sizeof(txr->tx_stats));
557 ena_free_counters((struct evcnt *)&rxr->rx_stats,
558 sizeof(rxr->rx_stats));
559
560 ENA_RING_MTX_LOCK(txr);
561 drbr_free(txr->br, M_DEVBUF);
562 ENA_RING_MTX_UNLOCK(txr);
563
564 mutex_destroy(&txr->ring_mtx);
565 mutex_destroy(&rxr->ring_mtx);
566 }
567
568 static void
569 ena_free_all_io_rings_resources(struct ena_adapter *adapter)
570 {
571 int i;
572
573 for (i = 0; i < adapter->num_queues; i++)
574 ena_free_io_ring_resources(adapter, i);
575
576 }
577
578 #if 0
579 static int
580 ena_setup_tx_dma_tag(struct ena_adapter *adapter)
581 {
582 int ret;
583
584 /* Create DMA tag for Tx buffers */
585 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev),
586 1, 0, /* alignment, bounds */
587 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
588 BUS_SPACE_MAXADDR, /* highaddr of excl window */
589 NULL, NULL, /* filter, filterarg */
590 ENA_TSO_MAXSIZE, /* maxsize */
591 adapter->max_tx_sgl_size - 1, /* nsegments */
592 ENA_TSO_MAXSIZE, /* maxsegsize */
593 0, /* flags */
594 NULL, /* lockfunc */
595 NULL, /* lockfuncarg */
596 &adapter->tx_buf_tag);
597
598 return (ret);
599 }
600 #endif
601
602 #if 0
603 static int
604 ena_setup_rx_dma_tag(struct ena_adapter *adapter)
605 {
606 int ret;
607
608 /* Create DMA tag for Rx buffers*/
609 ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */
610 1, 0, /* alignment, bounds */
611 ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr of excl window */
612 BUS_SPACE_MAXADDR, /* highaddr of excl window */
613 NULL, NULL, /* filter, filterarg */
614 MJUM16BYTES, /* maxsize */
615 adapter->max_rx_sgl_size, /* nsegments */
616 MJUM16BYTES, /* maxsegsize */
617 0, /* flags */
618 NULL, /* lockfunc */
619 NULL, /* lockarg */
620 &adapter->rx_buf_tag);
621
622 return (ret);
623 }
624 #endif
625
626 /**
627 * ena_setup_tx_resources - allocate Tx resources (Descriptors)
628 * @adapter: network interface device structure
629 * @qid: queue index
630 *
 * Returns 0 on success, or an error code on failure.
632 **/
633 static int
634 ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
635 {
636 struct ena_que *que = &adapter->que[qid];
637 struct ena_ring *tx_ring = que->tx_ring;
638 int size, i, err;
639 #ifdef RSS
640 cpuset_t cpu_mask;
641 #endif
642
643 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
644
645 tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
646 if (unlikely(tx_ring->tx_buffer_info == NULL))
647 return (ENOMEM);
648
649 size = sizeof(uint16_t) * tx_ring->ring_size;
650 tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
651 if (unlikely(tx_ring->free_tx_ids == NULL))
652 goto err_buf_info_free;
653
654 /* Req id stack for TX OOO completions */
655 for (i = 0; i < tx_ring->ring_size; i++)
656 tx_ring->free_tx_ids[i] = i;
657
658 /* Reset TX statistics. */
659 ena_reset_counters((struct evcnt *)&tx_ring->tx_stats,
660 sizeof(tx_ring->tx_stats));
661
662 tx_ring->next_to_use = 0;
663 tx_ring->next_to_clean = 0;
664
665 /* Make sure that drbr is empty */
666 ENA_RING_MTX_LOCK(tx_ring);
667 drbr_flush(adapter->ifp, tx_ring->br);
668 ENA_RING_MTX_UNLOCK(tx_ring);
669
670 /* ... and create the buffer DMA maps */
671 for (i = 0; i < tx_ring->ring_size; i++) {
672 err = bus_dmamap_create(adapter->sc_dmat,
673 ENA_TSO_MAXSIZE, adapter->max_tx_sgl_size - 1,
674 ENA_TSO_MAXSIZE, 0, 0,
675 &tx_ring->tx_buffer_info[i].map);
676 if (unlikely(err != 0)) {
677 ena_trace(ENA_ALERT,
678 "Unable to create Tx DMA map for buffer %d\n", i);
679 goto err_buf_info_unmap;
680 }
681 }
682
683 /* Allocate workqueues */
684 int rc = workqueue_create(&tx_ring->enqueue_tq, "ena_tx_enq",
685 ena_deferred_mq_start, tx_ring, 0, IPL_NET, WQ_PERCPU | WQ_MPSAFE);
686 if (unlikely(rc != 0)) {
687 ena_trace(ENA_ALERT,
688 "Unable to create workqueue for enqueue task\n");
689 i = tx_ring->ring_size;
690 goto err_buf_info_unmap;
691 }
692
693 #if 0
694 /* RSS set cpu for thread */
695 #ifdef RSS
696 CPU_SETOF(que->cpu, &cpu_mask);
697 taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, IPL_NET,
698 &cpu_mask, "%s tx_ring enq (bucket %d)",
699 device_xname(adapter->pdev), que->cpu);
700 #else /* RSS */
701 taskqueue_start_threads(&tx_ring->enqueue_tq, 1, IPL_NET,
702 "%s txeq %d", device_xname(adapter->pdev), que->cpu);
703 #endif /* RSS */
704 #endif
705
706 return (0);
707
708 err_buf_info_unmap:
709 while (i--) {
710 bus_dmamap_destroy(adapter->sc_dmat,
711 tx_ring->tx_buffer_info[i].map);
712 }
713 free(tx_ring->free_tx_ids, M_DEVBUF);
714 tx_ring->free_tx_ids = NULL;
715 err_buf_info_free:
716 free(tx_ring->tx_buffer_info, M_DEVBUF);
717 tx_ring->tx_buffer_info = NULL;
718
719 return (ENOMEM);
720 }
721
722 /**
723 * ena_free_tx_resources - Free Tx Resources per Queue
724 * @adapter: network interface device structure
725 * @qid: queue index
726 *
727 * Free all transmit software resources
728 **/
729 static void
730 ena_free_tx_resources(struct ena_adapter *adapter, int qid)
731 {
732 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
733
734 workqueue_wait(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
735 workqueue_destroy(tx_ring->enqueue_tq);
736 tx_ring->enqueue_tq = NULL;
737
738 ENA_RING_MTX_LOCK(tx_ring);
739 /* Flush buffer ring, */
740 drbr_flush(adapter->ifp, tx_ring->br);
741
742 /* Free buffer DMA maps, */
743 for (int i = 0; i < tx_ring->ring_size; i++) {
744 m_freem(tx_ring->tx_buffer_info[i].mbuf);
745 tx_ring->tx_buffer_info[i].mbuf = NULL;
746 bus_dmamap_unload(adapter->sc_dmat,
747 tx_ring->tx_buffer_info[i].map);
748 bus_dmamap_destroy(adapter->sc_dmat,
749 tx_ring->tx_buffer_info[i].map);
750 }
751 ENA_RING_MTX_UNLOCK(tx_ring);
752
753 /* And free allocated memory. */
754 free(tx_ring->tx_buffer_info, M_DEVBUF);
755 tx_ring->tx_buffer_info = NULL;
756
757 free(tx_ring->free_tx_ids, M_DEVBUF);
758 tx_ring->free_tx_ids = NULL;
759 }
760
761 /**
762 * ena_setup_all_tx_resources - allocate all queues Tx resources
763 * @adapter: network interface device structure
764 *
 * Returns 0 on success, or an error code on failure.
766 **/
767 static int
768 ena_setup_all_tx_resources(struct ena_adapter *adapter)
769 {
770 int i, rc;
771
772 for (i = 0; i < adapter->num_queues; i++) {
773 rc = ena_setup_tx_resources(adapter, i);
774 if (rc != 0) {
775 device_printf(adapter->pdev,
776 "Allocation for Tx Queue %u failed\n", i);
777 goto err_setup_tx;
778 }
779 }
780
781 return (0);
782
783 err_setup_tx:
784 /* Rewind the index freeing the rings as we go */
785 while (i--)
786 ena_free_tx_resources(adapter, i);
787 return (rc);
788 }
789
790 /**
791 * ena_free_all_tx_resources - Free Tx Resources for All Queues
792 * @adapter: network interface device structure
793 *
794 * Free all transmit software resources
795 **/
796 static void
797 ena_free_all_tx_resources(struct ena_adapter *adapter)
798 {
799 int i;
800
801 for (i = 0; i < adapter->num_queues; i++)
802 ena_free_tx_resources(adapter, i);
803 }
804
805 static inline int
806 validate_rx_req_id(struct ena_ring *rx_ring, uint16_t req_id)
807 {
808 if (likely(req_id < rx_ring->ring_size))
809 return (0);
810
811 device_printf(rx_ring->adapter->pdev, "Invalid rx req_id: %hu\n",
812 req_id);
813 counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
814
815 /* Trigger device reset */
816 rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
817 rx_ring->adapter->trigger_reset = true;
818
819 return (EFAULT);
820 }
821
822 /**
823 * ena_setup_rx_resources - allocate Rx resources (Descriptors)
824 * @adapter: network interface device structure
825 * @qid: queue index
826 *
 * Returns 0 on success, or an error code on failure.
828 **/
829 static int
830 ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid)
831 {
832 struct ena_que *que = &adapter->que[qid];
833 struct ena_ring *rx_ring = que->rx_ring;
834 int size, err, i;
835 #ifdef RSS
836 cpuset_t cpu_mask;
837 #endif
838
839 size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size;
840
841 /*
842 * Alloc extra element so in rx path
843 * we can always prefetch rx_info + 1
844 */
845 size += sizeof(struct ena_rx_buffer);
846
847 rx_ring->rx_buffer_info = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
848
849 size = sizeof(uint16_t) * rx_ring->ring_size;
850 rx_ring->free_rx_ids = malloc(size, M_DEVBUF, M_WAITOK);
851
852 for (i = 0; i < rx_ring->ring_size; i++)
853 rx_ring->free_rx_ids[i] = i;
854
855 /* Reset RX statistics. */
856 ena_reset_counters((struct evcnt *)&rx_ring->rx_stats,
857 sizeof(rx_ring->rx_stats));
858
859 rx_ring->next_to_clean = 0;
860 rx_ring->next_to_use = 0;
861
862 /* ... and create the buffer DMA maps */
863 for (i = 0; i < rx_ring->ring_size; i++) {
864 err = bus_dmamap_create(adapter->sc_dmat,
865 MJUM16BYTES, adapter->max_rx_sgl_size, MJUM16BYTES,
866 0, 0,
867 &(rx_ring->rx_buffer_info[i].map));
868 if (err != 0) {
869 ena_trace(ENA_ALERT,
870 "Unable to create Rx DMA map for buffer %d\n", i);
871 goto err_buf_info_unmap;
872 }
873 }
874
875 #ifdef LRO
876 /* Create LRO for the ring */
877 if ((adapter->ifp->if_capenable & IFCAP_LRO) != 0) {
878 int err = tcp_lro_init(&rx_ring->lro);
879 if (err != 0) {
880 device_printf(adapter->pdev,
881 "LRO[%d] Initialization failed!\n", qid);
882 } else {
883 ena_trace(ENA_INFO,
884 "RX Soft LRO[%d] Initialized\n", qid);
885 rx_ring->lro.ifp = adapter->ifp;
886 }
887 }
888 #endif
889
890 /* Allocate workqueues */
891 int rc = workqueue_create(&rx_ring->cmpl_tq, "ena_rx_comp",
892 ena_deferred_rx_cleanup, rx_ring, 0, IPL_NET, WQ_PERCPU | WQ_MPSAFE);
893 if (unlikely(rc != 0)) {
894 ena_trace(ENA_ALERT,
895 "Unable to create workqueue for RX completion task\n");
896 goto err_buf_info_unmap;
897 }
898
899 #if 0
900 /* RSS set cpu for thread */
901 #ifdef RSS
902 CPU_SETOF(que->cpu, &cpu_mask);
903 taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, IPL_NET, &cpu_mask,
904 "%s rx_ring cmpl (bucket %d)",
905 device_xname(adapter->pdev), que->cpu);
906 #else
907 taskqueue_start_threads(&rx_ring->cmpl_tq, 1, IPL_NET,
908 "%s rx_ring cmpl %d", device_xname(adapter->pdev), que->cpu);
909 #endif
910 #endif
911
912 return (0);
913
914 err_buf_info_unmap:
915 while (i--) {
916 bus_dmamap_destroy(adapter->sc_dmat,
917 rx_ring->rx_buffer_info[i].map);
918 }
919
920 free(rx_ring->free_rx_ids, M_DEVBUF);
921 rx_ring->free_rx_ids = NULL;
922 free(rx_ring->rx_buffer_info, M_DEVBUF);
923 rx_ring->rx_buffer_info = NULL;
924 return (ENOMEM);
925 }
926
927 /**
928 * ena_free_rx_resources - Free Rx Resources
929 * @adapter: network interface device structure
930 * @qid: queue index
931 *
932 * Free all receive software resources
933 **/
934 static void
935 ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid)
936 {
937 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
938
939 workqueue_wait(rx_ring->cmpl_tq, &rx_ring->cmpl_task);
940 workqueue_destroy(rx_ring->cmpl_tq);
941 rx_ring->cmpl_tq = NULL;
942
943 /* Free buffer DMA maps, */
944 for (int i = 0; i < rx_ring->ring_size; i++) {
945 m_freem(rx_ring->rx_buffer_info[i].mbuf);
946 rx_ring->rx_buffer_info[i].mbuf = NULL;
947 bus_dmamap_unload(adapter->sc_dmat,
948 rx_ring->rx_buffer_info[i].map);
949 bus_dmamap_destroy(adapter->sc_dmat,
950 rx_ring->rx_buffer_info[i].map);
951 }
952
953 #ifdef LRO
954 /* free LRO resources, */
955 tcp_lro_free(&rx_ring->lro);
956 #endif
957
958 /* free allocated memory */
959 free(rx_ring->rx_buffer_info, M_DEVBUF);
960 rx_ring->rx_buffer_info = NULL;
961
962 free(rx_ring->free_rx_ids, M_DEVBUF);
963 rx_ring->free_rx_ids = NULL;
964 }
965
966 /**
967 * ena_setup_all_rx_resources - allocate all queues Rx resources
968 * @adapter: network interface device structure
969 *
 * Returns 0 on success, or an error code on failure.
971 **/
972 static int
973 ena_setup_all_rx_resources(struct ena_adapter *adapter)
974 {
975 int i, rc = 0;
976
977 for (i = 0; i < adapter->num_queues; i++) {
978 rc = ena_setup_rx_resources(adapter, i);
979 if (rc != 0) {
980 device_printf(adapter->pdev,
981 "Allocation for Rx Queue %u failed\n", i);
982 goto err_setup_rx;
983 }
984 }
985 return (0);
986
987 err_setup_rx:
988 /* rewind the index freeing the rings as we go */
989 while (i--)
990 ena_free_rx_resources(adapter, i);
991 return (rc);
992 }
993
994 /**
995 * ena_free_all_rx_resources - Free Rx resources for all queues
996 * @adapter: network interface device structure
997 *
998 * Free all receive software resources
999 **/
1000 static void
1001 ena_free_all_rx_resources(struct ena_adapter *adapter)
1002 {
1003 int i;
1004
1005 for (i = 0; i < adapter->num_queues; i++)
1006 ena_free_rx_resources(adapter, i);
1007 }
1008
1009 static inline int
1010 ena_alloc_rx_mbuf(struct ena_adapter *adapter,
1011 struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info)
1012 {
1013 struct ena_com_buf *ena_buf;
1014 int error;
1015 int mlen;
1016
1017 /* if previous allocated frag is not used */
1018 if (unlikely(rx_info->mbuf != NULL))
1019 return (0);
1020
1021 /* Get mbuf using UMA allocator */
1022 rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
1023
1024 if (unlikely(rx_info->mbuf == NULL)) {
1025 counter_u64_add(rx_ring->rx_stats.mjum_alloc_fail, 1);
1026 rx_info->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1027 if (unlikely(rx_info->mbuf == NULL)) {
1028 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1029 return (ENOMEM);
1030 }
1031 mlen = MCLBYTES;
1032 } else {
1033 mlen = MJUM16BYTES;
1034 }
	/* Set mbuf length */
	rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = mlen;

	/* Map packets for DMA */
	ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
	    "Using tag %p for buffers' DMA mapping, mbuf %p len: %d",
	    adapter->sc_dmat, rx_info->mbuf, rx_info->mbuf->m_len);
1042 error = bus_dmamap_load_mbuf(adapter->sc_dmat, rx_info->map,
1043 rx_info->mbuf, BUS_DMA_NOWAIT);
1044 if (unlikely((error != 0) || (rx_info->map->dm_nsegs != 1))) {
1045 ena_trace(ENA_WARNING, "failed to map mbuf, error: %d, "
1046 "nsegs: %d\n", error, rx_info->map->dm_nsegs);
1047 counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1);
1048 goto exit;
1049
1050 }
1051
1052 bus_dmamap_sync(adapter->sc_dmat, rx_info->map, 0,
1053 rx_info->map->dm_mapsize, BUS_DMASYNC_PREREAD);
1054
1055 ena_buf = &rx_info->ena_buf;
1056 ena_buf->paddr = rx_info->map->dm_segs[0].ds_addr;
1057 ena_buf->len = mlen;
1058
1059 ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH,
1060 "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n",
	    rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr);
1062
1063 return (0);
1064
1065 exit:
1066 m_freem(rx_info->mbuf);
1067 rx_info->mbuf = NULL;
1068 return (EFAULT);
1069 }
1070
1071 static void
1072 ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring,
1073 struct ena_rx_buffer *rx_info)
1074 {
1075
1076 if (rx_info->mbuf == NULL) {
1077 ena_trace(ENA_WARNING, "Trying to free unallocated buffer\n");
1078 return;
1079 }
1080
1081 bus_dmamap_unload(adapter->sc_dmat, rx_info->map);
1082 m_freem(rx_info->mbuf);
1083 rx_info->mbuf = NULL;
1084 }
1085
1086 /**
1087 * ena_refill_rx_bufs - Refills ring with descriptors
1088 * @rx_ring: the ring which we want to feed with free descriptors
1089 * @num: number of descriptors to refill
1090 * Refills the ring with newly allocated DMA-mapped mbufs for receiving
1091 **/
1092 static int
1093 ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num)
1094 {
1095 struct ena_adapter *adapter = rx_ring->adapter;
1096 uint16_t next_to_use, req_id;
1097 uint32_t i;
1098 int rc;
1099
1100 ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d",
1101 rx_ring->qid);
1102
1103 next_to_use = rx_ring->next_to_use;
1104
1105 for (i = 0; i < num; i++) {
1106 struct ena_rx_buffer *rx_info;
1107
1108 ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC,
1109 "RX buffer - next to use: %d", next_to_use);
1110
1111 req_id = rx_ring->free_rx_ids[next_to_use];
1112 rc = validate_rx_req_id(rx_ring, req_id);
1113 if (unlikely(rc != 0))
1114 break;
1115
1116 rx_info = &rx_ring->rx_buffer_info[req_id];
1117
1118 rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info);
1119 if (unlikely(rc != 0)) {
1120 ena_trace(ENA_WARNING,
1121 "failed to alloc buffer for rx queue %d\n",
1122 rx_ring->qid);
1123 break;
1124 }
1125 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1126 &rx_info->ena_buf, req_id);
1127 if (unlikely(rc != 0)) {
1128 ena_trace(ENA_WARNING,
1129 "failed to add buffer for rx queue %d\n",
1130 rx_ring->qid);
1131 break;
1132 }
1133 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1134 rx_ring->ring_size);
1135 }
1136
1137 if (unlikely(i < num)) {
1138 counter_u64_add(rx_ring->rx_stats.refil_partial, 1);
1139 ena_trace(ENA_WARNING,
1140 "refilled rx qid %d with only %d mbufs (from %d)\n",
1141 rx_ring->qid, i, num);
1142 }
1143
1144 if (likely(i != 0)) {
1145 wmb();
1146 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1147 }
1148 rx_ring->next_to_use = next_to_use;
1149 return (i);
1150 }
1151
1152 static void
1153 ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid)
1154 {
1155 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1156 unsigned int i;
1157
1158 for (i = 0; i < rx_ring->ring_size; i++) {
1159 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1160
1161 if (rx_info->mbuf != NULL)
1162 ena_free_rx_mbuf(adapter, rx_ring, rx_info);
1163 }
1164 }
1165
1166 /**
1167 * ena_refill_all_rx_bufs - allocate all queues Rx buffers
1168 * @adapter: network interface device structure
1169 *
1170 */
1171 static void
1172 ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1173 {
1174 struct ena_ring *rx_ring;
1175 int i, rc, bufs_num;
1176
1177 for (i = 0; i < adapter->num_queues; i++) {
1178 rx_ring = &adapter->rx_ring[i];
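		/*
		 * Refill all but one descriptor; one slot is left free so the
		 * ring is never completely full.
		 */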
1179 bufs_num = rx_ring->ring_size - 1;
1180 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1181
1182 if (unlikely(rc != bufs_num))
1183 ena_trace(ENA_WARNING, "refilling Queue %d failed. "
1184 "Allocated %d buffers from: %d\n", i, rc, bufs_num);
1185 }
1186 }
1187
1188 static void
1189 ena_free_all_rx_bufs(struct ena_adapter *adapter)
1190 {
1191 int i;
1192
1193 for (i = 0; i < adapter->num_queues; i++)
1194 ena_free_rx_bufs(adapter, i);
1195 }
1196
1197 /**
1198 * ena_free_tx_bufs - Free Tx Buffers per Queue
1199 * @adapter: network interface device structure
1200 * @qid: queue index
1201 **/
1202 static void
1203 ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid)
1204 {
1205 bool print_once = true;
1206 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
1207
1208 ENA_RING_MTX_LOCK(tx_ring);
1209 for (int i = 0; i < tx_ring->ring_size; i++) {
1210 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1211
1212 if (tx_info->mbuf == NULL)
1213 continue;
1214
1215 if (print_once) {
1216 device_printf(adapter->pdev,
1217 "free uncompleted tx mbuf qid %d idx 0x%x",
1218 qid, i);
1219 print_once = false;
1220 } else {
1221 ena_trace(ENA_DBG,
1222 "free uncompleted tx mbuf qid %d idx 0x%x",
1223 qid, i);
1224 }
1225
1226 bus_dmamap_unload(adapter->sc_dmat, tx_info->map);
1227 m_free(tx_info->mbuf);
1228 tx_info->mbuf = NULL;
1229 }
1230 ENA_RING_MTX_UNLOCK(tx_ring);
1231 }
1232
1233 static void
1234 ena_free_all_tx_bufs(struct ena_adapter *adapter)
1235 {
1236
1237 for (int i = 0; i < adapter->num_queues; i++)
1238 ena_free_tx_bufs(adapter, i);
1239 }
1240
1241 static void
1242 ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1243 {
1244 uint16_t ena_qid;
1245 int i;
1246
1247 for (i = 0; i < adapter->num_queues; i++) {
1248 ena_qid = ENA_IO_TXQ_IDX(i);
1249 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1250 }
1251 }
1252
1253 static void
1254 ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1255 {
1256 uint16_t ena_qid;
1257 int i;
1258
1259 for (i = 0; i < adapter->num_queues; i++) {
1260 ena_qid = ENA_IO_RXQ_IDX(i);
1261 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1262 }
1263 }
1264
1265 static void
1266 ena_destroy_all_io_queues(struct ena_adapter *adapter)
1267 {
1268 ena_destroy_all_tx_queues(adapter);
1269 ena_destroy_all_rx_queues(adapter);
1270 }
1271
1272 static inline int
1273 validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id)
1274 {
1275 struct ena_adapter *adapter = tx_ring->adapter;
1276 struct ena_tx_buffer *tx_info = NULL;
1277
1278 if (likely(req_id < tx_ring->ring_size)) {
1279 tx_info = &tx_ring->tx_buffer_info[req_id];
1280 if (tx_info->mbuf != NULL)
1281 return (0);
1282 }
1283
	/* tx_info may be NULL if req_id was out of range; don't dereference it. */
	if (req_id < tx_ring->ring_size)
		device_printf(adapter->pdev,
		    "tx_info doesn't have valid mbuf\n");
	else
		device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
1289
1290 counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
1291
1292 return (EFAULT);
1293 }
1294
1295 static int
1296 ena_create_io_queues(struct ena_adapter *adapter)
1297 {
1298 struct ena_com_dev *ena_dev = adapter->ena_dev;
1299 struct ena_com_create_io_ctx ctx;
1300 struct ena_ring *ring;
1301 uint16_t ena_qid;
1302 uint32_t msix_vector;
1303 int rc, i;
1304
1305 /* Create TX queues */
1306 for (i = 0; i < adapter->num_queues; i++) {
1307 msix_vector = ENA_IO_IRQ_IDX(i);
1308 ena_qid = ENA_IO_TXQ_IDX(i);
1309 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1310 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1311 ctx.queue_size = adapter->tx_ring_size;
1312 ctx.msix_vector = msix_vector;
1313 ctx.qid = ena_qid;
1314 rc = ena_com_create_io_queue(ena_dev, &ctx);
1315 if (rc != 0) {
1316 device_printf(adapter->pdev,
1317 "Failed to create io TX queue #%d rc: %d\n", i, rc);
1318 goto err_tx;
1319 }
1320 ring = &adapter->tx_ring[i];
1321 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1322 &ring->ena_com_io_sq,
1323 &ring->ena_com_io_cq);
1324 if (rc != 0) {
1325 device_printf(adapter->pdev,
1326 "Failed to get TX queue handlers. TX queue num"
1327 " %d rc: %d\n", i, rc);
1328 ena_com_destroy_io_queue(ena_dev, ena_qid);
1329 goto err_tx;
1330 }
1331 }
1332
1333 /* Create RX queues */
1334 for (i = 0; i < adapter->num_queues; i++) {
1335 msix_vector = ENA_IO_IRQ_IDX(i);
1336 ena_qid = ENA_IO_RXQ_IDX(i);
1337 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1338 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1339 ctx.queue_size = adapter->rx_ring_size;
1340 ctx.msix_vector = msix_vector;
1341 ctx.qid = ena_qid;
1342 rc = ena_com_create_io_queue(ena_dev, &ctx);
1343 if (unlikely(rc != 0)) {
1344 device_printf(adapter->pdev,
1345 "Failed to create io RX queue[%d] rc: %d\n", i, rc);
1346 goto err_rx;
1347 }
1348
1349 ring = &adapter->rx_ring[i];
1350 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1351 &ring->ena_com_io_sq,
1352 &ring->ena_com_io_cq);
1353 if (unlikely(rc != 0)) {
1354 device_printf(adapter->pdev,
1355 "Failed to get RX queue handlers. RX queue num"
1356 " %d rc: %d\n", i, rc);
1357 ena_com_destroy_io_queue(ena_dev, ena_qid);
1358 goto err_rx;
1359 }
1360 }
1361
1362 return (0);
1363
1364 err_rx:
1365 while (i--)
1366 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
1367 i = adapter->num_queues;
1368 err_tx:
1369 while (i--)
1370 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1371
1372 return (ENXIO);
1373 }
1374
1375 /**
1376 * ena_tx_cleanup - clear sent packets and corresponding descriptors
1377 * @tx_ring: ring for which we want to clean packets
1378 *
1379 * Once packets are sent, we ask the device in a loop for no longer used
1380 * descriptors. We find the related mbuf chain in a map (index in an array)
1381 * and free it, then update ring state.
 * This is performed in an "endless" loop, updating ring pointers every
 * TX_COMMIT descriptors; any completions still outstanding when the loop
 * ends are committed one final time afterwards.
1385 **/
1386 static int
1387 ena_tx_cleanup(struct ena_ring *tx_ring)
1388 {
1389 struct ena_adapter *adapter;
1390 struct ena_com_io_cq* io_cq;
1391 uint16_t next_to_clean;
1392 uint16_t req_id;
1393 uint16_t ena_qid;
1394 unsigned int total_done = 0;
1395 int rc;
1396 int commit = TX_COMMIT;
1397 int budget = TX_BUDGET;
1398 int work_done;
1399
1400 adapter = tx_ring->que->adapter;
1401 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
1402 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1403 next_to_clean = tx_ring->next_to_clean;
1404
1405 do {
1406 struct ena_tx_buffer *tx_info;
1407 struct mbuf *mbuf;
1408
1409 rc = ena_com_tx_comp_req_id_get(io_cq, &req_id);
1410 if (unlikely(rc != 0))
1411 break;
1412
1413 rc = validate_tx_req_id(tx_ring, req_id);
1414 if (unlikely(rc != 0))
1415 break;
1416
1417 tx_info = &tx_ring->tx_buffer_info[req_id];
1418
1419 mbuf = tx_info->mbuf;
1420
1421 tx_info->mbuf = NULL;
1422 bintime_clear(&tx_info->timestamp);
1423
1424 if (likely(tx_info->num_of_bufs != 0)) {
1425 /* Map is no longer required */
1426 bus_dmamap_unload(adapter->sc_dmat, tx_info->map);
1427 }
1428
1429 ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d mbuf %p completed",
1430 tx_ring->qid, mbuf);
1431
1432 m_freem(mbuf);
1433
1434 total_done += tx_info->tx_descs;
1435
1436 tx_ring->free_tx_ids[next_to_clean] = req_id;
1437 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1438 tx_ring->ring_size);
1439
1440 if (unlikely(--commit == 0)) {
1441 commit = TX_COMMIT;
1442 /* update ring state every TX_COMMIT descriptor */
1443 tx_ring->next_to_clean = next_to_clean;
1444 ena_com_comp_ack(
1445 &adapter->ena_dev->io_sq_queues[ena_qid],
1446 total_done);
1447 ena_com_update_dev_comp_head(io_cq);
1448 total_done = 0;
1449 }
1450 } while (likely(--budget));
1451
1452 work_done = TX_BUDGET - budget;
1453
1454 ena_trace(ENA_DBG | ENA_TXPTH, "tx: q %d done. total pkts: %d",
1455 tx_ring->qid, work_done);
1456
1457 /* If there is still something to commit update ring state */
1458 if (likely(commit != TX_COMMIT)) {
1459 tx_ring->next_to_clean = next_to_clean;
1460 ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid],
1461 total_done);
1462 ena_com_update_dev_comp_head(io_cq);
1463 }
1464
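	/*
	 * Descriptors were just freed, so kick the deferred start task (unless
	 * one is already pending) to drain mbufs waiting in the buf ring.
	 */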
1465 if (atomic_cas_uint(&tx_ring->task_pending, 0, 1) == 0)
1466 workqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL);
1467
1468 return (work_done);
1469 }
1470
1471 #if 0
1472 static void
1473 ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1474 struct mbuf *mbuf)
1475 {
1476 struct ena_adapter *adapter = rx_ring->adapter;
1477
1478 if (likely(adapter->rss_support)) {
1479 mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
1480
1481 if (ena_rx_ctx->frag &&
1482 (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
1483 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1484 return;
1485 }
1486
1487 switch (ena_rx_ctx->l3_proto) {
1488 case ENA_ETH_IO_L3_PROTO_IPV4:
1489 switch (ena_rx_ctx->l4_proto) {
1490 case ENA_ETH_IO_L4_PROTO_TCP:
1491 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
1492 break;
1493 case ENA_ETH_IO_L4_PROTO_UDP:
1494 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
1495 break;
1496 default:
1497 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
1498 }
1499 break;
1500 case ENA_ETH_IO_L3_PROTO_IPV6:
1501 switch (ena_rx_ctx->l4_proto) {
1502 case ENA_ETH_IO_L4_PROTO_TCP:
1503 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
1504 break;
1505 case ENA_ETH_IO_L4_PROTO_UDP:
1506 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
1507 break;
1508 default:
1509 M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
1510 }
1511 break;
1512 case ENA_ETH_IO_L3_PROTO_UNKNOWN:
1513 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1514 break;
1515 default:
1516 M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
1517 }
1518 } else {
1519 mbuf->m_pkthdr.flowid = rx_ring->qid;
1520 M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
1521 }
1522 }
1523 #endif
1524
1525 /**
1526 * ena_rx_mbuf - assemble mbuf from descriptors
1527 * @rx_ring: ring for which we want to clean packets
1528 * @ena_bufs: buffer info
1529 * @ena_rx_ctx: metadata for this packet(s)
1530 * @next_to_clean: ring pointer, will be updated only upon success
1531 *
1532 **/
1533 static struct mbuf*
1534 ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
1535 struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean)
1536 {
1537 struct mbuf *mbuf;
1538 struct ena_rx_buffer *rx_info;
1539 struct ena_adapter *adapter;
1540 unsigned int descs = ena_rx_ctx->descs;
1541 uint16_t ntc, len, req_id, buf = 0;
1542
1543 ntc = *next_to_clean;
1544 adapter = rx_ring->adapter;
1545 rx_info = &rx_ring->rx_buffer_info[ntc];
1546
1547 if (unlikely(rx_info->mbuf == NULL)) {
1548 device_printf(adapter->pdev, "NULL mbuf in rx_info");
1549 return (NULL);
1550 }
1551
1552 len = ena_bufs[buf].len;
1553 req_id = ena_bufs[buf].req_id;
1554 rx_info = &rx_ring->rx_buffer_info[req_id];
1555
1556 ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx",
1557 rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);
1558
1559 mbuf = rx_info->mbuf;
1560 KASSERT(mbuf->m_flags & M_PKTHDR);
1561 mbuf->m_pkthdr.len = len;
1562 mbuf->m_len = len;
1563 m_set_rcvif(mbuf, rx_ring->que->adapter->ifp);
1564
	/* Fill mbuf with hash key and its interpretation for optimization */
1566 #if 0
1567 ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
1568 #endif
1569
1570 ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d",
1571 mbuf, mbuf->m_flags, mbuf->m_pkthdr.len);
1572
1573 /* DMA address is not needed anymore, unmap it */
1574 bus_dmamap_unload(rx_ring->adapter->sc_dmat, rx_info->map);
1575
1576 rx_info->mbuf = NULL;
1577 rx_ring->free_rx_ids[ntc] = req_id;
1578 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1579
1580 /*
1581 * While we have more than 1 descriptors for one rcvd packet, append
1582 * other mbufs to the main one
1583 */
1584 while (--descs) {
1585 ++buf;
1586 len = ena_bufs[buf].len;
1587 req_id = ena_bufs[buf].req_id;
1588 rx_info = &rx_ring->rx_buffer_info[req_id];
1589
1590 if (unlikely(rx_info->mbuf == NULL)) {
1591 device_printf(adapter->pdev, "NULL mbuf in rx_info");
1592 /*
1593 * If one of the required mbufs was not allocated yet,
1594 * we can break there.
1595 * All earlier used descriptors will be reallocated
1596 * later and not used mbufs can be reused.
1597 * The next_to_clean pointer will not be updated in case
1598 * of an error, so caller should advance it manually
1599 * in error handling routine to keep it up to date
1600 * with hw ring.
1601 */
1602 m_freem(mbuf);
1603 return (NULL);
1604 }
1605
1606 if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
1607 counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
1608 ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p",
1609 mbuf);
1610 }
1611
1612 ena_trace(ENA_DBG | ENA_RXPTH,
1613 "rx mbuf updated. len %d", mbuf->m_pkthdr.len);
1614
1615 /* Free already appended mbuf, it won't be useful anymore */
1616 bus_dmamap_unload(rx_ring->adapter->sc_dmat, rx_info->map);
1617 m_freem(rx_info->mbuf);
1618 rx_info->mbuf = NULL;
1619
1620 rx_ring->free_rx_ids[ntc] = req_id;
1621 ntc = ENA_RX_RING_IDX_NEXT(ntc, rx_ring->ring_size);
1622 }
1623
1624 *next_to_clean = ntc;
1625
1626 return (mbuf);
1627 }
1628
1629 /**
1630 * ena_rx_checksum - indicate in mbuf if hw indicated a good cksum
1631 **/
1632 static inline void
1633 ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
1634 struct mbuf *mbuf)
1635 {
1636
1637 /* IPv4 */
1638 if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) {
1639 mbuf->m_pkthdr.csum_flags |= M_CSUM_IPv4;
1640 if (ena_rx_ctx->l3_csum_err) {
1641 /* ipv4 checksum error */
1642 mbuf->m_pkthdr.csum_flags |= M_CSUM_IPv4_BAD;
1643 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1644 ena_trace(ENA_DBG, "RX IPv4 header checksum error");
1645 return;
1646 }
1647
1648 /* TCP/UDP */
1649 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1650 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
			mbuf->m_pkthdr.csum_flags |=
			    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ?
			    M_CSUM_TCPv4 : M_CSUM_UDPv4;
1652 if (ena_rx_ctx->l4_csum_err) {
1653 /* TCP/UDP checksum error */
1654 mbuf->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
1655 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1656 ena_trace(ENA_DBG, "RX L4 checksum error");
1657 }
1658 }
1659 }
1660 /* IPv6 */
1661 else if (ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV6) {
1662 /* TCP/UDP */
1663 if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1664 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
			mbuf->m_pkthdr.csum_flags |=
			    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ?
			    M_CSUM_TCPv6 : M_CSUM_UDPv6;
1666 if (ena_rx_ctx->l4_csum_err) {
1667 /* TCP/UDP checksum error */
1668 mbuf->m_pkthdr.csum_flags |= M_CSUM_TCP_UDP_BAD;
1669 counter_u64_add(rx_ring->rx_stats.bad_csum, 1);
1670 ena_trace(ENA_DBG, "RX L4 checksum error");
1671 }
1672 }
1673 }
1674 }
1675
1676 static void
1677 ena_deferred_rx_cleanup(struct work *wk, void *arg)
1678 {
1679 struct ena_ring *rx_ring = arg;
1680 int budget = CLEAN_BUDGET;
1681
1682 atomic_swap_uint(&rx_ring->task_pending, 0);
1683
1684 ENA_RING_MTX_LOCK(rx_ring);
1685 /*
1686 * If deferred task was executed, perform cleanup of all awaiting
1687 * descs (or until given budget is depleted to avoid infinite loop).
1688 */
1689 while (likely(budget--)) {
1690 if (ena_rx_cleanup(rx_ring) == 0)
1691 break;
1692 }
1693 ENA_RING_MTX_UNLOCK(rx_ring);
1694 }
1695
1696 /**
1697 * ena_rx_cleanup - handle rx irq
 * @rx_ring: ring for which the irq is being handled
1699 **/
1700 static int
1701 ena_rx_cleanup(struct ena_ring *rx_ring)
1702 {
1703 struct ena_adapter *adapter;
1704 struct mbuf *mbuf;
1705 struct ena_com_rx_ctx ena_rx_ctx;
1706 struct ena_com_io_cq* io_cq;
1707 struct ena_com_io_sq* io_sq;
1708 struct ifnet *ifp;
1709 uint16_t ena_qid;
1710 uint16_t next_to_clean;
1711 uint32_t refill_required;
1712 uint32_t refill_threshold;
1713 uint32_t do_if_input = 0;
1714 unsigned int qid;
1715 int rc, i;
1716 int budget = RX_BUDGET;
1717
1718 adapter = rx_ring->que->adapter;
1719 ifp = adapter->ifp;
1720 qid = rx_ring->que->id;
1721 ena_qid = ENA_IO_RXQ_IDX(qid);
1722 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1723 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
1724 next_to_clean = rx_ring->next_to_clean;
1725
1726 ena_trace(ENA_DBG, "rx: qid %d", qid);
1727
1728 do {
1729 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1730 ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
1731 ena_rx_ctx.descs = 0;
1732 rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
1733
1734 if (unlikely(rc != 0))
1735 goto error;
1736
1737 if (unlikely(ena_rx_ctx.descs == 0))
1738 break;
1739
1740 ena_trace(ENA_DBG | ENA_RXPTH, "rx: q %d got packet from ena. "
1741 "descs #: %d l3 proto %d l4 proto %d hash: %x",
1742 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1743 ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1744
1745 /* Receive mbuf from the ring */
1746 mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs,
1747 &ena_rx_ctx, &next_to_clean);
1748
1749 /* Exit if we failed to retrieve a buffer */
1750 if (unlikely(mbuf == NULL)) {
1751 for (i = 0; i < ena_rx_ctx.descs; ++i) {
1752 rx_ring->free_rx_ids[next_to_clean] =
1753 rx_ring->ena_bufs[i].req_id;
1754 next_to_clean =
1755 ENA_RX_RING_IDX_NEXT(next_to_clean,
1756 rx_ring->ring_size);
1757
1758 }
1759 break;
1760 }
1761
1762 if (((ifp->if_capenable & IFCAP_CSUM_IPv4_Rx) != 0) ||
1763 ((ifp->if_capenable & IFCAP_CSUM_TCPv4_Rx) != 0) ||
1764 ((ifp->if_capenable & IFCAP_CSUM_UDPv4_Rx) != 0) ||
1765 ((ifp->if_capenable & IFCAP_CSUM_TCPv6_Rx) != 0) ||
1766 ((ifp->if_capenable & IFCAP_CSUM_UDPv6_Rx) != 0)) {
1767 ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
1768 }
1769
1770 counter_enter();
1771 counter_u64_add_protected(rx_ring->rx_stats.bytes,
1772 mbuf->m_pkthdr.len);
1773 counter_u64_add_protected(adapter->hw_stats.rx_bytes,
1774 mbuf->m_pkthdr.len);
1775 counter_exit();
1776 /*
1777 * LRO is only for IP/TCP packets and TCP checksum of the packet
1778 * should be computed by hardware.
1779 */
1780 do_if_input = 1;
1781 #ifdef LRO
1782 if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
1783 ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
1784 (ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
1785 /*
1786 * Send to the stack if:
1787 * - LRO not enabled, or
1788 * - no LRO resources, or
1789 * - lro enqueue fails
1790 */
1791 if ((rx_ring->lro.lro_cnt != 0) &&
1792 (tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0))
1793 do_if_input = 0;
1794 }
1795 #endif
1796 if (do_if_input != 0) {
1797 ena_trace(ENA_DBG | ENA_RXPTH,
1798 "calling if_input() with mbuf %p", mbuf);
1799 if_percpuq_enqueue(ifp->if_percpuq, mbuf);
1800 }
1801
1802 counter_enter();
1803 counter_u64_add_protected(rx_ring->rx_stats.cnt, 1);
1804 counter_u64_add_protected(adapter->hw_stats.rx_packets, 1);
1805 counter_exit();
1806 } while (--budget);
1807
1808 rx_ring->next_to_clean = next_to_clean;
1809
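	/*
	 * Refill in batches: only replenish once a fraction of the ring has
	 * been consumed.
	 */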
1810 refill_required = ena_com_free_desc(io_sq);
1811 refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
1812
1813 if (refill_required > refill_threshold) {
1814 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1815 ena_refill_rx_bufs(rx_ring, refill_required);
1816 }
1817
1818 #ifdef LRO
1819 tcp_lro_flush_all(&rx_ring->lro);
1820 #endif
1821
1822 return (RX_BUDGET - budget);
1823
1824 error:
1825 counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
1826 return (RX_BUDGET - budget);
1827 }
1828
1829 /*********************************************************************
1830 *
1831 * MSIX & Interrupt Service routine
1832 *
1833 **********************************************************************/
1834
1835 /**
 * ena_intr_msix_mgmnt - MSI-X interrupt handler for the admin/async queue
 * @arg: the ena_adapter for this device
1838 **/
1839 static int
1840 ena_intr_msix_mgmnt(void *arg)
1841 {
1842 struct ena_adapter *adapter = (struct ena_adapter *)arg;
1843
1844 ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1845 if (likely(adapter->running))
1846 ena_com_aenq_intr_handler(adapter->ena_dev, arg);
1847
1848 return 1;
1849 }
1850
1851 /**
1852  * ena_handle_msix - MSI-X interrupt handler for Tx/Rx queues
1853  * @arg: pointer to the queue's struct ena_que
1854 **/
1855 static int
1856 ena_handle_msix(void *arg)
1857 {
1858 struct ena_que *que = arg;
1859 struct ena_adapter *adapter = que->adapter;
1860 struct ifnet *ifp = adapter->ifp;
1861 struct ena_ring *tx_ring;
1862 struct ena_ring *rx_ring;
1863 struct ena_com_io_cq* io_cq;
1864 struct ena_eth_io_intr_reg intr_reg;
1865 int qid, ena_qid;
1866 int txc, rxc, i;
1867
1868 if (unlikely((if_getdrvflags(ifp) & IFF_RUNNING) == 0))
1869 return 0;
1870
1871 ena_trace(ENA_DBG, "MSI-X TX/RX routine");
1872
1873 tx_ring = que->tx_ring;
1874 rx_ring = que->rx_ring;
1875 qid = que->id;
1876 ena_qid = ENA_IO_TXQ_IDX(qid);
1877 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
1878
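	/* Alternate Tx and Rx cleanup until neither exhausts its budget, or CLEAN_BUDGET iterations have been done. */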
1879 for (i = 0; i < CLEAN_BUDGET; ++i) {
1880 /*
1881 * If lock cannot be acquired, then deferred cleanup task was
1882 * being executed and rx ring is being cleaned up in
1883 * another thread.
1884 */
1885 if (likely(ENA_RING_MTX_TRYLOCK(rx_ring) != 0)) {
1886 rxc = ena_rx_cleanup(rx_ring);
1887 ENA_RING_MTX_UNLOCK(rx_ring);
1888 } else {
1889 rxc = 0;
1890 }
1891
1892 /* Protection from calling ena_tx_cleanup from ena_start_xmit */
1893 ENA_RING_MTX_LOCK(tx_ring);
1894 txc = ena_tx_cleanup(tx_ring);
1895 ENA_RING_MTX_UNLOCK(tx_ring);
1896
1897 if (unlikely((if_getdrvflags(ifp) & IFF_RUNNING) == 0))
1898 return 0;
1899
1900 if ((txc != TX_BUDGET) && (rxc != RX_BUDGET))
1901 break;
1902 }
1903
1904 /* Signal that work is done and unmask interrupt */
1905 ena_com_update_intr_reg(&intr_reg,
1906 RX_IRQ_INTERVAL,
1907 TX_IRQ_INTERVAL,
1908 true);
1909 ena_com_unmask_intr(io_cq, &intr_reg);
1910
1911 return 1;
1912 }
1913
1914 static int
1915 ena_enable_msix(struct ena_adapter *adapter)
1916 {
1917 int msix_req;
1918 int counts[PCI_INTR_TYPE_SIZE];
1919 int max_type;
1920
1921 	/* Reserve the maximum number of MSI-X vectors we might need */
1922 msix_req = ENA_MAX_MSIX_VEC(adapter->num_queues);
1923
1924 counts[PCI_INTR_TYPE_INTX] = 0;
1925 counts[PCI_INTR_TYPE_MSI] = 0;
1926 counts[PCI_INTR_TYPE_MSIX] = msix_req;
1927 max_type = PCI_INTR_TYPE_MSIX;
1928
1929 if (pci_intr_alloc(&adapter->sc_pa, &adapter->sc_intrs, counts,
1930 max_type) != 0) {
1931 aprint_error_dev(adapter->pdev,
1932 "failed to allocate interrupt\n");
1933 return ENOSPC;
1934 }
1935
1936 adapter->sc_nintrs = counts[PCI_INTR_TYPE_MSIX];
1937
1938 if (counts[PCI_INTR_TYPE_MSIX] != msix_req) {
1939 device_printf(adapter->pdev,
1940 		    "Only %d MSI-X vectors were allocated (out of %d requested); "
1941 		    "reducing the number of queues\n", adapter->sc_nintrs, msix_req);
1942 adapter->num_queues = adapter->sc_nintrs - ENA_ADMIN_MSIX_VEC;
1943 }
1944
1945 return 0;
1946 }
1947
1948 #if 0
1949 static void
1950 ena_setup_io_intr(struct ena_adapter *adapter)
1951 {
1952 static int last_bind_cpu = -1;
1953 int irq_idx;
1954
1955 for (int i = 0; i < adapter->num_queues; i++) {
1956 irq_idx = ENA_IO_IRQ_IDX(i);
1957
1958 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1959 "%s-TxRx-%d", device_xname(adapter->pdev), i);
1960 adapter->irq_tbl[irq_idx].handler = ena_handle_msix;
1961 adapter->irq_tbl[irq_idx].data = &adapter->que[i];
1962 adapter->irq_tbl[irq_idx].vector =
1963 adapter->msix_entries[irq_idx].vector;
1964 ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n",
1965 adapter->msix_entries[irq_idx].vector);
1966 #ifdef RSS
1967 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1968 rss_getcpu(i % rss_getnumbuckets());
1969 #else
1970 /*
1971 * We still want to bind rings to the corresponding cpu
1972 * using something similar to the RSS round-robin technique.
1973 */
1974 if (unlikely(last_bind_cpu < 0))
1975 last_bind_cpu = CPU_FIRST();
1976 adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu =
1977 last_bind_cpu;
1978 last_bind_cpu = CPU_NEXT(last_bind_cpu);
1979 #endif
1980 }
1981 }
1982 #endif
1983
1984 static int
1985 ena_request_mgmnt_irq(struct ena_adapter *adapter)
1986 {
1987 const char *intrstr;
1988 char intrbuf[PCI_INTRSTR_LEN];
1989 char intr_xname[INTRDEVNAMEBUF];
1990 pci_chipset_tag_t pc = adapter->sc_pa.pa_pc;
1991 const int irq_slot = ENA_MGMNT_IRQ_IDX;
1992
1993 KASSERT(adapter->sc_intrs != NULL);
1994 KASSERT(adapter->sc_ihs[irq_slot] == NULL);
1995
1996 snprintf(intr_xname, sizeof(intr_xname), "%s mgmnt",
1997 device_xname(adapter->pdev));
1998 intrstr = pci_intr_string(pc, adapter->sc_intrs[irq_slot],
1999 intrbuf, sizeof(intrbuf));
2000
2001 adapter->sc_ihs[irq_slot] = pci_intr_establish_xname(
2002 pc, adapter->sc_intrs[irq_slot],
2003 IPL_NET, ena_intr_msix_mgmnt, adapter, intr_xname);
2004
2005 if (adapter->sc_ihs[irq_slot] == NULL) {
2006 device_printf(adapter->pdev, "failed to register "
2007 "interrupt handler for MGMNT irq %s\n",
2008 intrstr);
2009 return ENOMEM;
2010 }
2011
2012 aprint_normal_dev(adapter->pdev,
2013 "for MGMNT interrupting at %s\n", intrstr);
2014
2015 return 0;
2016 }
2017
2018 static int
2019 ena_request_io_irq(struct ena_adapter *adapter)
2020 {
2021 const char *intrstr;
2022 char intrbuf[PCI_INTRSTR_LEN];
2023 char intr_xname[INTRDEVNAMEBUF];
2024 pci_chipset_tag_t pc = adapter->sc_pa.pa_pc;
2025 const int irq_off = ENA_IO_IRQ_FIRST_IDX;
2026 void *vih;
2027 kcpuset_t *affinity;
2028 int i;
2029
2030 KASSERT(adapter->sc_intrs != NULL);
2031
2032 kcpuset_create(&affinity, false);
2033
2034 for (i = 0; i < adapter->num_queues; i++) {
2035 int irq_slot = i + irq_off;
2036 int affinity_to = (irq_slot) % ncpu;
2037
2038 KASSERT((void *)adapter->sc_intrs[irq_slot] != NULL);
2039 KASSERT(adapter->sc_ihs[irq_slot] == NULL);
2040
2041 snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
2042 device_xname(adapter->pdev), i);
2043 intrstr = pci_intr_string(pc, adapter->sc_intrs[irq_slot],
2044 intrbuf, sizeof(intrbuf));
2045
2046 vih = pci_intr_establish_xname(adapter->sc_pa.pa_pc,
2047 adapter->sc_intrs[irq_slot], IPL_NET,
2048 ena_handle_msix, &adapter->que[i], intr_xname);
2049
2050 		if (vih == NULL) {
2051 device_printf(adapter->pdev, "failed to register "
2052 "interrupt handler for IO queue %d irq %s\n",
2053 i, intrstr);
2054 goto err;
2055 }
2056
2057 kcpuset_zero(affinity);
2058 /* Round-robin affinity */
2059 kcpuset_set(affinity, affinity_to);
2060 int error = interrupt_distribute(vih, affinity, NULL);
2061 if (error == 0) {
2062 aprint_normal_dev(adapter->pdev,
2063 "for IO queue %d interrupting at %s"
2064 " affinity to %u\n", i, intrstr, affinity_to);
2065 } else {
2066 aprint_normal_dev(adapter->pdev,
2067 "for IO queue %d interrupting at %s\n", i, intrstr);
2068 }
2069
2070 adapter->sc_ihs[irq_slot] = vih;
2071
2072 #ifdef RSS
2073 ena_trace(ENA_INFO, "queue %d - RSS bucket %d\n",
2074 i - ENA_IO_IRQ_FIRST_IDX, irq->cpu);
2075 #else
2076 ena_trace(ENA_INFO, "queue %d - cpu %d\n",
2077 i - ENA_IO_IRQ_FIRST_IDX, affinity_to);
2078 #endif
2079 }
2080
2081 kcpuset_destroy(affinity);
2082 return 0;
2083
2084 err:
2085 kcpuset_destroy(affinity);
2086
2087 	for (i--; i >= 0; i--) {
2088 		int irq_slot = i + irq_off;
2089 
2090 		KASSERT(adapter->sc_ihs[irq_slot] != NULL);
2091 		pci_intr_disestablish(adapter->sc_pa.pa_pc,
2092 		    adapter->sc_ihs[irq_slot]);
2093 		adapter->sc_ihs[irq_slot] = NULL;
2094 	}
2095
2096 return ENOSPC;
2097 }
2098
2099 static void
2100 ena_free_mgmnt_irq(struct ena_adapter *adapter)
2101 {
2102 const int irq_slot = ENA_MGMNT_IRQ_IDX;
2103
2104 if (adapter->sc_ihs[irq_slot]) {
2105 pci_intr_disestablish(adapter->sc_pa.pa_pc,
2106 adapter->sc_ihs[irq_slot]);
2107 adapter->sc_ihs[irq_slot] = NULL;
2108 }
2109 }
2110
2111 static void
2112 ena_free_io_irq(struct ena_adapter *adapter)
2113 {
2114 const int irq_off = ENA_IO_IRQ_FIRST_IDX;
2115
2116 for (int i = 0; i < adapter->num_queues; i++) {
2117 int irq_slot = i + irq_off;
2118
2119 		if (adapter->sc_ihs[irq_slot]) {
2120 			pci_intr_disestablish(adapter->sc_pa.pa_pc,
2121 			    adapter->sc_ihs[irq_slot]);
2122 			adapter->sc_ihs[irq_slot] = NULL;
2123 }
2124 }
2125 }
2126
2127 static void
2128 ena_free_irqs(struct ena_adapter* adapter)
2129 {
2130
2131 ena_free_io_irq(adapter);
2132 ena_free_mgmnt_irq(adapter);
2133 ena_disable_msix(adapter);
2134 }
2135
2136 static void
2137 ena_disable_msix(struct ena_adapter *adapter)
2138 {
2139 pci_intr_release(adapter->sc_pa.pa_pc, adapter->sc_intrs,
2140 adapter->sc_nintrs);
2141 }
2142
2143 static void
2144 ena_unmask_all_io_irqs(struct ena_adapter *adapter)
2145 {
2146 struct ena_com_io_cq* io_cq;
2147 struct ena_eth_io_intr_reg intr_reg;
2148 uint16_t ena_qid;
2149 int i;
2150
2151 /* Unmask interrupts for all queues */
2152 for (i = 0; i < adapter->num_queues; i++) {
2153 ena_qid = ENA_IO_TXQ_IDX(i);
2154 io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
2155 ena_com_update_intr_reg(&intr_reg, 0, 0, true);
2156 ena_com_unmask_intr(io_cq, &intr_reg);
2157 }
2158 }
2159
2160 /* Configure the Rx forwarding */
2161 static int
2162 ena_rss_configure(struct ena_adapter *adapter)
2163 {
2164 struct ena_com_dev *ena_dev = adapter->ena_dev;
2165 int rc;
2166
2167 /* Set indirect table */
2168 rc = ena_com_indirect_table_set(ena_dev);
2169 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2170 return (rc);
2171
2172 /* Configure hash function (if supported) */
2173 rc = ena_com_set_hash_function(ena_dev);
2174 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2175 return (rc);
2176
2177 /* Configure hash inputs (if supported) */
2178 rc = ena_com_set_hash_ctrl(ena_dev);
2179 if (unlikely((rc != 0) && (rc != EOPNOTSUPP)))
2180 return (rc);
2181
2182 return (0);
2183 }
2184
2185 static int
2186 ena_up_complete(struct ena_adapter *adapter)
2187 {
2188 int rc;
2189
2190 if (likely(adapter->rss_support)) {
2191 rc = ena_rss_configure(adapter);
2192 if (rc != 0)
2193 return (rc);
2194 }
2195
2196 rc = ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu);
2197 if (unlikely(rc != 0))
2198 return (rc);
2199
2200 ena_refill_all_rx_bufs(adapter);
2201 ena_reset_counters((struct evcnt *)&adapter->hw_stats,
2202 sizeof(adapter->hw_stats));
2203
2204 return (0);
2205 }
2206
2207 static int
2208 ena_up(struct ena_adapter *adapter)
2209 {
2210 int rc = 0;
2211
2212 #if 0
2213 if (unlikely(device_is_attached(adapter->pdev) == 0)) {
2214 device_printf(adapter->pdev, "device is not attached!\n");
2215 return (ENXIO);
2216 }
2217 #endif
2218
2219 if (unlikely(!adapter->running)) {
2220 device_printf(adapter->pdev, "device is not running!\n");
2221 return (ENXIO);
2222 }
2223
2224 if (!adapter->up) {
2225 device_printf(adapter->pdev, "device is going UP\n");
2226
2227 /* setup interrupts for IO queues */
2228 rc = ena_request_io_irq(adapter);
2229 if (unlikely(rc != 0)) {
2230 ena_trace(ENA_ALERT, "err_req_irq");
2231 goto err_req_irq;
2232 }
2233
2234 /* allocate transmit descriptors */
2235 rc = ena_setup_all_tx_resources(adapter);
2236 if (unlikely(rc != 0)) {
2237 ena_trace(ENA_ALERT, "err_setup_tx");
2238 goto err_setup_tx;
2239 }
2240
2241 /* allocate receive descriptors */
2242 rc = ena_setup_all_rx_resources(adapter);
2243 if (unlikely(rc != 0)) {
2244 ena_trace(ENA_ALERT, "err_setup_rx");
2245 goto err_setup_rx;
2246 }
2247
2248 /* create IO queues for Rx & Tx */
2249 rc = ena_create_io_queues(adapter);
2250 if (unlikely(rc != 0)) {
2251 ena_trace(ENA_ALERT,
2252 "create IO queues failed");
2253 goto err_io_que;
2254 }
2255
2256 if (unlikely(adapter->link_status))
2257 if_link_state_change(adapter->ifp, LINK_STATE_UP);
2258
2259 rc = ena_up_complete(adapter);
2260 if (unlikely(rc != 0))
2261 goto err_up_complete;
2262
2263 counter_u64_add(adapter->dev_stats.interface_up, 1);
2264
2265 ena_update_hwassist(adapter);
2266
2267 if_setdrvflagbits(adapter->ifp, IFF_RUNNING,
2268 IFF_OACTIVE);
2269
2270 callout_reset(&adapter->timer_service, hz,
2271 ena_timer_service, (void *)adapter);
2272
2273 adapter->up = true;
2274
2275 ena_unmask_all_io_irqs(adapter);
2276 }
2277
2278 return (0);
2279
2280 err_up_complete:
2281 ena_destroy_all_io_queues(adapter);
2282 err_io_que:
2283 ena_free_all_rx_resources(adapter);
2284 err_setup_rx:
2285 ena_free_all_tx_resources(adapter);
2286 err_setup_tx:
2287 ena_free_io_irq(adapter);
2288 err_req_irq:
2289 return (rc);
2290 }
2291
2292 #if 0
2293 static uint64_t
2294 ena_get_counter(struct ifnet *ifp, ift_counter cnt)
2295 {
2296 struct ena_adapter *adapter;
2297 struct ena_hw_stats *stats;
2298
2299 adapter = if_getsoftc(ifp);
2300 stats = &adapter->hw_stats;
2301
2302 switch (cnt) {
2303 case IFCOUNTER_IPACKETS:
2304 return (counter_u64_fetch(stats->rx_packets));
2305 case IFCOUNTER_OPACKETS:
2306 return (counter_u64_fetch(stats->tx_packets));
2307 case IFCOUNTER_IBYTES:
2308 return (counter_u64_fetch(stats->rx_bytes));
2309 case IFCOUNTER_OBYTES:
2310 return (counter_u64_fetch(stats->tx_bytes));
2311 case IFCOUNTER_IQDROPS:
2312 return (counter_u64_fetch(stats->rx_drops));
2313 default:
2314 return (if_get_counter_default(ifp, cnt));
2315 }
2316 }
2317 #endif
2318
2319 static int
2320 ena_media_change(struct ifnet *ifp)
2321 {
2322 /* Media Change is not supported by firmware */
2323 return (0);
2324 }
2325
2326 static void
2327 ena_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2328 {
2329 struct ena_adapter *adapter = if_getsoftc(ifp);
2330 ena_trace(ENA_DBG, "enter");
2331
2332 mutex_enter(&adapter->global_mtx);
2333
2334 ifmr->ifm_status = IFM_AVALID;
2335 ifmr->ifm_active = IFM_ETHER;
2336
2337 if (!adapter->link_status) {
2338 mutex_exit(&adapter->global_mtx);
2339 ena_trace(ENA_INFO, "link_status = false");
2340 return;
2341 }
2342
2343 ifmr->ifm_status |= IFM_ACTIVE;
2344 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
2345
2346 mutex_exit(&adapter->global_mtx);
2347 }
2348
2349 static int
2350 ena_init(struct ifnet *ifp)
2351 {
2352 struct ena_adapter *adapter = if_getsoftc(ifp);
2353
2354 if (!adapter->up) {
2355 rw_enter(&adapter->ioctl_sx, RW_WRITER);
2356 ena_up(adapter);
2357 rw_exit(&adapter->ioctl_sx);
2358 }
2359
2360 return 0;
2361 }
2362
2363 static int
2364 ena_ioctl(struct ifnet *ifp, u_long command, void *data)
2365 {
2366 struct ena_adapter *adapter;
2367 struct ifreq *ifr;
2368 int rc;
2369
2370 adapter = ifp->if_softc;
2371 ifr = (struct ifreq *)data;
2372
2373 /*
2374  * Acquire the lock to prevent the up and down routines from running in parallel.
2375 */
2376 rc = 0;
2377 switch (command) {
2378 case SIOCSIFMTU:
2379 if (ifp->if_mtu == ifr->ifr_mtu)
2380 break;
2381 rw_enter(&adapter->ioctl_sx, RW_WRITER);
2382 ena_down(adapter);
2383
2384 ena_change_mtu(ifp, ifr->ifr_mtu);
2385
2386 rc = ena_up(adapter);
2387 rw_exit(&adapter->ioctl_sx);
2388 break;
2389
2390 case SIOCSIFFLAGS:
2391 if ((ifp->if_flags & IFF_UP) != 0) {
2392 if ((if_getdrvflags(ifp) & IFF_RUNNING) != 0) {
2393 if ((ifp->if_flags & (IFF_PROMISC |
2394 IFF_ALLMULTI)) != 0) {
2395 device_printf(adapter->pdev,
2396 "ioctl promisc/allmulti\n");
2397 }
2398 } else {
2399 rw_enter(&adapter->ioctl_sx, RW_WRITER);
2400 rc = ena_up(adapter);
2401 rw_exit(&adapter->ioctl_sx);
2402 }
2403 } else {
2404 if ((if_getdrvflags(ifp) & IFF_RUNNING) != 0) {
2405 rw_enter(&adapter->ioctl_sx, RW_WRITER);
2406 ena_down(adapter);
2407 rw_exit(&adapter->ioctl_sx);
2408 }
2409 }
2410 break;
2411
2412 case SIOCADDMULTI:
2413 case SIOCDELMULTI:
2414 break;
2415
2416 case SIOCSIFCAP:
2417 {
2418 struct ifcapreq *ifcr = data;
2419 int reinit = 0;
2420
2421 if (ifcr->ifcr_capenable != ifp->if_capenable) {
2422 ifp->if_capenable = ifcr->ifcr_capenable;
2423 reinit = 1;
2424 }
2425
2426 if ((reinit != 0) &&
2427 ((if_getdrvflags(ifp) & IFF_RUNNING) != 0)) {
2428 rw_enter(&adapter->ioctl_sx, RW_WRITER);
2429 ena_down(adapter);
2430 rc = ena_up(adapter);
2431 rw_exit(&adapter->ioctl_sx);
2432 }
2433 }
2434
2435 break;
2436 default:
2437 rc = ether_ioctl(ifp, command, data);
2438 break;
2439 }
2440
2441 return (rc);
2442 }
2443
2444 static int
2445 ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat)
2446 {
2447 int caps = 0;
2448
2449 if ((feat->offload.tx &
2450 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2451 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK |
2452 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) != 0)
2453 caps |= IFCAP_CSUM_IPv4_Tx;
2454
2455 if ((feat->offload.tx &
2456 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK |
2457 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) != 0)
2458 caps |= IFCAP_CSUM_TCPv6_Tx | IFCAP_CSUM_UDPv6_Tx;
2459
2460 if ((feat->offload.tx &
2461 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) != 0)
2462 caps |= IFCAP_TSOv4;
2463
2464 if ((feat->offload.tx &
2465 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) != 0)
2466 caps |= IFCAP_TSOv6;
2467
2468 if ((feat->offload.rx_supported &
2469 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK |
2470 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) != 0)
2471 caps |= IFCAP_CSUM_IPv4_Rx;
2472
2473 if ((feat->offload.rx_supported &
2474 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) != 0)
2475 caps |= IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_UDPv6_Rx;
2476
2477 caps |= IFCAP_LRO;
2478
2479 return (caps);
2480 }
2481
2482 static void
2483 ena_update_host_info(struct ena_admin_host_info *host_info, struct ifnet *ifp)
2484 {
2485
2486 host_info->supported_network_features[0] =
2487 (uint32_t)if_getcapabilities(ifp);
2488 }
2489
2490 static void
2491 ena_update_hwassist(struct ena_adapter *adapter)
2492 {
2493 struct ifnet *ifp = adapter->ifp;
2494 uint32_t feat = adapter->tx_offload_cap;
2495 int cap = if_getcapenable(ifp);
2496 int flags = 0;
2497
2498 if_clearhwassist(ifp);
2499
2500 if ((cap & (IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_UDPv4_Tx))
2501 != 0) {
2502 if ((feat &
2503 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) != 0)
2504 flags |= M_CSUM_IPv4;
2505 if ((feat &
2506 (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK |
2507 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) != 0)
2508 flags |= M_CSUM_TCPv4 | M_CSUM_UDPv4;
2509 }
2510
2511 if ((cap & IFCAP_CSUM_TCPv6_Tx) != 0)
2512 flags |= M_CSUM_TCPv6;
2513
2514 if ((cap & IFCAP_CSUM_UDPv6_Tx) != 0)
2515 flags |= M_CSUM_UDPv6;
2516
2517 if ((cap & IFCAP_TSOv4) != 0)
2518 flags |= M_CSUM_TSOv4;
2519
2520 if ((cap & IFCAP_TSOv6) != 0)
2521 flags |= M_CSUM_TSOv6;
2522
2523 if_sethwassistbits(ifp, flags, 0);
2524 }
2525
2526 static int
2527 ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter,
2528 struct ena_com_dev_get_features_ctx *feat)
2529 {
2530 struct ifnet *ifp;
2531 int caps = 0;
2532
2533 ifp = adapter->ifp = &adapter->sc_ec.ec_if;
2534 if (unlikely(ifp == NULL)) {
2535 ena_trace(ENA_ALERT, "can not allocate ifnet structure\n");
2536 return (ENXIO);
2537 }
2538 if_initname(ifp, "ena", device_unit(pdev));
2539 if_setdev(ifp, pdev);
2540 if_setsoftc(ifp, adapter);
2541
2542 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
2543 if_setinitfn(ifp, ena_init);
2544 if_settransmitfn(ifp, ena_mq_start);
2545 #if 0
2546 if_setqflushfn(ifp, ena_qflush);
2547 #endif
2548 if_setioctlfn(ifp, ena_ioctl);
2549 #if 0
2550 if_setgetcounterfn(ifp, ena_get_counter);
2551 #endif
2552
2553 if_setsendqlen(ifp, adapter->tx_ring_size);
2554 if_setsendqready(ifp);
2555 if_setmtu(ifp, ETHERMTU);
2556 if_setbaudrate(ifp, 0);
2557 /* Zeroize capabilities... */
2558 if_setcapabilities(ifp, 0);
2559 if_setcapenable(ifp, 0);
2560 /* check hardware support */
2561 caps = ena_get_dev_offloads(feat);
2562 /* ... and set them */
2563 if_setcapabilitiesbit(ifp, caps, 0);
2564 adapter->sc_ec.ec_capabilities |= ETHERCAP_JUMBO_MTU;
2565
2566 #if 0
2567 /* TSO parameters */
2568 /* XXX no limits on NetBSD, guarded by virtue of dmamap load failing */
2569 ifp->if_hw_tsomax = ENA_TSO_MAXSIZE -
2570 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2571 ifp->if_hw_tsomaxsegcount = adapter->max_tx_sgl_size - 1;
2572 ifp->if_hw_tsomaxsegsize = ENA_TSO_MAXSIZE;
2573 #endif
2574
2575 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
2576 if_setcapenable(ifp, if_getcapabilities(ifp));
2577
2578 /*
2579 * Specify the media types supported by this adapter and register
2580 * callbacks to update media and link information
2581 */
2582 adapter->sc_ec.ec_ifmedia = &adapter->media;
2583 ifmedia_init(&adapter->media, IFM_IMASK,
2584 ena_media_change, ena_media_status);
2585 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2586 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2587
2588 if_attach(ifp);
2589 if_deferred_start_init(ifp, NULL);
2590
2591 ether_ifattach(ifp, adapter->mac_addr);
2592
2593 return (0);
2594 }
2595
2596 static void
2597 ena_down(struct ena_adapter *adapter)
2598 {
2599 int rc;
2600
2601 if (adapter->up) {
2602 device_printf(adapter->pdev, "device is going DOWN\n");
2603
2604 callout_halt(&adapter->timer_service, &adapter->global_mtx);
2605
2606 adapter->up = false;
2607 if_setdrvflagbits(adapter->ifp, IFF_OACTIVE,
2608 IFF_RUNNING);
2609
2610 ena_free_io_irq(adapter);
2611
2612 if (adapter->trigger_reset) {
2613 rc = ena_com_dev_reset(adapter->ena_dev,
2614 adapter->reset_reason);
2615 if (unlikely(rc != 0))
2616 device_printf(adapter->pdev,
2617 "Device reset failed\n");
2618 }
2619
2620 ena_destroy_all_io_queues(adapter);
2621
2622 ena_free_all_tx_bufs(adapter);
2623 ena_free_all_rx_bufs(adapter);
2624 ena_free_all_tx_resources(adapter);
2625 ena_free_all_rx_resources(adapter);
2626
2627 counter_u64_add(adapter->dev_stats.interface_down, 1);
2628 }
2629 }
2630
2631 static void
2632 ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf)
2633 {
2634 struct ena_com_tx_meta *ena_meta;
2635 struct ether_vlan_header *eh;
2636 u32 mss;
2637 bool offload;
2638 uint16_t etype;
2639 int ehdrlen;
2640 struct ip *ip;
2641 int iphlen;
2642 struct tcphdr *th;
2643
2644 offload = false;
2645 ena_meta = &ena_tx_ctx->ena_meta;
2646
2647 #if 0
2648 u32 mss = mbuf->m_pkthdr.tso_segsz;
2649
2650 if (mss != 0)
2651 offload = true;
2652 #else
2653 mss = mbuf->m_pkthdr.len; /* XXX don't have tso_segsz */
2654 #endif
2655
2656 if ((mbuf->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) != 0)
2657 offload = true;
2658
2659 if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
2660 offload = true;
2661
2662 if (!offload) {
2663 ena_tx_ctx->meta_valid = 0;
2664 return;
2665 }
2666
2667 /* Determine where frame payload starts. */
2668 eh = mtod(mbuf, struct ether_vlan_header *);
2669 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2670 etype = ntohs(eh->evl_proto);
2671 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2672 } else {
2673 		etype = ntohs(eh->evl_encap_proto);
2674 ehdrlen = ETHER_HDR_LEN;
2675 }
2676
2677 ip = (struct ip *)(mbuf->m_data + ehdrlen);
2678 iphlen = ip->ip_hl << 2;
2679 th = (struct tcphdr *)((vaddr_t)ip + iphlen);
2680
2681 if ((mbuf->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0) {
2682 ena_tx_ctx->l3_csum_enable = 1;
2683 }
2684 if ((mbuf->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) != 0) {
2685 ena_tx_ctx->tso_enable = 1;
2686 ena_meta->l4_hdr_len = (th->th_off);
2687 }
2688
2689 switch (etype) {
2690 case ETHERTYPE_IP:
2691 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2692 if ((ip->ip_off & htons(IP_DF)) != 0)
2693 ena_tx_ctx->df = 1;
2694 break;
2695 case ETHERTYPE_IPV6:
2696 		ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2697 		break;
2698 default:
2699 break;
2700 }
2701
2702 if (ip->ip_p == IPPROTO_TCP) {
2703 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2704 if ((mbuf->m_pkthdr.csum_flags &
2705 (M_CSUM_TCPv4 | M_CSUM_TCPv6)) != 0)
2706 ena_tx_ctx->l4_csum_enable = 1;
2707 else
2708 ena_tx_ctx->l4_csum_enable = 0;
2709 } else if (ip->ip_p == IPPROTO_UDP) {
2710 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2711 if ((mbuf->m_pkthdr.csum_flags &
2712 (M_CSUM_UDPv4 | M_CSUM_UDPv6)) != 0)
2713 ena_tx_ctx->l4_csum_enable = 1;
2714 else
2715 ena_tx_ctx->l4_csum_enable = 0;
2716 } else {
2717 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN;
2718 ena_tx_ctx->l4_csum_enable = 0;
2719 }
2720
2721 ena_meta->mss = mss;
2722 ena_meta->l3_hdr_len = iphlen;
2723 ena_meta->l3_hdr_offset = ehdrlen;
2724 ena_tx_ctx->meta_valid = 1;
2725 }
2726
2727 static int
2728 ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2729 {
2730 struct ena_adapter *adapter;
2731 struct mbuf *collapsed_mbuf;
2732 int num_frags;
2733
2734 adapter = tx_ring->adapter;
2735 num_frags = ena_mbuf_count(*mbuf);
2736
2737 /* One segment must be reserved for configuration descriptor. */
2738 if (num_frags < adapter->max_tx_sgl_size)
2739 return (0);
2740 counter_u64_add(tx_ring->tx_stats.collapse, 1);
2741
2742 collapsed_mbuf = m_collapse(*mbuf, M_NOWAIT,
2743 adapter->max_tx_sgl_size - 1);
2744 if (unlikely(collapsed_mbuf == NULL)) {
2745 counter_u64_add(tx_ring->tx_stats.collapse_err, 1);
2746 return (ENOMEM);
2747 }
2748
2749 	/* If the mbuf was collapsed successfully, the original mbuf is released. */
2750 *mbuf = collapsed_mbuf;
2751
2752 return (0);
2753 }
2754
2755 static int
2756 ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
2757 {
2758 struct ena_adapter *adapter;
2759 struct ena_tx_buffer *tx_info;
2760 struct ena_com_tx_ctx ena_tx_ctx;
2761 struct ena_com_dev *ena_dev;
2762 struct ena_com_buf *ena_buf;
2763 struct ena_com_io_sq* io_sq;
2764 void *push_hdr;
2765 uint16_t next_to_use;
2766 uint16_t req_id;
2767 uint16_t ena_qid;
2768 uint32_t header_len;
2769 int i, rc;
2770 int nb_hw_desc;
2771
2772 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2773 adapter = tx_ring->que->adapter;
2774 ena_dev = adapter->ena_dev;
2775 io_sq = &ena_dev->io_sq_queues[ena_qid];
2776
2777 rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
2778 if (unlikely(rc != 0)) {
2779 ena_trace(ENA_WARNING,
2780 "Failed to collapse mbuf! err: %d", rc);
2781 return (rc);
2782 }
2783
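	/* Pick the next free descriptor slot and its bookkeeping entry for this packet. */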
2784 next_to_use = tx_ring->next_to_use;
2785 req_id = tx_ring->free_tx_ids[next_to_use];
2786 tx_info = &tx_ring->tx_buffer_info[req_id];
2787
2788 tx_info->mbuf = *mbuf;
2789 tx_info->num_of_bufs = 0;
2790
2791 ena_buf = tx_info->bufs;
2792
2793 ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len);
2794
2795 /*
2796 	 * header_len is just a hint for the device. Because the stack does not
2797 	 * give us the packet header length and it is not guaranteed that all
2798 	 * packet headers will be in the first mbuf, setting header_len to 0
2799 	 * makes the device ignore this value and resolve the header on its
2800 	 * own.
2801 */
2802 header_len = 0;
2803 push_hdr = NULL;
2804
2805 rc = bus_dmamap_load_mbuf(adapter->sc_dmat, tx_info->map,
2806 *mbuf, BUS_DMA_NOWAIT);
2807
2808 if (unlikely((rc != 0) || (tx_info->map->dm_nsegs == 0))) {
2809 ena_trace(ENA_WARNING,
2810 "dmamap load failed! err: %d nsegs: %d", rc,
2811 tx_info->map->dm_nsegs);
2812 counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
2813 tx_info->mbuf = NULL;
2814 if (rc == ENOMEM)
2815 return (ENA_COM_NO_MEM);
2816 else
2817 return (ENA_COM_INVAL);
2818 }
2819
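	/* Copy the DMA segment addresses and lengths into the device buffer descriptors. */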
2820 for (i = 0; i < tx_info->map->dm_nsegs; i++) {
2821 ena_buf->len = tx_info->map->dm_segs[i].ds_len;
2822 ena_buf->paddr = tx_info->map->dm_segs[i].ds_addr;
2823 ena_buf++;
2824 }
2825 tx_info->num_of_bufs = tx_info->map->dm_nsegs;
2826
2827 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2828 ena_tx_ctx.ena_bufs = tx_info->bufs;
2829 ena_tx_ctx.push_header = push_hdr;
2830 ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2831 ena_tx_ctx.req_id = req_id;
2832 ena_tx_ctx.header_len = header_len;
2833
2834 /* Set flags and meta data */
2835 ena_tx_csum(&ena_tx_ctx, *mbuf);
2836 /* Prepare the packet's descriptors and send them to device */
2837 rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
2838 if (unlikely(rc != 0)) {
2839 device_printf(adapter->pdev, "failed to prepare tx bufs\n");
2840 counter_u64_add(tx_ring->tx_stats.prepare_ctx_err, 1);
2841 goto dma_error;
2842 }
2843
2844 counter_enter();
2845 counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
2846 counter_u64_add_protected(tx_ring->tx_stats.bytes,
2847 (*mbuf)->m_pkthdr.len);
2848
2849 counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
2850 counter_u64_add_protected(adapter->hw_stats.tx_bytes,
2851 (*mbuf)->m_pkthdr.len);
2852 counter_exit();
2853
2854 tx_info->tx_descs = nb_hw_desc;
2855 getbinuptime(&tx_info->timestamp);
2856 	tx_info->print_once = false;	/* allow one missing-completion warning for this packet */
2857
2858 tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
2859 tx_ring->ring_size);
2860
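	/* Make the mapped payload visible to the device before the doorbell is written. */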
2861 bus_dmamap_sync(adapter->sc_dmat, tx_info->map, 0,
2862 tx_info->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
2863
2864 return (0);
2865
2866 dma_error:
2867 tx_info->mbuf = NULL;
2868 bus_dmamap_unload(adapter->sc_dmat, tx_info->map);
2869
2870 return (rc);
2871 }
2872
2873 static void
2874 ena_start_xmit(struct ena_ring *tx_ring)
2875 {
2876 struct mbuf *mbuf;
2877 struct ena_adapter *adapter = tx_ring->adapter;
2878 struct ena_com_io_sq* io_sq;
2879 int ena_qid;
2880 int acum_pkts = 0;
2881 int ret = 0;
2882
2883 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_RUNNING) == 0))
2884 return;
2885
2886 if (unlikely(!adapter->link_status))
2887 return;
2888
2889 ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
2890 io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
2891
2892 while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
2893 ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and"
2894 " header csum flags %#jx",
2895 mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
2896
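		/* Reclaim completed Tx descriptors when the submission queue is running low on space. */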
2897 if (unlikely(!ena_com_sq_have_enough_space(io_sq,
2898 ENA_TX_CLEANUP_THRESHOLD)))
2899 ena_tx_cleanup(tx_ring);
2900
2901 if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
2902 if (ret == ENA_COM_NO_MEM) {
2903 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
2904 } else if (ret == ENA_COM_NO_SPACE) {
2905 drbr_putback(adapter->ifp, tx_ring->br, mbuf);
2906 } else {
2907 m_freem(mbuf);
2908 drbr_advance(adapter->ifp, tx_ring->br);
2909 }
2910
2911 break;
2912 }
2913
2914 drbr_advance(adapter->ifp, tx_ring->br);
2915
2916 if (unlikely((if_getdrvflags(adapter->ifp) &
2917 IFF_RUNNING) == 0))
2918 return;
2919
2920 acum_pkts++;
2921
2922 /*
2923 * If there's a BPF listener, bounce a copy of this frame
2924 * to him.
2925 */
2926 bpf_mtap(adapter->ifp, mbuf, BPF_D_OUT);
2927
2928 if (unlikely(acum_pkts == DB_THRESHOLD)) {
2929 acum_pkts = 0;
2930 wmb();
2931 /* Trigger the dma engine */
2932 ena_com_write_sq_doorbell(io_sq);
2933 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2934 }
2935
2936 }
2937
2938 if (likely(acum_pkts != 0)) {
2939 wmb();
2940 /* Trigger the dma engine */
2941 ena_com_write_sq_doorbell(io_sq);
2942 counter_u64_add(tx_ring->tx_stats.doorbells, 1);
2943 }
2944
2945 if (!ena_com_sq_have_enough_space(io_sq, ENA_TX_CLEANUP_THRESHOLD))
2946 ena_tx_cleanup(tx_ring);
2947 }
2948
2949 static void
2950 ena_deferred_mq_start(struct work *wk, void *arg)
2951 {
2952 struct ena_ring *tx_ring = (struct ena_ring *)arg;
2953 struct ifnet *ifp = tx_ring->adapter->ifp;
2954
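	/* Clear the pending flag so ena_mq_start() can schedule this work again while the ring is drained. */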
2955 atomic_swap_uint(&tx_ring->task_pending, 0);
2956
2957 while (!drbr_empty(ifp, tx_ring->br) &&
2958 (if_getdrvflags(ifp) & IFF_RUNNING) != 0) {
2959 ENA_RING_MTX_LOCK(tx_ring);
2960 ena_start_xmit(tx_ring);
2961 ENA_RING_MTX_UNLOCK(tx_ring);
2962 }
2963 }
2964
2965 static int
2966 ena_mq_start(struct ifnet *ifp, struct mbuf *m)
2967 {
2968 struct ena_adapter *adapter = ifp->if_softc;
2969 struct ena_ring *tx_ring;
2970 int ret, is_drbr_empty;
2971 uint32_t i;
2972
2973 if (unlikely((if_getdrvflags(adapter->ifp) & IFF_RUNNING) == 0))
2974 return (ENODEV);
2975
2976 /* Which queue to use */
2977 /*
2978 	 * If everything is set up correctly, it should be the same bucket
2979 	 * as the one for the CPU we are currently running on, which should
2980 	 * improve performance.
2981 */
2982 #if 0
2983 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
2984 #ifdef RSS
2985 if (rss_hash2bucket(m->m_pkthdr.flowid,
2986 M_HASHTYPE_GET(m), &i) == 0) {
2987 i = i % adapter->num_queues;
2988
2989 } else
2990 #endif
2991 {
2992 i = m->m_pkthdr.flowid % adapter->num_queues;
2993 }
2994 } else {
2995 #endif
2996 i = cpu_index(curcpu()) % adapter->num_queues;
2997 #if 0
2998 }
2999 #endif
3000 tx_ring = &adapter->tx_ring[i];
3001
3002 /* Check if drbr is empty before putting packet */
3003 is_drbr_empty = drbr_empty(ifp, tx_ring->br);
3004 ret = drbr_enqueue(ifp, tx_ring->br, m);
3005 if (unlikely(ret != 0)) {
3006 if (atomic_cas_uint(&tx_ring->task_pending, 0, 1) == 0)
3007 workqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
3008 curcpu());
3009 return (ret);
3010 }
3011
3012 if ((is_drbr_empty != 0) && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
3013 ena_start_xmit(tx_ring);
3014 ENA_RING_MTX_UNLOCK(tx_ring);
3015 } else {
3016 if (atomic_cas_uint(&tx_ring->task_pending, 0, 1) == 0)
3017 workqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
3018 curcpu());
3019 }
3020
3021 return (0);
3022 }
3023
3024 #if 0
3025 static void
3026 ena_qflush(struct ifnet *ifp)
3027 {
3028 struct ena_adapter *adapter = ifp->if_softc;
3029 struct ena_ring *tx_ring = adapter->tx_ring;
3030 int i;
3031
3032 for(i = 0; i < adapter->num_queues; ++i, ++tx_ring)
3033 if (!drbr_empty(ifp, tx_ring->br)) {
3034 ENA_RING_MTX_LOCK(tx_ring);
3035 drbr_flush(ifp, tx_ring->br);
3036 ENA_RING_MTX_UNLOCK(tx_ring);
3037 }
3038
3039 if_qflush(ifp);
3040 }
3041 #endif
3042
3043 static int
3044 ena_calc_io_queue_num(struct pci_attach_args *pa,
3045 struct ena_adapter *adapter,
3046 struct ena_com_dev_get_features_ctx *get_feat_ctx)
3047 {
3048 int io_sq_num, io_cq_num, io_queue_num;
3049
3050 io_sq_num = get_feat_ctx->max_queues.max_sq_num;
3051 io_cq_num = get_feat_ctx->max_queues.max_cq_num;
3052
3053 io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES);
3054 io_queue_num = min_t(int, io_queue_num, io_sq_num);
3055 io_queue_num = min_t(int, io_queue_num, io_cq_num);
3056 	/* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */
3057 io_queue_num = min_t(int, io_queue_num,
3058 pci_msix_count(pa->pa_pc, pa->pa_tag) - 1);
3059 #ifdef RSS
3060 io_queue_num = min_t(int, io_queue_num, rss_getnumbuckets());
3061 #endif
3062
3063 return (io_queue_num);
3064 }
3065
3066 static int
3067 ena_calc_queue_size(struct ena_adapter *adapter, uint16_t *max_tx_sgl_size,
3068 uint16_t *max_rx_sgl_size, struct ena_com_dev_get_features_ctx *feat)
3069 {
3070 uint32_t queue_size = ENA_DEFAULT_RING_SIZE;
3071 uint32_t v;
3072 uint32_t q;
3073
3074 queue_size = min_t(uint32_t, queue_size,
3075 feat->max_queues.max_cq_depth);
3076 queue_size = min_t(uint32_t, queue_size,
3077 feat->max_queues.max_sq_depth);
3078
3079 /* round down to the nearest power of 2 */
3080 v = queue_size;
3081 while (v != 0) {
3082 if (powerof2(queue_size) != 0)
3083 break;
3084 v /= 2;
3085 q = rounddown2(queue_size, v);
3086 if (q != 0) {
3087 queue_size = q;
3088 break;
3089 }
3090 }
3091
3092 if (unlikely(queue_size == 0)) {
3093 device_printf(adapter->pdev, "Invalid queue size\n");
3094 return (ENA_COM_FAULT);
3095 }
3096
3097 *max_tx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3098 feat->max_queues.max_packet_tx_descs);
3099 *max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS,
3100 feat->max_queues.max_packet_rx_descs);
3101
3102 return (queue_size);
3103 }
3104
3105 #if 0
3106 static int
3107 ena_rss_init_default(struct ena_adapter *adapter)
3108 {
3109 struct ena_com_dev *ena_dev = adapter->ena_dev;
3110 device_t dev = adapter->pdev;
3111 int qid, rc, i;
3112
3113 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3114 if (unlikely(rc != 0)) {
3115 device_printf(dev, "Cannot init indirect table\n");
3116 return (rc);
3117 }
3118
3119 for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3120 #ifdef RSS
3121 qid = rss_get_indirection_to_bucket(i);
3122 qid = qid % adapter->num_queues;
3123 #else
3124 qid = i % adapter->num_queues;
3125 #endif
3126 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3127 ENA_IO_RXQ_IDX(qid));
3128 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3129 device_printf(dev, "Cannot fill indirect table\n");
3130 goto err_rss_destroy;
3131 }
3132 }
3133
3134 rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
3135 ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
3136 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3137 device_printf(dev, "Cannot fill hash function\n");
3138 goto err_rss_destroy;
3139 }
3140
3141 rc = ena_com_set_default_hash_ctrl(ena_dev);
3142 if (unlikely((rc != 0) && (rc != EOPNOTSUPP))) {
3143 device_printf(dev, "Cannot fill hash control\n");
3144 goto err_rss_destroy;
3145 }
3146
3147 return (0);
3148
3149 err_rss_destroy:
3150 ena_com_rss_destroy(ena_dev);
3151 return (rc);
3152 }
3153
3154 static void
3155 ena_rss_init_default_deferred(void *arg)
3156 {
3157 struct ena_adapter *adapter;
3158 devclass_t dc;
3159 int max;
3160 int rc;
3161
3162 dc = devclass_find("ena");
3163 if (unlikely(dc == NULL)) {
3164 ena_trace(ENA_ALERT, "No devclass ena\n");
3165 return;
3166 }
3167
3168 max = devclass_get_maxunit(dc);
3169 while (max-- >= 0) {
3170 adapter = devclass_get_softc(dc, max);
3171 if (adapter != NULL) {
3172 rc = ena_rss_init_default(adapter);
3173 adapter->rss_support = true;
3174 if (unlikely(rc != 0)) {
3175 device_printf(adapter->pdev,
3176 "WARNING: RSS was not properly initialized,"
3177 " it will affect bandwidth\n");
3178 adapter->rss_support = false;
3179 }
3180 }
3181 }
3182 }
3183 SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL);
3184 #endif
3185
3186 static void
3187 ena_config_host_info(struct ena_com_dev *ena_dev)
3188 {
3189 struct ena_admin_host_info *host_info;
3190 int rc;
3191
3192 /* Allocate only the host info */
3193 rc = ena_com_allocate_host_info(ena_dev);
3194 if (unlikely(rc != 0)) {
3195 ena_trace(ENA_ALERT, "Cannot allocate host info\n");
3196 return;
3197 }
3198
3199 host_info = ena_dev->host_attr.host_info;
3200
3201 host_info->os_type = ENA_ADMIN_OS_FREEBSD;
3202 host_info->kernel_ver = osreldate;
3203
3204 snprintf(host_info->kernel_ver_str, sizeof(host_info->kernel_ver_str),
3205 "%d", osreldate);
3206 host_info->os_dist = 0;
3207 strncpy(host_info->os_dist_str, osrelease,
3208 sizeof(host_info->os_dist_str) - 1);
3209
3210 host_info->driver_version =
3211 (DRV_MODULE_VER_MAJOR) |
3212 (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3213 (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
3214
3215 rc = ena_com_set_host_attributes(ena_dev);
3216 if (unlikely(rc != 0)) {
3217 if (rc == EOPNOTSUPP)
3218 ena_trace(ENA_WARNING, "Cannot set host attributes\n");
3219 else
3220 ena_trace(ENA_ALERT, "Cannot set host attributes\n");
3221
3222 goto err;
3223 }
3224
3225 return;
3226
3227 err:
3228 ena_com_delete_host_info(ena_dev);
3229 }
3230
3231 static int
3232 ena_device_init(struct ena_adapter *adapter, device_t pdev,
3233 struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active)
3234 {
3235 struct ena_com_dev* ena_dev = adapter->ena_dev;
3236 bool readless_supported;
3237 uint32_t aenq_groups;
3238 int dma_width;
3239 int rc;
3240
3241 rc = ena_com_mmio_reg_read_request_init(ena_dev);
3242 if (unlikely(rc != 0)) {
3243 device_printf(pdev, "failed to init mmio read less\n");
3244 return (rc);
3245 }
3246
3247 /*
3248 	 * The PCIe configuration space revision ID indicates whether MMIO
3249 	 * register read is disabled.
3250 */
3251 const int rev = PCI_REVISION(adapter->sc_pa.pa_class);
3252 readless_supported = ((rev & ENA_MMIO_DISABLE_REG_READ) == 0);
3253 ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3254
3255 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3256 if (unlikely(rc != 0)) {
3257 device_printf(pdev, "Can not reset device\n");
3258 goto err_mmio_read_less;
3259 }
3260
3261 rc = ena_com_validate_version(ena_dev);
3262 if (unlikely(rc != 0)) {
3263 device_printf(pdev, "device version is too low\n");
3264 goto err_mmio_read_less;
3265 }
3266
3267 dma_width = ena_com_get_dma_width(ena_dev);
3268 if (unlikely(dma_width < 0)) {
3269 device_printf(pdev, "Invalid dma width value %d", dma_width);
3270 rc = dma_width;
3271 goto err_mmio_read_less;
3272 }
3273 adapter->dma_width = dma_width;
3274
3275 /* ENA admin level init */
3276 rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
3277 if (unlikely(rc != 0)) {
3278 device_printf(pdev,
3279 "Can not initialize ena admin queue with device\n");
3280 goto err_mmio_read_less;
3281 }
3282
3283 /*
3284 	 * To enable the MSI-X interrupts the driver needs to know the number
3285 	 * of queues, so it uses polling mode to retrieve this
3286 	 * information.
3287 */
3288 ena_com_set_admin_polling_mode(ena_dev, true);
3289
3290 ena_config_host_info(ena_dev);
3291
3292 /* Get Device Attributes */
3293 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3294 if (unlikely(rc != 0)) {
3295 device_printf(pdev,
3296 "Cannot get attribute for ena device rc: %d\n", rc);
3297 goto err_admin_init;
3298 }
3299
3300 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | BIT(ENA_ADMIN_KEEP_ALIVE);
3301
3302 aenq_groups &= get_feat_ctx->aenq.supported_groups;
3303 rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3304 if (unlikely(rc != 0)) {
3305 device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc);
3306 goto err_admin_init;
3307 }
3308
3309 *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3310
3311 return (0);
3312
3313 err_admin_init:
3314 ena_com_delete_host_info(ena_dev);
3315 ena_com_admin_destroy(ena_dev);
3316 err_mmio_read_less:
3317 ena_com_mmio_reg_read_request_destroy(ena_dev);
3318
3319 return (rc);
3320 }
3321
3322 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
3323 int io_vectors)
3324 {
3325 struct ena_com_dev *ena_dev = adapter->ena_dev;
3326 int rc;
3327
3328 rc = ena_enable_msix(adapter);
3329 if (unlikely(rc != 0)) {
3330 device_printf(adapter->pdev, "Error with MSI-X enablement\n");
3331 return (rc);
3332 }
3333
3334 rc = ena_request_mgmnt_irq(adapter);
3335 if (unlikely(rc != 0)) {
3336 device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n");
3337 goto err_disable_msix;
3338 }
3339
3340 ena_com_set_admin_polling_mode(ena_dev, false);
3341
3342 ena_com_admin_aenq_enable(ena_dev);
3343
3344 return (0);
3345
3346 err_disable_msix:
3347 ena_disable_msix(adapter);
3348
3349 return (rc);
3350 }
3351
3352 /* Function called on ENA_ADMIN_KEEP_ALIVE event */
3353 static void ena_keep_alive_wd(void *adapter_data,
3354 struct ena_admin_aenq_entry *aenq_e)
3355 {
3356 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3357 struct ena_admin_aenq_keep_alive_desc *desc;
3358 sbintime_t stime;
3359 uint64_t rx_drops;
3360
3361 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
3362
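	/* The 64-bit Rx drop count is delivered as two 32-bit halves in the keep-alive descriptor. */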
3363 rx_drops = ((uint64_t)desc->rx_drops_high << 32) | desc->rx_drops_low;
3364 counter_u64_zero(adapter->hw_stats.rx_drops);
3365 counter_u64_add(adapter->hw_stats.rx_drops, rx_drops);
3366
3367 stime = getsbinuptime();
3368 (void) atomic_swap_64(&adapter->keep_alive_timestamp, stime);
3369 }
3370
3371 /* Check for keep alive expiration */
3372 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3373 {
3374 sbintime_t timestamp, time;
3375
3376 if (adapter->wd_active == 0)
3377 return;
3378
3379 if (likely(adapter->keep_alive_timeout == 0))
3380 return;
3381
3382 /* FreeBSD uses atomic_load_acq_64() in place of the membar + read */
3383 membar_sync();
3384 timestamp = adapter->keep_alive_timestamp;
3385
3386 time = getsbinuptime() - timestamp;
3387 if (unlikely(time > adapter->keep_alive_timeout)) {
3388 device_printf(adapter->pdev,
3389 "Keep alive watchdog timeout.\n");
3390 counter_u64_add(adapter->dev_stats.wd_expired, 1);
3391 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3392 adapter->trigger_reset = true;
3393 }
3394 }
3395
3396 /* Check if admin queue is enabled */
3397 static void check_for_admin_com_state(struct ena_adapter *adapter)
3398 {
3399 if (unlikely(ena_com_get_admin_running_state(adapter->ena_dev) ==
3400 false)) {
3401 device_printf(adapter->pdev,
3402 "ENA admin queue is not in running state!\n");
3403 counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
3404 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3405 adapter->trigger_reset = true;
3406 }
3407 }
3408
3409 static int
3410 check_missing_comp_in_queue(struct ena_adapter *adapter,
3411 struct ena_ring *tx_ring)
3412 {
3413 struct bintime curtime, time;
3414 struct ena_tx_buffer *tx_buf;
3415 uint32_t missed_tx = 0;
3416 int i;
3417
3418 getbinuptime(&curtime);
3419
3420 for (i = 0; i < tx_ring->ring_size; i++) {
3421 tx_buf = &tx_ring->tx_buffer_info[i];
3422
3423 if (bintime_isset(&tx_buf->timestamp) == 0)
3424 continue;
3425
3426 time = curtime;
3427 bintime_sub(&time, &tx_buf->timestamp);
3428
3429 /* Check again if packet is still waiting */
3430 if (unlikely(bttosbt(time) > adapter->missing_tx_timeout)) {
3431
3432 if (!tx_buf->print_once)
3433 ena_trace(ENA_WARNING, "Found a Tx that wasn't "
3434 "completed on time, qid %d, index %d.\n",
3435 tx_ring->qid, i);
3436
3437 tx_buf->print_once = true;
3438 missed_tx++;
3439 counter_u64_add(tx_ring->tx_stats.missing_tx_comp, 1);
3440
3441 if (unlikely(missed_tx >
3442 adapter->missing_tx_threshold)) {
3443 device_printf(adapter->pdev,
3444 "The number of lost tx completion "
3445 "is above the threshold (%d > %d). "
3446 "Reset the device\n",
3447 missed_tx, adapter->missing_tx_threshold);
3448 adapter->reset_reason =
3449 ENA_REGS_RESET_MISS_TX_CMPL;
3450 adapter->trigger_reset = true;
3451 return (EIO);
3452 }
3453 }
3454 }
3455
3456 return (0);
3457 }
3458
3459 /*
3460  * Check for Tx packets which were not completed on time.
3461  * The timeout is defined by "missing_tx_timeout".
3462  * A reset will be performed if the number of uncompleted
3463  * transactions exceeds "missing_tx_threshold".
3464 */
3465 static void
3466 check_for_missing_tx_completions(struct ena_adapter *adapter)
3467 {
3468 struct ena_ring *tx_ring;
3469 int i, budget, rc;
3470
3471 	/* Make sure the device isn't being brought up or down by another thread */
3472 rmb();
3473
3474 if (!adapter->up)
3475 return;
3476
3477 if (adapter->trigger_reset)
3478 return;
3479
3480 if (adapter->missing_tx_timeout == 0)
3481 return;
3482
3483 budget = adapter->missing_tx_max_queues;
3484
3485 for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) {
3486 tx_ring = &adapter->tx_ring[i];
3487
3488 rc = check_missing_comp_in_queue(adapter, tx_ring);
3489 if (unlikely(rc != 0))
3490 return;
3491
3492 budget--;
3493 if (budget == 0) {
3494 i++;
3495 break;
3496 }
3497 }
3498
3499 adapter->next_monitored_tx_qid = i % adapter->num_queues;
3500 }
3501
3502 /* trigger deferred rx cleanup after 2 consecutive detections */
3503 #define EMPTY_RX_REFILL 2
3504 /* For the rare case where the device runs out of Rx descriptors and the
3505  * MSI-X handler failed to refill new Rx descriptors (due to a lack of memory,
3506  * for example).
3507  * This case will lead to a deadlock:
3508  * the device won't send interrupts since all the new Rx packets will be dropped,
3509  * and the MSI-X handler won't allocate new Rx descriptors, so the device won't
3510  * be able to send new packets.
3511  *
3512  * When such a situation is detected, execute the Rx cleanup task in another thread.
3513  */
3514 static void
3515 check_for_empty_rx_ring(struct ena_adapter *adapter)
3516 {
3517 struct ena_ring *rx_ring;
3518 int i, refill_required;
3519
3520 if (!adapter->up)
3521 return;
3522
3523 if (adapter->trigger_reset)
3524 return;
3525
3526 for (i = 0; i < adapter->num_queues; i++) {
3527 rx_ring = &adapter->rx_ring[i];
3528
3529 refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
3530 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3531 rx_ring->empty_rx_queue++;
3532
3533 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3534 counter_u64_add(rx_ring->rx_stats.empty_rx_ring,
3535 1);
3536
3537 device_printf(adapter->pdev,
3538 "trigger refill for ring %d\n", i);
3539
3540 if (atomic_cas_uint(&rx_ring->task_pending, 0, 1) == 0)
3541 workqueue_enqueue(rx_ring->cmpl_tq,
3542 &rx_ring->cmpl_task, curcpu());
3543 rx_ring->empty_rx_queue = 0;
3544 }
3545 } else {
3546 rx_ring->empty_rx_queue = 0;
3547 }
3548 }
3549 }
3550
3551 static void
3552 ena_timer_service(void *data)
3553 {
3554 struct ena_adapter *adapter = (struct ena_adapter *)data;
3555 struct ena_admin_host_info *host_info =
3556 adapter->ena_dev->host_attr.host_info;
3557
3558 check_for_missing_keep_alive(adapter);
3559
3560 check_for_admin_com_state(adapter);
3561
3562 check_for_missing_tx_completions(adapter);
3563
3564 check_for_empty_rx_ring(adapter);
3565
3566 if (host_info != NULL)
3567 ena_update_host_info(host_info, adapter->ifp);
3568
3569 if (unlikely(adapter->trigger_reset)) {
3570 device_printf(adapter->pdev, "Trigger reset is on\n");
3571 workqueue_enqueue(adapter->reset_tq, &adapter->reset_task,
3572 curcpu());
3573 return;
3574 }
3575
3576 /*
3577 * Schedule another timeout one second from now.
3578 */
3579 callout_schedule(&adapter->timer_service, hz);
3580 }
3581
3582 static void
3583 ena_reset_task(struct work *wk, void *arg)
3584 {
3585 struct ena_com_dev_get_features_ctx get_feat_ctx;
3586 struct ena_adapter *adapter = (struct ena_adapter *)arg;
3587 struct ena_com_dev *ena_dev = adapter->ena_dev;
3588 bool dev_up;
3589 int rc;
3590
3591 if (unlikely(!adapter->trigger_reset)) {
3592 device_printf(adapter->pdev,
3593 "device reset scheduled but trigger_reset is off\n");
3594 return;
3595 }
3596
3597 rw_enter(&adapter->ioctl_sx, RW_WRITER);
3598
3599 callout_halt(&adapter->timer_service, &adapter->global_mtx);
3600
3601 dev_up = adapter->up;
3602
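	/* Destroy phase: stop the admin queue and release management resources before re-initializing the device below. */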
3603 ena_com_set_admin_running_state(ena_dev, false);
3604 ena_down(adapter);
3605 ena_free_mgmnt_irq(adapter);
3606 ena_disable_msix(adapter);
3607 ena_com_abort_admin_commands(ena_dev);
3608 ena_com_wait_for_abort_completion(ena_dev);
3609 ena_com_admin_destroy(ena_dev);
3610 ena_com_mmio_reg_read_request_destroy(ena_dev);
3611
3612 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3613 adapter->trigger_reset = false;
3614
3615 /* Finished destroy part. Restart the device */
3616 rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx,
3617 &adapter->wd_active);
3618 if (unlikely(rc != 0)) {
3619 device_printf(adapter->pdev,
3620 "ENA device init failed! (err: %d)\n", rc);
3621 goto err_dev_free;
3622 }
3623
3624 /* XXX dealloc and realloc MSI-X, probably a waste */
3625 rc = ena_enable_msix_and_set_admin_interrupts(adapter,
3626 adapter->num_queues);
3627 if (unlikely(rc != 0)) {
3628 device_printf(adapter->pdev, "Enable MSI-X failed\n");
3629 goto err_com_free;
3630 }
3631
3632 /* If the interface was up before the reset bring it up */
3633 if (dev_up) {
3634 rc = ena_up(adapter);
3635 if (unlikely(rc != 0)) {
3636 device_printf(adapter->pdev,
3637 "Failed to create I/O queues\n");
3638 goto err_msix_free;
3639 }
3640 }
3641
3642 callout_reset(&adapter->timer_service, hz,
3643 ena_timer_service, (void *)adapter);
3644
3645 rw_exit(&adapter->ioctl_sx);
3646
3647 return;
3648
3649 err_msix_free:
3650 ena_free_mgmnt_irq(adapter);
3651 ena_disable_msix(adapter);
3652 err_com_free:
3653 ena_com_admin_destroy(ena_dev);
3654 err_dev_free:
3655 device_printf(adapter->pdev, "ENA reset failed!\n");
3656 adapter->running = false;
3657 rw_exit(&adapter->ioctl_sx);
3658 }
3659
3660 /**
3661 * ena_attach - Device Initialization Routine
3662 * @pdev: device information struct
3663 *
3664  * Does not return a value; on failure the adapter is left not running.
3665 *
3666 * ena_attach initializes an adapter identified by a device structure.
3667 * The OS initialization, configuring of the adapter private structure,
3668 * and a hardware reset occur.
3669 **/
3670 static void
3671 ena_attach(device_t parent, device_t self, void *aux)
3672 {
3673 struct pci_attach_args *pa = aux;
3674 struct ena_com_dev_get_features_ctx get_feat_ctx;
3675 static int version_printed;
3676 struct ena_adapter *adapter = device_private(self);
3677 struct ena_com_dev *ena_dev = NULL;
3678 uint16_t tx_sgl_size = 0;
3679 uint16_t rx_sgl_size = 0;
3680 pcireg_t reg;
3681 int io_queue_num;
3682 int queue_size;
3683 int rc;
3684
3685 adapter->pdev = self;
3686 adapter->ifp = &adapter->sc_ec.ec_if;
3687 adapter->sc_pa = *pa; /* used after attach for adapter reset too */
3688
3689 if (pci_dma64_available(pa))
3690 adapter->sc_dmat = pa->pa_dmat64;
3691 else
3692 adapter->sc_dmat = pa->pa_dmat;
3693
3694 pci_aprint_devinfo(pa, NULL);
3695
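	/* Enable PCI bus mastering if the firmware has not already done so. */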
3696 reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
3697 if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
3698 reg |= PCI_COMMAND_MASTER_ENABLE;
3699 pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg);
3700 }
3701
3702 mutex_init(&adapter->global_mtx, MUTEX_DEFAULT, IPL_NET);
3703 rw_init(&adapter->ioctl_sx);
3704
3705 /* Set up the timer service */
3706 adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO;
3707 adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO;
3708 adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES;
3709 adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD;
3710
3711 if (version_printed++ == 0)
3712 device_printf(parent, "%s\n", ena_version);
3713
3714 rc = ena_allocate_pci_resources(pa, adapter);
3715 if (unlikely(rc != 0)) {
3716 device_printf(parent, "PCI resource allocation failed!\n");
3717 ena_free_pci_resources(adapter);
3718 return;
3719 }
3720
3721 /* Allocate memory for ena_dev structure */
3722 ena_dev = malloc(sizeof(struct ena_com_dev), M_DEVBUF,
3723 M_WAITOK | M_ZERO);
3724
3725 adapter->ena_dev = ena_dev;
3726 ena_dev->dmadev = self;
3727 ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF,
3728 M_WAITOK | M_ZERO);
3729
3730 /* Store register resources */
3731 ((struct ena_bus*)(ena_dev->bus))->reg_bar_t = adapter->sc_btag;
3732 ((struct ena_bus*)(ena_dev->bus))->reg_bar_h = adapter->sc_bhandle;
3733
3734 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3735
3736 /* Device initialization */
3737 rc = ena_device_init(adapter, self, &get_feat_ctx, &adapter->wd_active);
3738 if (unlikely(rc != 0)) {
3739 device_printf(self, "ENA device init failed! (err: %d)\n", rc);
3740 rc = ENXIO;
3741 goto err_bus_free;
3742 }
3743
3744 adapter->keep_alive_timestamp = getsbinuptime();
3745
3746 adapter->tx_offload_cap = get_feat_ctx.offload.tx;
3747
3748 /* Set for sure that interface is not up */
3749 adapter->up = false;
3750
3751 memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr,
3752 ETHER_ADDR_LEN);
3753
3754 /* calculate IO queue number to create */
3755 io_queue_num = ena_calc_io_queue_num(pa, adapter, &get_feat_ctx);
3756
3757 ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n",
3758 io_queue_num);
3759 adapter->num_queues = io_queue_num;
3760
3761 adapter->max_mtu = get_feat_ctx.dev_attr.max_mtu;
3762
3763 	/* calculate ring sizes */
3764 	queue_size = ena_calc_queue_size(adapter, &tx_sgl_size,
3765 &rx_sgl_size, &get_feat_ctx);
3766 if (unlikely((queue_size <= 0) || (io_queue_num <= 0))) {
3767 rc = ENA_COM_FAULT;
3768 goto err_com_free;
3769 }
3770
3771 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3772
3773 adapter->tx_ring_size = queue_size;
3774 adapter->rx_ring_size = queue_size;
3775
3776 adapter->max_tx_sgl_size = tx_sgl_size;
3777 adapter->max_rx_sgl_size = rx_sgl_size;
3778
3779 #if 0
3780 /* set up dma tags for rx and tx buffers */
3781 rc = ena_setup_tx_dma_tag(adapter);
3782 if (unlikely(rc != 0)) {
3783 device_printf(self, "Failed to create TX DMA tag\n");
3784 goto err_com_free;
3785 }
3786
3787 rc = ena_setup_rx_dma_tag(adapter);
3788 if (unlikely(rc != 0)) {
3789 device_printf(self, "Failed to create RX DMA tag\n");
3790 goto err_tx_tag_free;
3791 }
3792 #endif
3793
3794 /* initialize rings basic information */
3795 device_printf(self, "initialize %d io queues\n", io_queue_num);
3796 ena_init_io_rings(adapter);
3797
3798 /* setup network interface */
3799 rc = ena_setup_ifnet(self, adapter, &get_feat_ctx);
3800 if (unlikely(rc != 0)) {
3801 device_printf(self, "Error with network interface setup\n");
3802 goto err_io_free;
3803 }
3804
3805 rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
3806 if (unlikely(rc != 0)) {
3807 device_printf(self,
3808 "Failed to enable and set the admin interrupts\n");
3809 goto err_ifp_free;
3810 }
3811
3812 callout_init(&adapter->timer_service, CALLOUT_MPSAFE);
3813
3814 /* Initialize reset task queue */
3815 rc = workqueue_create(&adapter->reset_tq, "ena_reset_enq",
3816 ena_reset_task, adapter, 0, IPL_NET, WQ_PERCPU | WQ_MPSAFE);
3817 if (unlikely(rc != 0)) {
3818 ena_trace(ENA_ALERT,
3819 "Unable to create workqueue for reset task\n");
3820 goto err_ifp_free;
3821 }
3822
3823 /* Initialize statistics */
3824 ena_alloc_counters_dev(&adapter->dev_stats, io_queue_num);
3825 ena_alloc_counters_hwstats(&adapter->hw_stats, io_queue_num);
3826 #if 0
3827 ena_sysctl_add_nodes(adapter);
3828 #endif
3829
3830 /* Tell the stack that the interface is not active */
3831 if_setdrvflagbits(adapter->ifp, IFF_OACTIVE, IFF_RUNNING);
3832
3833 adapter->running = true;
3834 return;
3835
3836 err_ifp_free:
3837 if_detach(adapter->ifp);
3838 if_free(adapter->ifp);
3839 err_io_free:
3840 ena_free_all_io_rings_resources(adapter);
3841 #if 0
3842 ena_free_rx_dma_tag(adapter);
3843 err_tx_tag_free:
3844 ena_free_tx_dma_tag(adapter);
3845 #endif
3846 err_com_free:
3847 ena_com_admin_destroy(ena_dev);
3848 ena_com_delete_host_info(ena_dev);
3849 ena_com_mmio_reg_read_request_destroy(ena_dev);
3850 err_bus_free:
3851 free(ena_dev->bus, M_DEVBUF);
3852 free(ena_dev, M_DEVBUF);
3853 ena_free_pci_resources(adapter);
3854 }
3855
3856 /**
3857 * ena_detach - Device Removal Routine
3858 * @pdev: device information struct
3859 *
3860 * ena_detach is called by the device subsystem to alert the driver
3861 * that it should release a PCI device.
3862 **/
3863 static int
3864 ena_detach(device_t pdev, int flags)
3865 {
3866 struct ena_adapter *adapter = device_private(pdev);
3867 struct ena_com_dev *ena_dev = adapter->ena_dev;
3868 #if 0
3869 int rc;
3870 #endif
3871
3872 	/* Make sure VLANs are not using the driver */
3873 	if (VLAN_ATTACHED(&adapter->sc_ec)) {
3874 		device_printf(adapter->pdev, "VLAN is in use, detach first\n");
3875 return (EBUSY);
3876 }
3877
3878 /* Free reset task and callout */
3879 callout_halt(&adapter->timer_service, &adapter->global_mtx);
3880 callout_destroy(&adapter->timer_service);
3881 workqueue_wait(adapter->reset_tq, &adapter->reset_task);
3882 workqueue_destroy(adapter->reset_tq);
3883 adapter->reset_tq = NULL;
3884
3885 rw_enter(&adapter->ioctl_sx, RW_WRITER);
3886 ena_down(adapter);
3887 rw_exit(&adapter->ioctl_sx);
3888
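	/* Detach the interface from the network stack and release it. */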
3889 if (adapter->ifp != NULL) {
3890 ether_ifdetach(adapter->ifp);
3891 if_free(adapter->ifp);
3892 }
3893
3894 ena_free_all_io_rings_resources(adapter);
3895
3896 ena_free_counters((struct evcnt *)&adapter->hw_stats,
3897 sizeof(struct ena_hw_stats));
3898 ena_free_counters((struct evcnt *)&adapter->dev_stats,
3899 sizeof(struct ena_stats_dev));
3900
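	/* Release RSS state only if it was set up successfully. */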
3901 if (likely(adapter->rss_support))
3902 ena_com_rss_destroy(ena_dev);
3903
3904 #if 0
3905 rc = ena_free_rx_dma_tag(adapter);
3906 if (unlikely(rc != 0))
3907 device_printf(adapter->pdev,
3908 "Unmapped RX DMA tag associations\n");
3909
3910 rc = ena_free_tx_dma_tag(adapter);
3911 if (unlikely(rc != 0))
3912 device_printf(adapter->pdev,
3913 "Unmapped TX DMA tag associations\n");
3914 #endif
3915
3916 /* Reset the device only if the device is running. */
3917 if (adapter->running)
3918 ena_com_dev_reset(ena_dev, adapter->reset_reason);
3919
3920 ena_com_delete_host_info(ena_dev);
3921
3922 ena_free_irqs(adapter);
3923
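	/* Tear down the admin queue: abort any outstanding commands, wait
	 * for them to finish, then destroy the admin and MMIO register
	 * read resources. */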
3924 ena_com_abort_admin_commands(ena_dev);
3925
3926 ena_com_wait_for_abort_completion(ena_dev);
3927
3928 ena_com_admin_destroy(ena_dev);
3929
3930 ena_com_mmio_reg_read_request_destroy(ena_dev);
3931
3932 ena_free_pci_resources(adapter);
3933
3934 mutex_destroy(&adapter->global_mtx);
3935 rw_destroy(&adapter->ioctl_sx);
3936
3937 if (ena_dev->bus != NULL)
3938 free(ena_dev->bus, M_DEVBUF);
3939
3940 if (ena_dev != NULL)
3941 free(ena_dev, M_DEVBUF);
3942
3943 return 0;
3944 }
3945
3946 /******************************************************************************
3947 ******************************** AENQ Handlers *******************************
3948 *****************************************************************************/
3949 /**
3950 * ena_update_on_link_change:
3951 * Notify the network interface about the change in link status
3952 **/
3953 static void
3954 ena_update_on_link_change(void *adapter_data,
3955 struct ena_admin_aenq_entry *aenq_e)
3956 {
3957 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
3958 struct ena_admin_aenq_link_change_desc *aenq_desc;
3959 int status;
3960 struct ifnet *ifp;
3961
3962 aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e;
3963 ifp = adapter->ifp;
3964 status = aenq_desc->flags &
3965 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
3966
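	/* The LINK_STATUS bit of the AENQ descriptor flags carries the new
	 * link state. */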
3967 	if (status != 0) {
3968 		device_printf(adapter->pdev, "link is UP\n");
3969 		if_link_state_change(ifp, LINK_STATE_UP);
3970 	} else {
3971 		device_printf(adapter->pdev, "link is DOWN\n");
3972 		if_link_state_change(ifp, LINK_STATE_DOWN);
3973 	}
3977
3978 adapter->link_status = status;
3979 }
3980
3981 /**
3982  * This handler is called for unknown event groups and unimplemented handlers
3983 **/
3984 static void
3985 unimplemented_aenq_handler(void *data,
3986 struct ena_admin_aenq_entry *aenq_e)
3987 {
3988 return;
3989 }
3990
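/* AENQ dispatch table: events without an explicit handler are routed to
 * unimplemented_aenq_handler. */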
3991 static struct ena_aenq_handlers aenq_handlers = {
3992 .handlers = {
3993 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
3994 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
3995 },
3996 .unimplemented_handler = unimplemented_aenq_handler
3997 };
3998
3999 #ifdef __FreeBSD__
4000 /*********************************************************************
4001 * FreeBSD Device Interface Entry Points
4002 *********************************************************************/
4003
4004 static device_method_t ena_methods[] = {
4005 /* Device interface */
4006 DEVMETHOD(device_probe, ena_probe),
4007 DEVMETHOD(device_attach, ena_attach),
4008 DEVMETHOD(device_detach, ena_detach),
4009 DEVMETHOD_END
4010 };
4011
4012 static driver_t ena_driver = {
4013 "ena", ena_methods, sizeof(struct ena_adapter),
4014 };
4015
4016 devclass_t ena_devclass;
4017 DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0);
4018 MODULE_DEPEND(ena, pci, 1, 1, 1);
4019 MODULE_DEPEND(ena, ether, 1, 1, 1);
4020
4021 /*********************************************************************/
4022 #endif /* __FreeBSD__ */
4023
4024 #ifdef __NetBSD__
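/* NetBSD autoconf glue: match, attach and detach entry points for the
 * ena driver. */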
4025 CFATTACH_DECL_NEW(ena, sizeof(struct ena_adapter), ena_probe, ena_attach,
4026 ena_detach, NULL);
4027 #endif /* __NetBSD__ */
4028