/*	$NetBSD: if_enavar.h,v 1.8 2021/07/19 21:16:33 jmcneill Exp $ */
2
3 /*-
4 * BSD LICENSE
5 *
6 * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 * $FreeBSD: head/sys/dev/ena/ena.h 333450 2018-05-10 09:06:21Z mw $
33 *
34 */
35
36 #ifndef ENA_H
37 #define ENA_H
38
39 #include <sys/types.h>
40 #include <sys/atomic.h>
41 #include <sys/pcq.h>
42
43 #include "external/bsd/ena-com/ena_com.h"
44 #include "external/bsd/ena-com/ena_eth_com.h"
45
/* Driver version advertised to the ena-com layer and to userland. */
#define DRV_MODULE_VER_MAJOR	0
#define DRV_MODULE_VER_MINOR	8
#define DRV_MODULE_VER_SUBMINOR 1

#define DRV_MODULE_NAME		"ena"

#ifndef DRV_MODULE_VERSION
/* Stringified "major.minor.subminor" version, e.g. "0.8.1". */
#define DRV_MODULE_VERSION				\
	___STRING(DRV_MODULE_VER_MAJOR) "."		\
	___STRING(DRV_MODULE_VER_MINOR) "."		\
	___STRING(DRV_MODULE_VER_SUBMINOR)
#endif
#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
#define DEVICE_DESC	"ENA adapter"

/* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */
#define ENA_DMA_BIT_MASK(x)		((1ULL << (x)) - 1ULL)

/* 1 for AENQ + ADMIN */
#define	ENA_ADMIN_MSIX_VEC		1
/* Total MSI-X vectors: one admin/AENQ vector plus one per I/O queue. */
#define	ENA_MAX_MSIX_VEC(io_queues)	(ENA_ADMIN_MSIX_VEC + (io_queues))

/* PCI BARs: registers in BAR 0, optional LLQ memory window in BAR 2. */
#define	ENA_REG_BAR			PCI_BAR(0)
#define	ENA_MEM_BAR			PCI_BAR(2)

/* Maximum bus_dma segments used when loading a packet map. */
#define	ENA_BUS_DMA_SEGS		32

#define	ENA_DEFAULT_RING_SIZE		1024

/* Refill RX ring when free descriptors drop below size/8. */
#define	ENA_RX_REFILL_THRESH_DIVIDER	8

#define	ENA_IRQNAME_SIZE		40

/* Maximum number of device buffers a single packet may occupy. */
#define	ENA_PKT_MAX_BUFS		19

#define	ENA_RX_RSS_TABLE_LOG_SIZE	7
#define	ENA_RX_RSS_TABLE_SIZE		(1 << ENA_RX_RSS_TABLE_LOG_SIZE)

#define	ENA_HASH_KEY_SIZE		40

#define	ENA_MAX_FRAME_LEN		10000
#define	ENA_MIN_FRAME_LEN		60

#define ENA_TX_CLEANUP_THRESHOLD	128

#define DB_THRESHOLD	64

#define TX_COMMIT	32
/*
 * TX budget for cleaning. It should be half of the RX budget to reduce amount
 * of TCP retransmissions.
 */
#define TX_BUDGET	128
/*
 * RX cleanup budget.
 * NOTE(review): the original comment said "-1 stands for infinity", but the
 * value here is 256 — confirm whether the -1 convention still applies anywhere.
 */
#define RX_BUDGET	256
/*
 * How many times we can repeat cleanup in the io irq handling routine if the
 * RX or TX budget was depleted.
 */
#define CLEAN_BUDGET	8

#define RX_IRQ_INTERVAL 20
#define TX_IRQ_INTERVAL 50

#define	ENA_MIN_MTU		128

#define	ENA_TSO_MAXSIZE		65536

#define	ENA_MMIO_DISABLE_REG_READ	BIT(0)

/* Ring indices wrap with a mask, so ring_size must be a power of two. */
#define	ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

#define	ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))

/* TX and RX queues of pair q are interleaved: even = TX, odd = RX. */
#define	ENA_IO_TXQ_IDX(q)		(2 * (q))
#define	ENA_IO_RXQ_IDX(q)		(2 * (q) + 1)

#define	ENA_MGMNT_IRQ_IDX		0
#define	ENA_IO_IRQ_FIRST_IDX		1
#define	ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))

/*
 * ENA device should send keep alive msg every 1 sec.
 * We wait for 6 sec just to be on the safe side.
 */
#define DEFAULT_KEEP_ALIVE_TO		(SBT_1S * 6)

/* Time (in sbintime_t units) before concluding the transmitter is hung. */
#define DEFAULT_TX_CMP_TO		(SBT_1S * 5)

/* Number of queues to check for missing queues per timer tick */
#define DEFAULT_TX_MONITORED_QUEUES	(4)

/* Max number of timed-out packets before device reset */
#define DEFAULT_TX_CMP_THRESHOLD	(128)

/*
 * Supported PCI vendor and devices IDs
 */
#define	PCI_VENDOR_ID_AMAZON	0x1d0f

#define	PCI_DEV_ID_ENA_PF	0x0ec2
#define	PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
#define	PCI_DEV_ID_ENA_VF	0xec20
#define	PCI_DEV_ID_ENA_LLQ_VF	0xec21
/*
 * Flags indicating current ENA driver state.  Each enumerator is a bit
 * position within ena_adapter::flags and is manipulated through the
 * ENA_FLAG_* macros below.
 */
enum ena_flags_t {
	ENA_FLAG_DEVICE_RUNNING,
	ENA_FLAG_DEV_UP,
	ENA_FLAG_LINK_UP,
	ENA_FLAG_MSIX_ENABLED,
	ENA_FLAG_TRIGGER_RESET,
	ENA_FLAG_ONGOING_RESET,
	ENA_FLAG_DEV_UP_BEFORE_RESET,
	ENA_FLAG_RSS_ACTIVE,
	/*
	 * NOTE(review): this equals the index of the last flag, not the
	 * count of flags (which would be RSS_ACTIVE + 1) — presumably
	 * intentional; confirm against how ENA_FLAGS_NUMBER is used.
	 */
	ENA_FLAGS_NUMBER = ENA_FLAG_RSS_ACTIVE
};
166
/*
 * Helpers for the per-adapter "flags" word (bit positions come from
 * enum ena_flags_t).  Set/clear go through atomic_or_32/atomic_and_32
 * since flags is updated from several contexts; ISSET is a plain read.
 */
#define ENA_FLAG_BITMASK(bit)	(~(uint32_t)__BIT(bit))
/*
 * Expression-style reset of all flags.  The original definition embedded a
 * trailing ';' in the expansion, which is a macro hazard (breaks use in an
 * if/else and yields a stray empty statement); defined without it so that
 * "ENA_FLAG_ZERO(adapter);" behaves like a normal statement.
 */
#define ENA_FLAG_ZERO(adapter)	((adapter)->flags = 0)
#define ENA_FLAG_ISSET(bit, adapter)	((adapter)->flags & __BIT(bit))
#define ENA_FLAG_SET_ATOMIC(bit, adapter)	\
	atomic_or_32(&(adapter)->flags, __BIT(bit))
#define ENA_FLAG_CLEAR_ATOMIC(bit, adapter)	\
	atomic_and_32(&(adapter)->flags, ENA_FLAG_BITMASK(bit))
174
/*
 * FreeBSD-compatible 64-bit fixed-point (32.32) time type; one second is
 * SBT_1S (defined below).  Used for the keep-alive / TX-completion timers.
 */
typedef __int64_t sbintime_t;

/* FreeBSD/Linux-style MSI-X bookkeeping entry. */
struct msix_entry {
	int entry;	/* index within the MSI-X table */
	int vector;	/* vector assigned by the interrupt layer */
};

/* Entry of the PCI vendor/device match table used at attach time. */
typedef struct _ena_vendor_info_t {
	unsigned int vendor_id;
	unsigned int device_id;
	unsigned int index;
} ena_vendor_info_t;
187
/*
 * One I/O queue pair: ties together the TX and RX rings that share an id,
 * with a back pointer to the adapter.
 */
struct ena_que {
	struct ena_adapter *adapter;	/* back pointer to the softc */
	struct ena_ring *tx_ring;
	struct ena_ring *rx_ring;
	uint32_t id;			/* queue-pair index */
	int cpu;			/* CPU associated with this queue */
};
195
/* Per-slot TX bookkeeping: the mbuf in flight and its DMA state. */
struct ena_tx_buffer {
	struct mbuf *mbuf;
	/* # of ena desc for this specific mbuf
	 * (includes data desc and metadata desc) */
	unsigned int tx_descs;
	/* # of buffers used by this mbuf */
	unsigned int num_of_bufs;
	bus_dmamap_t map;		/* DMA map covering the mbuf chain */

	/* Used to detect missing tx packets */
	struct bintime timestamp;
	bool print_once;		/* rate-limits the "missing" warning */

	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} __aligned(CACHE_LINE_SIZE);

/* Per-slot RX bookkeeping: receive mbuf plus its DMA map and device buffer. */
struct ena_rx_buffer {
	struct mbuf *mbuf;
	bus_dmamap_t map;
	struct ena_com_buf ena_buf;
} __aligned(CACHE_LINE_SIZE);
217
/* Per-TX-ring statistics; each member is an attached evcnt counter. */
struct ena_stats_tx {
	char name[16];			/* evcnt group name for this ring */
	struct evcnt cnt;
	struct evcnt bytes;
	struct evcnt prepare_ctx_err;
	struct evcnt dma_mapping_err;
	struct evcnt doorbells;
	struct evcnt missing_tx_comp;
	struct evcnt bad_req_id;
	struct evcnt collapse;
	struct evcnt collapse_err;
	struct evcnt pcq_drops;
};

/* Per-RX-ring statistics; each member is an attached evcnt counter. */
struct ena_stats_rx {
	char name[16];			/* evcnt group name for this ring */
	struct evcnt cnt;
	struct evcnt bytes;
	struct evcnt refil_partial;
	struct evcnt bad_csum;
	struct evcnt mbuf_alloc_fail;
	struct evcnt dma_mapping_err;
	struct evcnt bad_desc_num;
	struct evcnt bad_req_id;
	struct evcnt empty_rx_ring;
};
244
/*
 * Locking notes:
 * + For TX, fields in ena_ring are protected by ring_mtx (a spin mutex).
 *   - protect them only when the interface is up.
 *   - when the interface is down, or while attaching/detaching, there is
 *     no need to protect them.
 * + For RX, the field "stopping" is protected by ring_mtx (a spin mutex).
 *   - other fields in ena_ring are not protected.
 * + Fields in ena_adapter are protected by global_mtx (an adaptive mutex).
 *
 * + a field marked "stable" is unlocked.
 * + a field marked "atomic" is unlocked,
 *   but must use atomic ops to read/write.
 *
 * Lock order:
 * + global_mtx -> ring_mtx
 */
struct ena_ring {
	/* Holds the empty requests for TX/RX out of order completions */
	union {
		uint16_t *free_tx_ids;
		uint16_t *free_rx_ids;
	};
	struct ena_com_dev *ena_dev;
	struct ena_adapter *adapter;
	struct ena_com_io_cq *ena_com_io_cq;
	struct ena_com_io_sq *ena_com_io_sq;

	uint16_t qid;			/* queue-pair index */

	/* Determines if device will use LLQ or normal mode for TX */
	enum ena_admin_placement_policy_type tx_mem_queue_type;
	/* The maximum length the driver can push to the device (For LLQ) */
	uint8_t tx_max_header_size;

	/* Scratch array for descriptors of the packet being received. */
	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];

	/*
	 * Fields used for Adaptive Interrupt Modulation - to be implemented in
	 * the future releases
	 */
	uint32_t smoothed_interval;
	enum ena_intr_moder_level moder_tbl_idx;

	struct ena_que *que;		/* owning queue pair */
#ifdef LRO
	struct lro_ctrl lro;
#endif

	/* Producer/consumer indices into the buffer_info array. */
	uint16_t next_to_use;
	uint16_t next_to_clean;

	union {
		struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */
		struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */
	};
	int ring_size; /* number of tx/rx_buffer_info's entries */

	pcq_t *br; /* only for TX */

	kmutex_t ring_mtx;
	char mtx_name[16];

	/* TX rings use the enqueue task, RX rings the cleanup task. */
	union {
		struct {
			struct work enqueue_task;
			struct workqueue *enqueue_tq;
		};
		struct {
			struct work cleanup_task;
			struct workqueue *cleanup_tq;
		};
	};
	u_int task_pending; /* atomic */
	bool stopping;			/* protected by ring_mtx (RX) */

	union {
		struct ena_stats_tx tx_stats;
		struct ena_stats_rx rx_stats;
	};

	/* Consecutive ticks the RX ring was observed empty (watchdog aid). */
	int empty_rx_queue;
} __aligned(CACHE_LINE_SIZE);
327
/* Adapter-wide driver statistics (evcnt counters). */
struct ena_stats_dev {
	char name[16];			/* evcnt group name */
	struct evcnt wd_expired;
	struct evcnt interface_up;
	struct evcnt interface_down;
	struct evcnt admin_q_pause;
};

/* Adapter-wide counters mirrored from device (AENQ keep-alive) reports. */
struct ena_hw_stats {
	char name[16];			/* evcnt group name */
	struct evcnt rx_packets;
	struct evcnt tx_packets;

	struct evcnt rx_bytes;
	struct evcnt tx_bytes;

	struct evcnt rx_drops;
};
346
/* Board specific private data structure (the softc). */
struct ena_adapter {
	struct ena_com_dev *ena_dev;	/* shared ena-com device context */

	/* OS defined structs */
	device_t pdev;
	struct ethercom sc_ec;
	struct ifnet *ifp;		/* set to point to sc_ec */
	struct ifmedia media;

	/* OS resources */
	kmutex_t global_mtx;		/* adaptive; see locking notes above */

	void *sc_ihs[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
	pci_intr_handle_t *sc_intrs;
	int sc_nintrs;
	struct pci_attach_args sc_pa;

	/* Registers */
	bus_space_handle_t sc_bhandle;
	bus_space_tag_t sc_btag;
	bus_addr_t sc_memaddr;
	bus_size_t sc_mapsize;

	/* DMA tag used throughout the driver adapter for Tx and Rx */
	bus_dma_tag_t sc_dmat;
	int dma_width;			/* DMA address width reported by device */

	uint32_t max_mtu;

	/* Scatter/gather limits reported by the device. */
	uint16_t max_tx_sgl_size;
	uint16_t max_rx_sgl_size;

	uint32_t tx_offload_cap;

	/* Tx fast path data */
	int num_queues;			/* number of active queue pairs */

	unsigned int tx_ring_size;
	unsigned int rx_ring_size;

	/* RSS */
	uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE];
	bool rss_support;

	uint8_t mac_addr[ETHER_ADDR_LEN];
	/* mdio and phy */

	uint32_t flags;			/* atomic; bits from enum ena_flags_t */

	/* Queue will represent one TX and one RX ring */
	struct ena_que que[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);	/* stable */

	/* TX */
	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* RX */
	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
	    __aligned(CACHE_LINE_SIZE);

	/* Timer service */
	struct callout timer_service;
	sbintime_t keep_alive_timestamp;
	uint32_t next_monitored_tx_qid;
	struct work reset_task;
	struct workqueue *reset_tq;
	int wd_active;			/* watchdog enabled */
	sbintime_t keep_alive_timeout;
	sbintime_t missing_tx_timeout;
	uint32_t missing_tx_max_queues;
	uint32_t missing_tx_threshold;

	/* Statistics */
	struct ena_stats_dev dev_stats;
	struct ena_hw_stats hw_stats;

	enum ena_regs_reset_reason_types reset_reason;
};
427
/* Per-ring lock (spin mutex); see the locking notes above struct ena_ring. */
#define	ENA_RING_MTX_LOCK(_ring)	mutex_enter(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_TRYLOCK(_ring)	mutex_tryenter(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_UNLOCK(_ring)	mutex_exit(&(_ring)->ring_mtx)
#define	ENA_RING_MTX_OWNED(_ring)	mutex_owned(&(_ring)->ring_mtx)

/* Adapter-wide lock (adaptive mutex); acquire before any ring_mtx. */
#define	ENA_CORE_MTX_LOCK(_adapter)	mutex_enter(&(_adapter)->global_mtx)
#define	ENA_CORE_MTX_TRYLOCK(_adapter)	mutex_tryenter(&(_adapter)->global_mtx)
#define	ENA_CORE_MTX_UNLOCK(_adapter)	mutex_exit(&(_adapter)->global_mtx)
#define	ENA_CORE_MTX_OWNED(_adapter)	mutex_owned(&(_adapter)->global_mtx)
437
438 static inline int ena_mbuf_count(struct mbuf *mbuf)
439 {
440 int count = 1;
441
442 while ((mbuf = mbuf->m_next) != NULL)
443 ++count;
444
445 return count;
446 }
447
/* provide FreeBSD-compatible macros, mapping ifnet accessors to NetBSD */
#define	if_getcapenable(ifp)		(ifp)->if_capenable
/*
 * NOTE(review): SET() ORs bits in, whereas FreeBSD's if_setcapenable()
 * assigns the whole field — presumably fine for how the driver calls it,
 * but confirm no caller relies on clearing bits this way.
 */
#define	if_setcapenable(ifp, s)		SET((ifp)->if_capenable, s)
#define	if_getcapabilities(ifp)		(ifp)->if_capabilities
#define	if_setcapabilities(ifp, s)	SET((ifp)->if_capabilities, s)
/* Clear bits "c", then set bits "s". */
#define	if_setcapabilitiesbit(ifp, s, c)	do {	\
	CLR((ifp)->if_capabilities, c);			\
	SET((ifp)->if_capabilities, s);			\
	} while (0)
#define	if_getsoftc(ifp)		(ifp)->if_softc
#define	if_setmtu(ifp, new_mtu)		(ifp)->if_mtu = (new_mtu)
#define	if_getdrvflags(ifp)		(ifp)->if_flags
#define	if_setdrvflagbits(ifp, s, c)	do {	\
	CLR((ifp)->if_flags, c);		\
	SET((ifp)->if_flags, s);		\
	} while (0)
#define	if_setflags(ifp, s)		SET((ifp)->if_flags, s)
/* FreeBSD hwassist bits are mapped onto NetBSD's RX checksum flags. */
#define	if_sethwassistbits(ifp, s, c)	do {	\
	CLR((ifp)->if_csum_flags_rx, c);	\
	SET((ifp)->if_csum_flags_rx, s);	\
	} while (0)
#define	if_clearhwassist(ifp)		(ifp)->if_csum_flags_rx = 0
#define	if_setbaudrate(ifp, r)		(ifp)->if_baudrate = (r)
#define	if_setdev(ifp, dev)		do { } while (0)
#define	if_setsoftc(ifp, softc)		(ifp)->if_softc = (softc)
#define	if_setinitfn(ifp, initfn)	(ifp)->if_init = (initfn)
#define	if_settransmitfn(ifp, txfn)	(ifp)->if_transmit = (txfn)
#define	if_setioctlfn(ifp, ioctlfn)	(ifp)->if_ioctl = (ioctlfn)
#define	if_setsendqlen(ifp, sqlen)	\
	IFQ_SET_MAXLEN(&(ifp)->if_snd, uimax(sqlen, IFQ_MAXLEN))
#define	if_setsendqready(ifp)		IFQ_SET_READY(&(ifp)->if_snd)
#define	if_setifheaderlen(ifp, len)	(ifp)->if_hdrlen = (len)

/* One second in the 32.32 fixed-point sbintime_t format. */
#define	SBT_1S	((sbintime_t)1 << 32)
#define	bintime_clear(a)	((a)->sec = (a)->frac = 0)
#define	bintime_isset(a)	((a)->sec || (a)->frac)
484
485 static __inline sbintime_t
486 bttosbt(const struct bintime _bt)
487 {
488 return (((sbintime_t)_bt.sec << 32) + (_bt.frac >> 32));
489 }
490
491 static __inline sbintime_t
492 getsbinuptime(void)
493 {
494 struct bintime _bt;
495
496 getbinuptime(&_bt);
497 return (bttosbt(_bt));
498 }
499
/*
 * FreeBSD counter(9) compatibility, mapped onto struct evcnt.
 * Intentionally non-atomic, it's just unnecessary overhead.
 */
#define	counter_u64_add(x, cnt)		(x).ev_count += (cnt)
#define	counter_u64_zero(x)		(x).ev_count = 0
#define	counter_u64_free(x)		evcnt_detach(&(x))

#define	counter_u64_add_protected(x, cnt)	(x).ev_count += (cnt)
#define	counter_enter()			do {} while (0)
#define	counter_exit()			do {} while (0)

/* Misc other constants */
#define	mp_ncpus			ncpu
#define	osreldate			__NetBSD_Version__
512
/*
 * XXX XXX XXX just to make compile, must provide replacement XXX XXX XXX
 * FreeBSD buf_ring/drbr compatibility, approximated with the ifnet send
 * queue (if_snd).  Other than that, TODO:
 * - decide whether to import <sys/buf_ring.h>
 * - recheck the M_CSUM/IPCAP mapping
 * - recheck workqueue use - FreeBSD taskqueues might have different semantics
 */
#define	buf_ring_alloc(a, b, c, d)	(void *)&a
#define	drbr_free(ifp, b)		do { } while (0)
#define	drbr_flush(ifp, b)		IFQ_PURGE(&(ifp)->if_snd)
/*
 * NOTE(review): FreeBSD's drbr_advance() consumes the previously peeked
 * packet; here it is a plain dequeue — relies on peek/advance being paired.
 */
#define	drbr_advance(ifp, b)		\
({					\
	struct mbuf *__m;		\
	IFQ_DEQUEUE(&(ifp)->if_snd, __m);	\
	__m;				\
})
#define	drbr_putback(ifp, b, m)		do { } while (0)
#define	drbr_empty(ifp, b)		IFQ_IS_EMPTY(&(ifp)->if_snd)
#define	drbr_peek(ifp, b)		\
({					\
	struct mbuf *__m;		\
	IFQ_POLL(&(ifp)->if_snd, __m);	\
	__m;				\
})
#define	drbr_enqueue(ifp, b, m)		\
({					\
	int __err;			\
	IFQ_ENQUEUE(&(ifp)->if_snd, m, __err);	\
	__err;				\
})
#define	m_getjcl(a, b, c, d)		NULL
#define	MJUM16BYTES			MCLBYTES
#define	m_append(m, len, cp)		ena_m_append(m, len, cp)
#define	m_collapse(m, how, maxfrags)	m_defrag(m, how)	/* XXX */
/* XXX XXX XXX */
548
549 static inline int
550 ena_m_append(struct mbuf *m0, int len, const void *cpv)
551 {
552 struct mbuf *m, *n;
553 int remainder, space;
554 const char *cp = cpv;
555
556 KASSERT(len != M_COPYALL);
557 for (m = m0; m->m_next != NULL; m = m->m_next)
558 continue;
559 remainder = len;
560 space = M_TRAILINGSPACE(m);
561 if (space > 0) {
562 /*
563 * Copy into available space.
564 */
565 if (space > remainder)
566 space = remainder;
567 memmove(mtod(m, char *) + m->m_len, cp, space);
568 m->m_len += space;
569 cp = cp + space, remainder -= space;
570 }
571 while (remainder > 0) {
572 /*
573 * Allocate a new mbuf; could check space
574 * and allocate a cluster instead.
575 */
576 n = m_get(M_DONTWAIT, m->m_type);
577 if (n == NULL)
578 break;
579 n->m_len = uimin(MLEN, remainder);
580 memmove(mtod(n, void *), cp, n->m_len);
581 cp += n->m_len, remainder -= n->m_len;
582 m->m_next = n;
583 m = n;
584 }
585 if (m0->m_flags & M_PKTHDR)
586 m0->m_pkthdr.len += len - remainder;
587 return (remainder == 0);
588 }
589 #endif /* !(ENA_H) */
590