ix_txrx.c revision 1.23 1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
62 /*$NetBSD: ix_txrx.c,v 1.23 2017/05/08 10:00:41 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73
74 extern int ix_crcstrip;
75 #endif
76
77 /*
78 ** HW RSC control:
79 ** this feature only works with
80 ** IPv4, and only on 82599 and later.
81 ** Also this will cause IP forwarding to
82 ** fail and that can't be controlled by
83 ** the stack as LRO can. For all these
84 ** reasons I've deemed it best to leave
85 ** this off and not bother with a tuneable
86 ** interface, this would need to be compiled
87 ** to enable.
88 */
89 static bool ixgbe_rsc_enable = FALSE;
90
91 #ifdef IXGBE_FDIR
92 /*
93 ** For Flow Director: this is the
94 ** number of TX packets we sample
95 ** for the filter pool, this means
96 ** every 20th packet will be probed.
97 **
98 ** This feature can be disabled by
99 ** setting this to 0.
100 */
101 static int atr_sample_rate = 20;
102 #endif
103
104 /*********************************************************************
105 * Local Function prototypes
106 *********************************************************************/
107 static void ixgbe_setup_transmit_ring(struct tx_ring *);
108 static void ixgbe_free_transmit_buffers(struct tx_ring *);
109 static int ixgbe_setup_receive_ring(struct rx_ring *);
110 static void ixgbe_free_receive_buffers(struct rx_ring *);
111
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
116 static int ixgbe_tx_ctx_setup(struct tx_ring *,
117 struct mbuf *, u32 *, u32 *);
118 static int ixgbe_tso_setup(struct tx_ring *,
119 struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126
127 static void ixgbe_setup_hw_rsc(struct rx_ring *);
128
129 /*********************************************************************
130 * Transmit entry point
131 *
132 * ixgbe_start is called by the stack to initiate a transmit.
133 * The driver will remain in this routine as long as there are
134 * packets to transmit and transmit resources are available.
135 * In case resources are not available stack is notified and
136 * the packet is requeued.
137 **********************************************************************/
138
139 void
140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
141 {
142 int rc;
143 struct mbuf *m_head;
144 struct adapter *adapter = txr->adapter;
145
146 IXGBE_TX_LOCK_ASSERT(txr);
147
148 if ((ifp->if_flags & IFF_RUNNING) == 0)
149 return;
150 if (!adapter->link_active)
151 return;
152
153 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
154 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
155 break;
156
157 IFQ_POLL(&ifp->if_snd, m_head);
158 if (m_head == NULL)
159 break;
160
161 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
162 break;
163 }
164 IFQ_DEQUEUE(&ifp->if_snd, m_head);
165 if (rc != 0) {
166 m_freem(m_head);
167 continue;
168 }
169
170 /* Send a copy of the frame to the BPF listener */
171 bpf_mtap(ifp, m_head);
172 }
173 return;
174 }
175
176 /*
177 * Legacy TX start - called by the stack, this
178 * always uses the first tx ring, and should
179 * not be used with multiqueue tx enabled.
180 */
181 void
182 ixgbe_start(struct ifnet *ifp)
183 {
184 struct adapter *adapter = ifp->if_softc;
185 struct tx_ring *txr = adapter->tx_rings;
186
187 if (ifp->if_flags & IFF_RUNNING) {
188 IXGBE_TX_LOCK(txr);
189 ixgbe_start_locked(txr, ifp);
190 IXGBE_TX_UNLOCK(txr);
191 }
192 return;
193 }
194
195 #ifndef IXGBE_LEGACY_TX
196
197 /*
198 ** Multiqueue Transmit Entry Point
199 ** (if_transmit function)
200 */
201 int
202 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
203 {
204 struct adapter *adapter = ifp->if_softc;
205 struct tx_ring *txr;
206 int i, err = 0;
207 #ifdef RSS
208 uint32_t bucket_id;
209 #endif
210
211 /*
212 * When doing RSS, map it to the same outbound queue
213 * as the incoming flow would be mapped to.
214 *
215 * If everything is setup correctly, it should be the
216 * same bucket that the current CPU we're on is.
217 */
218 #if 0
219 #if __FreeBSD_version < 1100054
220 if (m->m_flags & M_FLOWID) {
221 #else
222 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
223 #endif
224 #ifdef RSS
225 if (rss_hash2bucket(m->m_pkthdr.flowid,
226 M_HASHTYPE_GET(m), &bucket_id) == 0) {
227 /* TODO: spit out something if bucket_id > num_queues? */
228 i = bucket_id % adapter->num_queues;
229 #ifdef IXGBE_DEBUG
230 if (bucket_id > adapter->num_queues)
231 if_printf(ifp, "bucket_id (%d) > num_queues "
232 "(%d)\n", bucket_id, adapter->num_queues);
233 #endif
234 } else
235 #endif /* RSS */
236 i = m->m_pkthdr.flowid % adapter->num_queues;
237 } else
238 #endif
239 i = cpu_index(curcpu()) % adapter->num_queues;
240
241 /* Check for a hung queue and pick alternative */
242 if (((1 << i) & adapter->active_queues) == 0)
243 i = ffs64(adapter->active_queues);
244
245 txr = &adapter->tx_rings[i];
246
247 err = pcq_put(txr->txr_interq, m);
248 if (err == false) {
249 m_freem(m);
250 txr->pcq_drops.ev_count++;
251 return (err);
252 }
253 if (IXGBE_TX_TRYLOCK(txr)) {
254 ixgbe_mq_start_locked(ifp, txr);
255 IXGBE_TX_UNLOCK(txr);
256 } else
257 softint_schedule(txr->txr_si);
258
259 return (0);
260 }
261
262 int
263 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
264 {
265 struct adapter *adapter = txr->adapter;
266 struct mbuf *next;
267 int enqueued = 0, err = 0;
268
269 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
270 adapter->link_active == 0)
271 return (ENETDOWN);
272
273 /* Process the queue */
274 while ((next = pcq_get(txr->txr_interq)) != NULL) {
275 if ((err = ixgbe_xmit(txr, next)) != 0) {
276 m_freem(next);
277 /* All errors are counted in ixgbe_xmit() */
278 break;
279 }
280 enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283 /*
284 * Since we're looking at the tx ring, we can check
285 * to see if we're a VF by examing our tail register
286 * address.
287 */
288 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif /* 0 */
292 /* Send a copy of the frame to the BPF listener */
293 bpf_mtap(ifp, next);
294 if ((ifp->if_flags & IFF_RUNNING) == 0)
295 break;
296 }
297
298 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
299 ixgbe_txeof(txr);
300
301 return (err);
302 }
303
304 /*
305 * Called from a taskqueue to drain queued transmit packets.
306 */
307 void
308 ixgbe_deferred_mq_start(void *arg)
309 {
310 struct tx_ring *txr = arg;
311 struct adapter *adapter = txr->adapter;
312 struct ifnet *ifp = adapter->ifp;
313
314 IXGBE_TX_LOCK(txr);
315 if (pcq_peek(txr->txr_interq) != NULL)
316 ixgbe_mq_start_locked(ifp, txr);
317 IXGBE_TX_UNLOCK(txr);
318 }
319
320 #endif /* IXGBE_LEGACY_TX */
321
322
323 /*********************************************************************
324 *
325 * This routine maps the mbufs to tx descriptors, allowing the
326 * TX engine to transmit the packets.
327 * - return 0 on success, positive on failure
328 *
329 **********************************************************************/
330
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
333 {
334 struct m_tag *mtag;
335 struct adapter *adapter = txr->adapter;
336 struct ifnet *ifp = adapter->ifp;
337 struct ethercom *ec = &adapter->osdep.ec;
338 u32 olinfo_status = 0, cmd_type_len;
339 int i, j, error;
340 int first;
341 bool remap = TRUE;
342 bus_dmamap_t map;
343 struct ixgbe_tx_buf *txbuf;
344 union ixgbe_adv_tx_desc *txd = NULL;
345
346 /* Basic descriptor defines */
347 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
348 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
349
350 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
351 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
352
353 /*
354 * Important to capture the first descriptor
355 * used because it will contain the index of
356 * the one we tell the hardware to report back
357 */
358 first = txr->next_avail_desc;
359 txbuf = &txr->tx_buffers[first];
360 map = txbuf->map;
361
362 /*
363 * Map the packet for DMA.
364 */
365 retry:
366 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
367 m_head, BUS_DMA_NOWAIT);
368
369 if (__predict_false(error)) {
370 struct mbuf *m;
371
372 switch (error) {
373 case EAGAIN:
374 adapter->eagain_tx_dma_setup.ev_count++;
375 return EAGAIN;
376 case ENOMEM:
377 adapter->enomem_tx_dma_setup.ev_count++;
378 return EAGAIN;
379 case EFBIG:
380 /* Try it again? - one try */
381 if (remap == TRUE) {
382 remap = FALSE;
383 /*
384 * XXX: m_defrag will choke on
385 * non-MCLBYTES-sized clusters
386 */
387 adapter->efbig_tx_dma_setup.ev_count++;
388 m = m_defrag(m_head, M_NOWAIT);
389 if (m == NULL) {
390 adapter->mbuf_defrag_failed.ev_count++;
391 return ENOBUFS;
392 }
393 m_head = m;
394 goto retry;
395 } else {
396 adapter->efbig2_tx_dma_setup.ev_count++;
397 return error;
398 }
399 case EINVAL:
400 adapter->einval_tx_dma_setup.ev_count++;
401 return error;
402 default:
403 adapter->other_tx_dma_setup.ev_count++;
404 return error;
405 }
406 }
407
408 /* Make certain there are enough descriptors */
409 if (txr->tx_avail < (map->dm_nsegs + 2)) {
410 txr->no_desc_avail.ev_count++;
411 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
412 return EAGAIN;
413 }
414
415 /*
416 * Set up the appropriate offload context
417 * this will consume the first descriptor
418 */
419 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
420 if (__predict_false(error)) {
421 return (error);
422 }
423
424 #ifdef IXGBE_FDIR
425 /* Do the flow director magic */
426 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 ++txr->atr_count;
428 if (txr->atr_count >= atr_sample_rate) {
429 ixgbe_atr(txr, m_head);
430 txr->atr_count = 0;
431 }
432 }
433 #endif
434
435 olinfo_status |= IXGBE_ADVTXD_CC;
436 i = txr->next_avail_desc;
437 for (j = 0; j < map->dm_nsegs; j++) {
438 bus_size_t seglen;
439 bus_addr_t segaddr;
440
441 txbuf = &txr->tx_buffers[i];
442 txd = &txr->tx_base[i];
443 seglen = map->dm_segs[j].ds_len;
444 segaddr = htole64(map->dm_segs[j].ds_addr);
445
446 txd->read.buffer_addr = segaddr;
447 txd->read.cmd_type_len = htole32(txr->txd_cmd |
448 cmd_type_len |seglen);
449 txd->read.olinfo_status = htole32(olinfo_status);
450
451 if (++i == txr->num_desc)
452 i = 0;
453 }
454
455 txd->read.cmd_type_len |=
456 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
457 txr->tx_avail -= map->dm_nsegs;
458 txr->next_avail_desc = i;
459
460 txbuf->m_head = m_head;
461 /*
462 * Here we swap the map so the last descriptor,
463 * which gets the completion interrupt has the
464 * real map, and the first descriptor gets the
465 * unused map from this descriptor.
466 */
467 txr->tx_buffers[first].map = txbuf->map;
468 txbuf->map = map;
469 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
470 BUS_DMASYNC_PREWRITE);
471
472 /* Set the EOP descriptor that will be marked done */
473 txbuf = &txr->tx_buffers[first];
474 txbuf->eop = txd;
475
476 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
477 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
478 /*
479 * Advance the Transmit Descriptor Tail (Tdt), this tells the
480 * hardware that this frame is available to transmit.
481 */
482 ++txr->total_packets.ev_count;
483 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
484
485 /*
486 * XXXX NOMPSAFE: ifp->if_data should be percpu.
487 */
488 ifp->if_obytes += m_head->m_pkthdr.len;
489 if (m_head->m_flags & M_MCAST)
490 ifp->if_omcasts++;
491
492 /* Mark queue as having work */
493 if (txr->busy == 0)
494 txr->busy = 1;
495
496 return 0;
497 }
498
499
500 /*********************************************************************
501 *
502 * Allocate memory for tx_buffer structures. The tx_buffer stores all
503 * the information needed to transmit a packet on the wire. This is
504 * called only once at attach, setup is done every reset.
505 *
506 **********************************************************************/
507 int
508 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
509 {
510 struct adapter *adapter = txr->adapter;
511 device_t dev = adapter->dev;
512 struct ixgbe_tx_buf *txbuf;
513 int error, i;
514
515 /*
516 * Setup DMA descriptor areas.
517 */
518 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
519 1, 0, /* alignment, bounds */
520 IXGBE_TSO_SIZE, /* maxsize */
521 adapter->num_segs, /* nsegments */
522 PAGE_SIZE, /* maxsegsize */
523 0, /* flags */
524 &txr->txtag))) {
525 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
526 goto fail;
527 }
528
529 if (!(txr->tx_buffers =
530 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
531 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
532 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
533 error = ENOMEM;
534 goto fail;
535 }
536
537 /* Create the descriptor buffer dma maps */
538 txbuf = txr->tx_buffers;
539 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
540 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
541 if (error != 0) {
542 aprint_error_dev(dev,
543 "Unable to create TX DMA map (%d)\n", error);
544 goto fail;
545 }
546 }
547
548 return 0;
549 fail:
550 /* We free all, it handles case where we are in the middle */
551 #if 0 /* XXX was FreeBSD */
552 ixgbe_free_transmit_structures(adapter);
553 #else
554 ixgbe_free_transmit_buffers(txr);
555 #endif
556 return (error);
557 }
558
559 /*********************************************************************
560 *
561 * Initialize a transmit ring.
562 *
563 **********************************************************************/
564 static void
565 ixgbe_setup_transmit_ring(struct tx_ring *txr)
566 {
567 struct adapter *adapter = txr->adapter;
568 struct ixgbe_tx_buf *txbuf;
569 #ifdef DEV_NETMAP
570 struct netmap_adapter *na = NA(adapter->ifp);
571 struct netmap_slot *slot;
572 #endif /* DEV_NETMAP */
573
574 /* Clear the old ring contents */
575 IXGBE_TX_LOCK(txr);
576 #ifdef DEV_NETMAP
577 /*
578 * (under lock): if in netmap mode, do some consistency
579 * checks and set slot to entry 0 of the netmap ring.
580 */
581 slot = netmap_reset(na, NR_TX, txr->me, 0);
582 #endif /* DEV_NETMAP */
583 bzero((void *)txr->tx_base,
584 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
585 /* Reset indices */
586 txr->next_avail_desc = 0;
587 txr->next_to_clean = 0;
588
589 /* Free any existing tx buffers. */
590 txbuf = txr->tx_buffers;
591 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
592 if (txbuf->m_head != NULL) {
593 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
594 0, txbuf->m_head->m_pkthdr.len,
595 BUS_DMASYNC_POSTWRITE);
596 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
597 m_freem(txbuf->m_head);
598 txbuf->m_head = NULL;
599 }
600 #ifdef DEV_NETMAP
601 /*
602 * In netmap mode, set the map for the packet buffer.
603 * NOTE: Some drivers (not this one) also need to set
604 * the physical buffer address in the NIC ring.
605 * Slots in the netmap ring (indexed by "si") are
606 * kring->nkr_hwofs positions "ahead" wrt the
607 * corresponding slot in the NIC ring. In some drivers
608 * (not here) nkr_hwofs can be negative. Function
609 * netmap_idx_n2k() handles wraparounds properly.
610 */
611 if (slot) {
612 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
613 netmap_load_map(na, txr->txtag,
614 txbuf->map, NMB(na, slot + si));
615 }
616 #endif /* DEV_NETMAP */
617 /* Clear the EOP descriptor pointer */
618 txbuf->eop = NULL;
619 }
620
621 #ifdef IXGBE_FDIR
622 /* Set the rate at which we sample packets */
623 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
624 txr->atr_sample = atr_sample_rate;
625 #endif
626
627 /* Set number of descriptors available */
628 txr->tx_avail = adapter->num_tx_desc;
629
630 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
631 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
632 IXGBE_TX_UNLOCK(txr);
633 }
634
635 /*********************************************************************
636 *
637 * Initialize all transmit rings.
638 *
639 **********************************************************************/
640 int
641 ixgbe_setup_transmit_structures(struct adapter *adapter)
642 {
643 struct tx_ring *txr = adapter->tx_rings;
644
645 for (int i = 0; i < adapter->num_queues; i++, txr++)
646 ixgbe_setup_transmit_ring(txr);
647
648 return (0);
649 }
650
651 /*********************************************************************
652 *
653 * Free all transmit rings.
654 *
655 **********************************************************************/
656 void
657 ixgbe_free_transmit_structures(struct adapter *adapter)
658 {
659 struct tx_ring *txr = adapter->tx_rings;
660
661 for (int i = 0; i < adapter->num_queues; i++, txr++) {
662 ixgbe_free_transmit_buffers(txr);
663 ixgbe_dma_free(adapter, &txr->txdma);
664 IXGBE_TX_LOCK_DESTROY(txr);
665 }
666 free(adapter->tx_rings, M_DEVBUF);
667 }
668
669 /*********************************************************************
670 *
671 * Free transmit ring related data structures.
672 *
673 **********************************************************************/
674 static void
675 ixgbe_free_transmit_buffers(struct tx_ring *txr)
676 {
677 struct adapter *adapter = txr->adapter;
678 struct ixgbe_tx_buf *tx_buffer;
679 int i;
680
681 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
682
683 if (txr->tx_buffers == NULL)
684 return;
685
686 tx_buffer = txr->tx_buffers;
687 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
688 if (tx_buffer->m_head != NULL) {
689 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
690 0, tx_buffer->m_head->m_pkthdr.len,
691 BUS_DMASYNC_POSTWRITE);
692 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
693 m_freem(tx_buffer->m_head);
694 tx_buffer->m_head = NULL;
695 if (tx_buffer->map != NULL) {
696 ixgbe_dmamap_destroy(txr->txtag,
697 tx_buffer->map);
698 tx_buffer->map = NULL;
699 }
700 } else if (tx_buffer->map != NULL) {
701 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
702 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
703 tx_buffer->map = NULL;
704 }
705 }
706 #ifndef IXGBE_LEGACY_TX
707 if (txr->txr_interq != NULL) {
708 struct mbuf *m;
709
710 while ((m = pcq_get(txr->txr_interq)) != NULL)
711 m_freem(m);
712 pcq_destroy(txr->txr_interq);
713 }
714 #endif
715 if (txr->tx_buffers != NULL) {
716 free(txr->tx_buffers, M_DEVBUF);
717 txr->tx_buffers = NULL;
718 }
719 if (txr->txtag != NULL) {
720 ixgbe_dma_tag_destroy(txr->txtag);
721 txr->txtag = NULL;
722 }
723 return;
724 }
725
726 /*********************************************************************
727 *
728 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
729 *
730 **********************************************************************/
731
732 static int
733 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
734 u32 *cmd_type_len, u32 *olinfo_status)
735 {
736 struct adapter *adapter = txr->adapter;
737 struct ethercom *ec = &adapter->osdep.ec;
738 struct m_tag *mtag;
739 struct ixgbe_adv_tx_context_desc *TXD;
740 struct ether_vlan_header *eh;
741 #ifdef INET
742 struct ip *ip;
743 #endif
744 #ifdef INET6
745 struct ip6_hdr *ip6;
746 #endif
747 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
748 int ehdrlen, ip_hlen = 0;
749 u16 etype;
750 u8 ipproto = 0;
751 int offload = TRUE;
752 int ctxd = txr->next_avail_desc;
753 u16 vtag = 0;
754 char *l3d;
755
756
757 /* First check if TSO is to be used */
758 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) {
759 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
760
761 if (rv != 0)
762 ++adapter->tso_err.ev_count;
763 return rv;
764 }
765
766 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
767 offload = FALSE;
768
769 /* Indicate the whole packet as payload when not doing TSO */
770 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
771
772 /* Now ready a context descriptor */
773 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
774
775 /*
776 ** In advanced descriptors the vlan tag must
777 ** be placed into the context descriptor. Hence
778 ** we need to make one even if not doing offloads.
779 */
780 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
781 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
782 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
783 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
784 return (0);
785
786 /*
787 * Determine where frame payload starts.
788 * Jump over vlan headers if already present,
789 * helpful for QinQ too.
790 */
791 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
792 eh = mtod(mp, struct ether_vlan_header *);
793 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
794 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
795 etype = ntohs(eh->evl_proto);
796 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
797 } else {
798 etype = ntohs(eh->evl_encap_proto);
799 ehdrlen = ETHER_HDR_LEN;
800 }
801
802 /* Set the ether header length */
803 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
804
805 if (offload == FALSE)
806 goto no_offloads;
807
808 /*
809 * If the first mbuf only includes the ethernet header, jump to the next one
810 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
811 * XXX: And assumes the entire IP header is contained in one mbuf
812 */
813 if (mp->m_len == ehdrlen && mp->m_next)
814 l3d = mtod(mp->m_next, char *);
815 else
816 l3d = mtod(mp, char *) + ehdrlen;
817
818 switch (etype) {
819 #ifdef INET
820 case ETHERTYPE_IP:
821 ip = (struct ip *)(l3d);
822 ip_hlen = ip->ip_hl << 2;
823 ipproto = ip->ip_p;
824 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
825 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
826 ip->ip_sum == 0);
827 break;
828 #endif
829 #ifdef INET6
830 case ETHERTYPE_IPV6:
831 ip6 = (struct ip6_hdr *)(l3d);
832 ip_hlen = sizeof(struct ip6_hdr);
833 ipproto = ip6->ip6_nxt;
834 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
835 break;
836 #endif
837 default:
838 offload = false;
839 break;
840 }
841
842 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
843 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
844
845 vlan_macip_lens |= ip_hlen;
846
847 /* No support for offloads for non-L4 next headers */
848 switch (ipproto) {
849 case IPPROTO_TCP:
850 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
851
852 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
853 else
854 offload = false;
855 break;
856 case IPPROTO_UDP:
857 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
858 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
859 else
860 offload = false;
861 break;
862 default:
863 offload = false;
864 break;
865 }
866
867 if (offload) /* Insert L4 checksum into data descriptors */
868 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
869
870 no_offloads:
871 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
872
873 /* Now copy bits into descriptor */
874 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
875 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
876 TXD->seqnum_seed = htole32(0);
877 TXD->mss_l4len_idx = htole32(0);
878
879 /* We've consumed the first desc, adjust counters */
880 if (++ctxd == txr->num_desc)
881 ctxd = 0;
882 txr->next_avail_desc = ctxd;
883 --txr->tx_avail;
884
885 return 0;
886 }
887
888 /**********************************************************************
889 *
890 * Setup work for hardware segmentation offload (TSO) on
891 * adapters using advanced tx descriptors
892 *
893 **********************************************************************/
894 static int
895 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
896 u32 *cmd_type_len, u32 *olinfo_status)
897 {
898 struct m_tag *mtag;
899 struct adapter *adapter = txr->adapter;
900 struct ethercom *ec = &adapter->osdep.ec;
901 struct ixgbe_adv_tx_context_desc *TXD;
902 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
903 u32 mss_l4len_idx = 0, paylen;
904 u16 vtag = 0, eh_type;
905 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
906 struct ether_vlan_header *eh;
907 #ifdef INET6
908 struct ip6_hdr *ip6;
909 #endif
910 #ifdef INET
911 struct ip *ip;
912 #endif
913 struct tcphdr *th;
914
915 /*
916 * Determine where frame payload starts.
917 * Jump over vlan headers if already present
918 */
919 eh = mtod(mp, struct ether_vlan_header *);
920 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
921 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
922 eh_type = eh->evl_proto;
923 } else {
924 ehdrlen = ETHER_HDR_LEN;
925 eh_type = eh->evl_encap_proto;
926 }
927
928 switch (ntohs(eh_type)) {
929 #ifdef INET6
930 case ETHERTYPE_IPV6:
931 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
932 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
933 if (ip6->ip6_nxt != IPPROTO_TCP)
934 return (ENXIO);
935 ip_hlen = sizeof(struct ip6_hdr);
936 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
937 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
938 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
939 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
940 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
941 break;
942 #endif
943 #ifdef INET
944 case ETHERTYPE_IP:
945 ip = (struct ip *)(mp->m_data + ehdrlen);
946 if (ip->ip_p != IPPROTO_TCP)
947 return (ENXIO);
948 ip->ip_sum = 0;
949 ip_hlen = ip->ip_hl << 2;
950 th = (struct tcphdr *)((char *)ip + ip_hlen);
951 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
952 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
953 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
954 /* Tell transmit desc to also do IPv4 checksum. */
955 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
956 break;
957 #endif
958 default:
959 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
960 __func__, ntohs(eh_type));
961 break;
962 }
963
964 ctxd = txr->next_avail_desc;
965 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
966
967 tcp_hlen = th->th_off << 2;
968
969 /* This is used in the transmit desc in encap */
970 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
971
972 /* VLAN MACLEN IPLEN */
973 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
974 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
975 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
976 }
977
978 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
979 vlan_macip_lens |= ip_hlen;
980 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
981
982 /* ADV DTYPE TUCMD */
983 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
984 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
985 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
986
987 /* MSS L4LEN IDX */
988 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
989 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
990 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
991
992 TXD->seqnum_seed = htole32(0);
993
994 if (++ctxd == txr->num_desc)
995 ctxd = 0;
996
997 txr->tx_avail--;
998 txr->next_avail_desc = ctxd;
999 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1000 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1001 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1002 ++txr->tso_tx.ev_count;
1003 return (0);
1004 }
1005
1006
1007 /**********************************************************************
1008 *
1009 * Examine each tx_buffer in the used queue. If the hardware is done
1010 * processing the packet then free associated resources. The
1011 * tx_buffer is put back on the free queue.
1012 *
1013 **********************************************************************/
1014 void
1015 ixgbe_txeof(struct tx_ring *txr)
1016 {
1017 struct adapter *adapter = txr->adapter;
1018 struct ifnet *ifp = adapter->ifp;
1019 u32 work, processed = 0;
1020 u32 limit = adapter->tx_process_limit;
1021 struct ixgbe_tx_buf *buf;
1022 union ixgbe_adv_tx_desc *txd;
1023
1024 KASSERT(mutex_owned(&txr->tx_mtx));
1025
1026 #ifdef DEV_NETMAP
1027 if (ifp->if_capenable & IFCAP_NETMAP) {
1028 struct netmap_adapter *na = NA(ifp);
1029 struct netmap_kring *kring = &na->tx_rings[txr->me];
1030 txd = txr->tx_base;
1031 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032 BUS_DMASYNC_POSTREAD);
1033 /*
1034 * In netmap mode, all the work is done in the context
1035 * of the client thread. Interrupt handlers only wake up
1036 * clients, which may be sleeping on individual rings
1037 * or on a global resource for all rings.
1038 * To implement tx interrupt mitigation, we wake up the client
1039 * thread roughly every half ring, even if the NIC interrupts
1040 * more frequently. This is implemented as follows:
1041 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1042 * the slot that should wake up the thread (nkr_num_slots
1043 * means the user thread should not be woken up);
1044 * - the driver ignores tx interrupts unless netmap_mitigate=0
1045 * or the slot has the DD bit set.
1046 */
1047 if (!netmap_mitigate ||
1048 (kring->nr_kflags < kring->nkr_num_slots &&
1049 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1050 netmap_tx_irq(ifp, txr->me);
1051 }
1052 return;
1053 }
1054 #endif /* DEV_NETMAP */
1055
1056 if (txr->tx_avail == txr->num_desc) {
1057 txr->busy = 0;
1058 return;
1059 }
1060
1061 /* Get work starting point */
1062 work = txr->next_to_clean;
1063 buf = &txr->tx_buffers[work];
1064 txd = &txr->tx_base[work];
1065 work -= txr->num_desc; /* The distance to ring end */
1066 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1067 BUS_DMASYNC_POSTREAD);
1068
1069 do {
1070 union ixgbe_adv_tx_desc *eop = buf->eop;
1071 if (eop == NULL) /* No work */
1072 break;
1073
1074 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1075 break; /* I/O not complete */
1076
1077 if (buf->m_head) {
1078 txr->bytes +=
1079 buf->m_head->m_pkthdr.len;
1080 bus_dmamap_sync(txr->txtag->dt_dmat,
1081 buf->map,
1082 0, buf->m_head->m_pkthdr.len,
1083 BUS_DMASYNC_POSTWRITE);
1084 ixgbe_dmamap_unload(txr->txtag,
1085 buf->map);
1086 m_freem(buf->m_head);
1087 buf->m_head = NULL;
1088 }
1089 buf->eop = NULL;
1090 ++txr->tx_avail;
1091
1092 /* We clean the range if multi segment */
1093 while (txd != eop) {
1094 ++txd;
1095 ++buf;
1096 ++work;
1097 /* wrap the ring? */
1098 if (__predict_false(!work)) {
1099 work -= txr->num_desc;
1100 buf = txr->tx_buffers;
1101 txd = txr->tx_base;
1102 }
1103 if (buf->m_head) {
1104 txr->bytes +=
1105 buf->m_head->m_pkthdr.len;
1106 bus_dmamap_sync(txr->txtag->dt_dmat,
1107 buf->map,
1108 0, buf->m_head->m_pkthdr.len,
1109 BUS_DMASYNC_POSTWRITE);
1110 ixgbe_dmamap_unload(txr->txtag,
1111 buf->map);
1112 m_freem(buf->m_head);
1113 buf->m_head = NULL;
1114 }
1115 ++txr->tx_avail;
1116 buf->eop = NULL;
1117
1118 }
1119 ++txr->packets;
1120 ++processed;
1121 ++ifp->if_opackets;
1122
1123 /* Try the next packet */
1124 ++txd;
1125 ++buf;
1126 ++work;
1127 /* reset with a wrap */
1128 if (__predict_false(!work)) {
1129 work -= txr->num_desc;
1130 buf = txr->tx_buffers;
1131 txd = txr->tx_base;
1132 }
1133 prefetch(txd);
1134 } while (__predict_true(--limit));
1135
1136 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1137 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1138
1139 work += txr->num_desc;
1140 txr->next_to_clean = work;
1141
1142 /*
1143 ** Queue Hang detection, we know there's
1144 ** work outstanding or the first return
1145 ** would have been taken, so increment busy
1146 ** if nothing managed to get cleaned, then
1147 ** in local_timer it will be checked and
1148 ** marked as HUNG if it exceeds a MAX attempt.
1149 */
1150 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1151 ++txr->busy;
1152 /*
1153 ** If anything gets cleaned we reset state to 1,
1154 ** note this will turn off HUNG if its set.
1155 */
1156 if (processed)
1157 txr->busy = 1;
1158
1159 if (txr->tx_avail == txr->num_desc)
1160 txr->busy = 0;
1161
1162 return;
1163 }
1164
1165
1166 #ifdef IXGBE_FDIR
1167 /*
1168 ** This routine parses packet headers so that Flow
1169 ** Director can make a hashed filter table entry
1170 ** allowing traffic flows to be identified and kept
1171 ** on the same cpu. This would be a performance
1172 ** hit, but we only do it at IXGBE_FDIR_RATE of
1173 ** packets.
1174 */
1175 static void
1176 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1177 {
1178 struct adapter *adapter = txr->adapter;
1179 struct ix_queue *que;
1180 struct ip *ip;
1181 struct tcphdr *th;
1182 struct udphdr *uh;
1183 struct ether_vlan_header *eh;
1184 union ixgbe_atr_hash_dword input = {.dword = 0};
1185 union ixgbe_atr_hash_dword common = {.dword = 0};
1186 int ehdrlen, ip_hlen;
1187 u16 etype;
1188
1189 eh = mtod(mp, struct ether_vlan_header *);
1190 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1191 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1192 etype = eh->evl_proto;
1193 } else {
1194 ehdrlen = ETHER_HDR_LEN;
1195 etype = eh->evl_encap_proto;
1196 }
1197
1198 /* Only handling IPv4 */
1199 if (etype != htons(ETHERTYPE_IP))
1200 return;
1201
1202 ip = (struct ip *)(mp->m_data + ehdrlen);
1203 ip_hlen = ip->ip_hl << 2;
1204
1205 /* check if we're UDP or TCP */
1206 switch (ip->ip_p) {
1207 case IPPROTO_TCP:
1208 th = (struct tcphdr *)((char *)ip + ip_hlen);
1209 /* src and dst are inverted */
1210 common.port.dst ^= th->th_sport;
1211 common.port.src ^= th->th_dport;
1212 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1213 break;
1214 case IPPROTO_UDP:
1215 uh = (struct udphdr *)((char *)ip + ip_hlen);
1216 /* src and dst are inverted */
1217 common.port.dst ^= uh->uh_sport;
1218 common.port.src ^= uh->uh_dport;
1219 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1220 break;
1221 default:
1222 return;
1223 }
1224
1225 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1226 if (mp->m_pkthdr.ether_vtag)
1227 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1228 else
1229 common.flex_bytes ^= etype;
1230 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1231
1232 que = &adapter->queues[txr->me];
1233 /*
1234 ** This assumes the Rx queue and Tx
1235 ** queue are bound to the same CPU
1236 */
1237 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1238 input, common, que->msix);
1239 }
1240 #endif /* IXGBE_FDIR */
1241
1242 /*
1243 ** Used to detect a descriptor that has
1244 ** been merged by Hardware RSC.
1245 */
1246 static inline u32
1247 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1248 {
1249 return (le32toh(rx->wb.lower.lo_dword.data) &
1250 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1251 }
1252
1253 /*********************************************************************
1254 *
1255 * Initialize Hardware RSC (LRO) feature on 82599
1256 * for an RX ring, this is toggled by the LRO capability
1257 * even though it is transparent to the stack.
1258 *
1259 * NOTE: since this HW feature only works with IPV4 and
1260 * our testing has shown soft LRO to be as effective
1261 * I have decided to disable this by default.
1262 *
1263 **********************************************************************/
1264 static void
1265 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1266 {
1267 struct adapter *adapter = rxr->adapter;
1268 struct ixgbe_hw *hw = &adapter->hw;
1269 u32 rscctrl, rdrxctl;
1270
1271 /* If turning LRO/RSC off we need to disable it */
1272 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1273 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1274 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1275 return;
1276 }
1277
1278 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1279 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1280 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1281 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1282 #endif /* DEV_NETMAP */
1283 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1284 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1285 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1286
1287 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1288 rscctrl |= IXGBE_RSCCTL_RSCEN;
1289 /*
1290 ** Limit the total number of descriptors that
1291 ** can be combined, so it does not exceed 64K
1292 */
1293 if (rxr->mbuf_sz == MCLBYTES)
1294 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1295 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1296 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1297 else if (rxr->mbuf_sz == MJUM9BYTES)
1298 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1299 else /* Using 16K cluster */
1300 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1301
1302 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1303
1304 /* Enable TCP header recognition */
1305 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1306 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1307 IXGBE_PSRTYPE_TCPHDR));
1308
1309 /* Disable RSC for ACK packets */
1310 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1311 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1312
1313 rxr->hw_rsc = TRUE;
1314 }
1315
1316 /*********************************************************************
1317 *
1318 * Refresh mbuf buffers for RX descriptor rings
1319 * - now keeps its own state so discards due to resource
1320 * exhaustion are unnecessary, if an mbuf cannot be obtained
1321 * it just returns, keeping its placeholder, thus it can simply
1322 * be recalled to try again.
1323 *
1324 **********************************************************************/
1325 static void
1326 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1327 {
1328 struct adapter *adapter = rxr->adapter;
1329 struct ixgbe_rx_buf *rxbuf;
1330 struct mbuf *mp;
1331 int i, j, error;
1332 bool refreshed = false;
1333
1334 i = j = rxr->next_to_refresh;
1335 /* Control the loop with one beyond */
1336 if (++j == rxr->num_desc)
1337 j = 0;
1338
1339 while (j != limit) {
1340 rxbuf = &rxr->rx_buffers[i];
1341 if (rxbuf->buf == NULL) {
1342 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1343 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1344 if (mp == NULL) {
1345 rxr->no_jmbuf.ev_count++;
1346 goto update;
1347 }
1348 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1349 m_adj(mp, ETHER_ALIGN);
1350 } else
1351 mp = rxbuf->buf;
1352
1353 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1354
1355 /* If we're dealing with an mbuf that was copied rather
1356 * than replaced, there's no need to go through busdma.
1357 */
1358 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1359 /* Get the memory mapping */
1360 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1361 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1362 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1363 if (error != 0) {
1364 printf("Refresh mbufs: payload dmamap load"
1365 " failure - %d\n", error);
1366 m_free(mp);
1367 rxbuf->buf = NULL;
1368 goto update;
1369 }
1370 rxbuf->buf = mp;
1371 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1372 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1373 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1374 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1375 } else {
1376 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1377 rxbuf->flags &= ~IXGBE_RX_COPY;
1378 }
1379
1380 refreshed = true;
1381 /* Next is precalculated */
1382 i = j;
1383 rxr->next_to_refresh = i;
1384 if (++j == rxr->num_desc)
1385 j = 0;
1386 }
1387 update:
1388 if (refreshed) /* Update hardware tail index */
1389 IXGBE_WRITE_REG(&adapter->hw,
1390 rxr->tail, rxr->next_to_refresh);
1391 return;
1392 }
1393
1394 /*********************************************************************
1395 *
1396 * Allocate memory for rx_buffer structures. Since we use one
1397 * rx_buffer per received packet, the maximum number of rx_buffer's
1398 * that we'll need is equal to the number of receive descriptors
1399 * that we've allocated.
1400 *
1401 **********************************************************************/
1402 int
1403 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1404 {
1405 struct adapter *adapter = rxr->adapter;
1406 device_t dev = adapter->dev;
1407 struct ixgbe_rx_buf *rxbuf;
1408 int bsize, error;
1409
1410 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1411 if (!(rxr->rx_buffers =
1412 (struct ixgbe_rx_buf *) malloc(bsize,
1413 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1414 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1415 error = ENOMEM;
1416 goto fail;
1417 }
1418
1419 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1420 1, 0, /* alignment, bounds */
1421 MJUM16BYTES, /* maxsize */
1422 1, /* nsegments */
1423 MJUM16BYTES, /* maxsegsize */
1424 0, /* flags */
1425 &rxr->ptag))) {
1426 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1427 goto fail;
1428 }
1429
1430 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1431 rxbuf = &rxr->rx_buffers[i];
1432 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1433 if (error) {
1434 aprint_error_dev(dev, "Unable to create RX dma map\n");
1435 goto fail;
1436 }
1437 }
1438
1439 return (0);
1440
1441 fail:
1442 /* Frees all, but can handle partial completion */
1443 ixgbe_free_receive_structures(adapter);
1444 return (error);
1445 }
1446
1447 static void
1448 ixgbe_free_receive_ring(struct rx_ring *rxr)
1449 {
1450 struct ixgbe_rx_buf *rxbuf;
1451
1452 for (int i = 0; i < rxr->num_desc; i++) {
1453 rxbuf = &rxr->rx_buffers[i];
1454 if (rxbuf->buf != NULL) {
1455 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1456 0, rxbuf->buf->m_pkthdr.len,
1457 BUS_DMASYNC_POSTREAD);
1458 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1459 rxbuf->buf->m_flags |= M_PKTHDR;
1460 m_freem(rxbuf->buf);
1461 rxbuf->buf = NULL;
1462 rxbuf->flags = 0;
1463 }
1464 }
1465 }
1466
1467 /*********************************************************************
1468 *
1469 * Initialize a receive ring and its buffers.
1470 *
1471 **********************************************************************/
1472 static int
1473 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1474 {
1475 struct adapter *adapter;
1476 struct ixgbe_rx_buf *rxbuf;
1477 #ifdef LRO
1478 struct ifnet *ifp;
1479 struct lro_ctrl *lro = &rxr->lro;
1480 #endif /* LRO */
1481 int rsize, error = 0;
1482 #ifdef DEV_NETMAP
1483 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1484 struct netmap_slot *slot;
1485 #endif /* DEV_NETMAP */
1486
1487 adapter = rxr->adapter;
1488 #ifdef LRO
1489 ifp = adapter->ifp;
1490 #endif /* LRO */
1491
1492 /* Clear the ring contents */
1493 IXGBE_RX_LOCK(rxr);
1494 #ifdef DEV_NETMAP
1495 /* same as in ixgbe_setup_transmit_ring() */
1496 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1497 #endif /* DEV_NETMAP */
1498 rsize = roundup2(adapter->num_rx_desc *
1499 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1500 bzero((void *)rxr->rx_base, rsize);
1501 /* Cache the size */
1502 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1503
1504 /* Free current RX buffer structs and their mbufs */
1505 ixgbe_free_receive_ring(rxr);
1506
1507 IXGBE_RX_UNLOCK(rxr);
1508
1509 /* Now reinitialize our supply of jumbo mbufs. The number
1510 * or size of jumbo mbufs may have changed.
1511 */
1512 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1513 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1514
1515 IXGBE_RX_LOCK(rxr);
1516
1517 /* Now replenish the mbufs */
1518 for (int j = 0; j != rxr->num_desc; ++j) {
1519 struct mbuf *mp;
1520
1521 rxbuf = &rxr->rx_buffers[j];
1522 #ifdef DEV_NETMAP
1523 /*
1524 * In netmap mode, fill the map and set the buffer
1525 * address in the NIC ring, considering the offset
1526 * between the netmap and NIC rings (see comment in
1527 * ixgbe_setup_transmit_ring() ). No need to allocate
1528 * an mbuf, so end the block with a continue;
1529 */
1530 if (slot) {
1531 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1532 uint64_t paddr;
1533 void *addr;
1534
1535 addr = PNMB(na, slot + sj, &paddr);
1536 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1537 /* Update descriptor and the cached value */
1538 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1539 rxbuf->addr = htole64(paddr);
1540 continue;
1541 }
1542 #endif /* DEV_NETMAP */
1543 rxbuf->flags = 0;
1544 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1545 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1546 if (rxbuf->buf == NULL) {
1547 error = ENOBUFS;
1548 goto fail;
1549 }
1550 mp = rxbuf->buf;
1551 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1552 /* Get the memory mapping */
1553 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1554 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1555 if (error != 0)
1556 goto fail;
1557 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1558 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1559 /* Update the descriptor and the cached value */
1560 rxr->rx_base[j].read.pkt_addr =
1561 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1562 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1563 }
1564
1565
1566 /* Setup our descriptor indices */
1567 rxr->next_to_check = 0;
1568 rxr->next_to_refresh = 0;
1569 rxr->lro_enabled = FALSE;
1570 rxr->rx_copies.ev_count = 0;
1571 #if 0 /* NetBSD */
1572 rxr->rx_bytes.ev_count = 0;
1573 #if 1 /* Fix inconsistency */
1574 rxr->rx_packets.ev_count = 0;
1575 #endif
1576 #endif
1577 rxr->vtag_strip = FALSE;
1578
1579 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1580 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1581
1582 /*
1583 ** Now set up the LRO interface:
1584 */
1585 if (ixgbe_rsc_enable)
1586 ixgbe_setup_hw_rsc(rxr);
1587 #ifdef LRO
1588 else if (ifp->if_capenable & IFCAP_LRO) {
1589 device_t dev = adapter->dev;
1590 int err = tcp_lro_init(lro);
1591 if (err) {
1592 device_printf(dev, "LRO Initialization failed!\n");
1593 goto fail;
1594 }
1595 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1596 rxr->lro_enabled = TRUE;
1597 lro->ifp = adapter->ifp;
1598 }
1599 #endif /* LRO */
1600
1601 IXGBE_RX_UNLOCK(rxr);
1602 return (0);
1603
1604 fail:
1605 ixgbe_free_receive_ring(rxr);
1606 IXGBE_RX_UNLOCK(rxr);
1607 return (error);
1608 }
1609
1610 /*********************************************************************
1611 *
1612 * Initialize all receive rings.
1613 *
1614 **********************************************************************/
1615 int
1616 ixgbe_setup_receive_structures(struct adapter *adapter)
1617 {
1618 struct rx_ring *rxr = adapter->rx_rings;
1619 int j;
1620
1621 for (j = 0; j < adapter->num_queues; j++, rxr++)
1622 if (ixgbe_setup_receive_ring(rxr))
1623 goto fail;
1624
1625 return (0);
1626 fail:
1627 /*
1628 * Free RX buffers allocated so far, we will only handle
1629 * the rings that completed, the failing case will have
1630 * cleaned up for itself. 'j' failed, so its the terminus.
1631 */
1632 for (int i = 0; i < j; ++i) {
1633 rxr = &adapter->rx_rings[i];
1634 ixgbe_free_receive_ring(rxr);
1635 }
1636
1637 return (ENOBUFS);
1638 }
1639
1640
1641 /*********************************************************************
1642 *
1643 * Free all receive rings.
1644 *
1645 **********************************************************************/
1646 void
1647 ixgbe_free_receive_structures(struct adapter *adapter)
1648 {
1649 struct rx_ring *rxr = adapter->rx_rings;
1650
1651 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1652
1653 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1654 #ifdef LRO
1655 struct lro_ctrl *lro = &rxr->lro;
1656 #endif /* LRO */
1657 ixgbe_free_receive_buffers(rxr);
1658 #ifdef LRO
1659 /* Free LRO memory */
1660 tcp_lro_free(lro);
1661 #endif /* LRO */
1662 /* Free the ring memory as well */
1663 ixgbe_dma_free(adapter, &rxr->rxdma);
1664 IXGBE_RX_LOCK_DESTROY(rxr);
1665 }
1666
1667 free(adapter->rx_rings, M_DEVBUF);
1668 }
1669
1670
1671 /*********************************************************************
1672 *
1673 * Free receive ring data structures
1674 *
1675 **********************************************************************/
1676 static void
1677 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1678 {
1679 struct adapter *adapter = rxr->adapter;
1680 struct ixgbe_rx_buf *rxbuf;
1681
1682 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1683
1684 /* Cleanup any existing buffers */
1685 if (rxr->rx_buffers != NULL) {
1686 for (int i = 0; i < adapter->num_rx_desc; i++) {
1687 rxbuf = &rxr->rx_buffers[i];
1688 if (rxbuf->buf != NULL) {
1689 bus_dmamap_sync(rxr->ptag->dt_dmat,
1690 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1691 BUS_DMASYNC_POSTREAD);
1692 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1693 rxbuf->buf->m_flags |= M_PKTHDR;
1694 m_freem(rxbuf->buf);
1695 }
1696 rxbuf->buf = NULL;
1697 if (rxbuf->pmap != NULL) {
1698 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1699 rxbuf->pmap = NULL;
1700 }
1701 }
1702 if (rxr->rx_buffers != NULL) {
1703 free(rxr->rx_buffers, M_DEVBUF);
1704 rxr->rx_buffers = NULL;
1705 }
1706 }
1707
1708 if (rxr->ptag != NULL) {
1709 ixgbe_dma_tag_destroy(rxr->ptag);
1710 rxr->ptag = NULL;
1711 }
1712
1713 return;
1714 }
1715
1716 static __inline void
1717 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1718 {
1719 struct adapter *adapter = ifp->if_softc;
1720
1721 #ifdef LRO
1722 struct ethercom *ec = &adapter->osdep.ec;
1723
1724 /*
1725 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1726 * should be computed by hardware. Also it should not have VLAN tag in
1727 * ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1728 */
1729 if (rxr->lro_enabled &&
1730 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1731 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1732 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1733 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1734 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1735 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1736 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1737 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1738 /*
1739 * Send to the stack if:
1740 ** - LRO not enabled, or
1741 ** - no LRO resources, or
1742 ** - lro enqueue fails
1743 */
1744 if (rxr->lro.lro_cnt != 0)
1745 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1746 return;
1747 }
1748 #endif /* LRO */
1749
1750 IXGBE_RX_UNLOCK(rxr);
1751
1752 if_percpuq_enqueue(adapter->ipq, m);
1753
1754 IXGBE_RX_LOCK(rxr);
1755 }
1756
1757 static __inline void
1758 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1759 {
1760 struct ixgbe_rx_buf *rbuf;
1761
1762 rbuf = &rxr->rx_buffers[i];
1763
1764
1765 /*
1766 ** With advanced descriptors the writeback
1767 ** clobbers the buffer addrs, so its easier
1768 ** to just free the existing mbufs and take
1769 ** the normal refresh path to get new buffers
1770 ** and mapping.
1771 */
1772
1773 if (rbuf->buf != NULL) {/* Partial chain ? */
1774 rbuf->fmp->m_flags |= M_PKTHDR;
1775 m_freem(rbuf->fmp);
1776 rbuf->fmp = NULL;
1777 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1778 } else if (rbuf->buf) {
1779 m_free(rbuf->buf);
1780 rbuf->buf = NULL;
1781 }
1782 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1783
1784 rbuf->flags = 0;
1785
1786 return;
1787 }
1788
1789
1790 /*********************************************************************
1791 *
1792 * This routine executes in interrupt context. It replenishes
1793 * the mbufs in the descriptor and sends data which has been
1794 * dma'ed into host memory to upper layer.
1795 *
1796 * Return TRUE for more work, FALSE for all clean.
1797 *********************************************************************/
1798 bool
1799 ixgbe_rxeof(struct ix_queue *que)
1800 {
1801 struct adapter *adapter = que->adapter;
1802 struct rx_ring *rxr = que->rxr;
1803 struct ifnet *ifp = adapter->ifp;
1804 #ifdef LRO
1805 struct lro_ctrl *lro = &rxr->lro;
1806 #endif /* LRO */
1807 int i, nextp, processed = 0;
1808 u32 staterr = 0;
1809 u32 count = adapter->rx_process_limit;
1810 union ixgbe_adv_rx_desc *cur;
1811 struct ixgbe_rx_buf *rbuf, *nbuf;
1812 #ifdef RSS
1813 u16 pkt_info;
1814 #endif
1815
1816 IXGBE_RX_LOCK(rxr);
1817
1818 #ifdef DEV_NETMAP
1819 /* Same as the txeof routine: wakeup clients on intr. */
1820 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1821 IXGBE_RX_UNLOCK(rxr);
1822 return (FALSE);
1823 }
1824 #endif /* DEV_NETMAP */
1825
1826 for (i = rxr->next_to_check; count != 0;) {
1827 struct mbuf *sendmp, *mp;
1828 u32 rsc, ptype;
1829 u16 len;
1830 u16 vtag = 0;
1831 bool eop;
1832
1833 /* Sync the ring. */
1834 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1835 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1836
1837 cur = &rxr->rx_base[i];
1838 staterr = le32toh(cur->wb.upper.status_error);
1839 #ifdef RSS
1840 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1841 #endif
1842
1843 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1844 break;
1845 if ((ifp->if_flags & IFF_RUNNING) == 0)
1846 break;
1847
1848 count--;
1849 sendmp = NULL;
1850 nbuf = NULL;
1851 rsc = 0;
1852 cur->wb.upper.status_error = 0;
1853 rbuf = &rxr->rx_buffers[i];
1854 mp = rbuf->buf;
1855
1856 len = le16toh(cur->wb.upper.length);
1857 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1858 IXGBE_RXDADV_PKTTYPE_MASK;
1859 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1860
1861 /* Make sure bad packets are discarded */
1862 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1863 #if __FreeBSD_version >= 1100036
1864 if (IXGBE_IS_VF(adapter))
1865 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1866 #endif
1867 rxr->rx_discarded.ev_count++;
1868 ixgbe_rx_discard(rxr, i);
1869 goto next_desc;
1870 }
1871
1872 /*
1873 ** On the 82599, which supports a hardware
1874 ** LRO (called HW RSC), packets need not
1875 ** be fragmented across sequential
1876 ** descriptors; instead the next descriptor
1877 ** is indicated in bits of the descriptor.
1878 ** This also means that we might process
1879 ** more than one packet at a time, something
1880 ** that has never been true before; it
1881 ** required eliminating global chain pointers
1882 ** in favor of what we are doing here. -jfv
1883 */
1884 if (!eop) {
1885 /*
1886 ** Figure out the next descriptor
1887 ** of this frame.
1888 */
1889 if (rxr->hw_rsc == TRUE) {
1890 rsc = ixgbe_rsc_count(cur);
1891 rxr->rsc_num += (rsc - 1);
1892 }
1893 if (rsc) { /* Get hardware index */
1894 nextp = ((staterr &
1895 IXGBE_RXDADV_NEXTP_MASK) >>
1896 IXGBE_RXDADV_NEXTP_SHIFT);
1897 } else { /* Just sequential */
1898 nextp = i + 1;
1899 if (nextp == adapter->num_rx_desc)
1900 nextp = 0;
1901 }
1902 nbuf = &rxr->rx_buffers[nextp];
1903 prefetch(nbuf);
1904 }
1905 /*
1906 ** Rather than using the fmp/lmp global pointers
1907 ** we now keep the head of a packet chain in the
1908 ** buffer struct and pass this along from one
1909 ** descriptor to the next, until we get EOP.
1910 */
1911 mp->m_len = len;
1912 /*
1913 ** See if there is a stored head: if so, this
1914 ** descriptor continues an existing chain.
1915 */
1916 sendmp = rbuf->fmp;
1917 if (sendmp != NULL) { /* secondary frag */
1918 rbuf->buf = rbuf->fmp = NULL;
1919 mp->m_flags &= ~M_PKTHDR;
1920 sendmp->m_pkthdr.len += mp->m_len;
1921 } else {
1922 /*
1923 * Optimize. This might be a small packet,
1924 * maybe just a TCP ACK. Do a fast copy that
1925 * is cache aligned into a new mbuf, and
1926 * leave the old mbuf+cluster for re-use.
1927 */
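			/*
			 * The original mbuf+cluster stays attached to the
			 * rx_buffer and is flagged IXGBE_RX_COPY below so
			 * that the refresh path can reuse it in place
			 * instead of allocating and remapping a new one.
			 */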
1928 if (eop && len <= IXGBE_RX_COPY_LEN) {
1929 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1930 if (sendmp != NULL) {
1931 sendmp->m_data +=
1932 IXGBE_RX_COPY_ALIGN;
1933 ixgbe_bcopy(mp->m_data,
1934 sendmp->m_data, len);
1935 sendmp->m_len = len;
1936 rxr->rx_copies.ev_count++;
1937 rbuf->flags |= IXGBE_RX_COPY;
1938 }
1939 }
1940 if (sendmp == NULL) {
1941 rbuf->buf = rbuf->fmp = NULL;
1942 sendmp = mp;
1943 }
1944
1945 /* first desc of a non-packet-split chain */
1946 sendmp->m_flags |= M_PKTHDR;
1947 sendmp->m_pkthdr.len = mp->m_len;
1948 }
1949 ++processed;
1950
1951 /* Pass the head pointer on */
1952 if (eop == 0) {
1953 nbuf->fmp = sendmp;
1954 sendmp = NULL;
1955 mp->m_next = nbuf->buf;
1956 } else { /* Sending this frame */
1957 m_set_rcvif(sendmp, ifp);
1958 ifp->if_ipackets++;
1959 rxr->rx_packets.ev_count++;
1960 /* capture data for AIM */
1961 rxr->bytes += sendmp->m_pkthdr.len;
1962 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1963 /* Process vlan info */
1964 if ((rxr->vtag_strip) &&
1965 (staterr & IXGBE_RXD_STAT_VP))
1966 vtag = le16toh(cur->wb.upper.vlan);
1967 if (vtag) {
1968 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1969 printf("%s: could not apply VLAN "
1970 "tag", __func__));
1971 }
1972 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1973 ixgbe_rx_checksum(staterr, sendmp, ptype,
1974 &adapter->stats.pf);
1975 }
1976
1977 #if 0 /* FreeBSD */
1978 /*
1979 * In case of multiqueue, we have RXCSUM.PCSD bit set
1980 * and never cleared. This means we have RSS hash
1981 * available to be used.
1982 */
1983 if (adapter->num_queues > 1) {
1984 sendmp->m_pkthdr.flowid =
1985 le32toh(cur->wb.lower.hi_dword.rss);
1986 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1987 case IXGBE_RXDADV_RSSTYPE_IPV4:
1988 M_HASHTYPE_SET(sendmp,
1989 M_HASHTYPE_RSS_IPV4);
1990 break;
1991 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1992 M_HASHTYPE_SET(sendmp,
1993 M_HASHTYPE_RSS_TCP_IPV4);
1994 break;
1995 case IXGBE_RXDADV_RSSTYPE_IPV6:
1996 M_HASHTYPE_SET(sendmp,
1997 M_HASHTYPE_RSS_IPV6);
1998 break;
1999 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2000 M_HASHTYPE_SET(sendmp,
2001 M_HASHTYPE_RSS_TCP_IPV6);
2002 break;
2003 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2004 M_HASHTYPE_SET(sendmp,
2005 M_HASHTYPE_RSS_IPV6_EX);
2006 break;
2007 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2008 M_HASHTYPE_SET(sendmp,
2009 M_HASHTYPE_RSS_TCP_IPV6_EX);
2010 break;
2011 #if __FreeBSD_version > 1100000
2012 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2013 M_HASHTYPE_SET(sendmp,
2014 M_HASHTYPE_RSS_UDP_IPV4);
2015 break;
2016 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2017 M_HASHTYPE_SET(sendmp,
2018 M_HASHTYPE_RSS_UDP_IPV6);
2019 break;
2020 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2021 M_HASHTYPE_SET(sendmp,
2022 M_HASHTYPE_RSS_UDP_IPV6_EX);
2023 break;
2024 #endif
2025 default:
2026 M_HASHTYPE_SET(sendmp,
2027 M_HASHTYPE_OPAQUE_HASH);
2028 }
2029 } else {
2030 sendmp->m_pkthdr.flowid = que->msix;
2031 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2032 }
2033 #endif
2034 }
2035 next_desc:
2036 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2037 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2038
2039 /* Advance our pointers to the next descriptor. */
2040 if (++i == rxr->num_desc)
2041 i = 0;
2042
2043 /* Now send to the stack or do LRO */
2044 if (sendmp != NULL) {
2045 rxr->next_to_check = i;
2046 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2047 i = rxr->next_to_check;
2048 }
2049
2050 /* Every 8 descriptors we go to refresh mbufs */
2051 if (processed == 8) {
2052 ixgbe_refresh_mbufs(rxr, i);
2053 processed = 0;
2054 }
2055 }
2056
2057 /* Refresh any remaining buf structs */
2058 if (ixgbe_rx_unrefreshed(rxr))
2059 ixgbe_refresh_mbufs(rxr, i);
2060
2061 rxr->next_to_check = i;
2062
2063 #ifdef LRO
2064 /*
2065 * Flush any outstanding LRO work
2066 */
2067 tcp_lro_flush_all(lro);
2068 #endif /* LRO */
2069
2070 IXGBE_RX_UNLOCK(rxr);
2071
2072 /*
2073 ** Still have cleaning to do?
2074 */
2075 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2076 return true;
2077 else
2078 return false;
2079 }
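/*
 * Illustrative only (not part of the driver build): a minimal sketch of
 * how a caller can consume the ixgbe_rxeof() return value.  The handler
 * below is hypothetical; the real dispatch lives in the interrupt and
 * softint handlers in ixgbe.c.
 */
#if 0	/* example, not compiled */
static void
example_rx_handler(struct ix_queue *que)
{
	bool more;

	/* Keep cleaning while descriptors with DD set remain. */
	do {
		more = ixgbe_rxeof(que);
	} while (more);
}
#endif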
2080
2081
2082 /*********************************************************************
2083 *
2084 * Verify that the hardware indicated that the checksum is valid.
2085 * Inform the stack about the status of the checksum so that the
2086 * stack does not spend time verifying it again.
2087 *
2088 *********************************************************************/
2089 static void
2090 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2091 struct ixgbe_hw_stats *stats)
2092 {
2093 u16 status = (u16) staterr;
2094 u8 errors = (u8) (staterr >> 24);
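	/*
	 * The writeback status_error dword carries the status bits in its
	 * low word and the error bits of interest (IPE, TCPE) in its top
	 * byte, so the casts above let the legacy IXGBE_RXD_STAT_ and
	 * IXGBE_RXD_ERR_ masks be applied directly.
	 */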
2095 #if 0
2096 bool sctp = false;
2097
2098 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2099 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2100 sctp = true;
2101 #endif
2102
2103 /* IPv4 checksum */
2104 if (status & IXGBE_RXD_STAT_IPCS) {
2105 stats->ipcs.ev_count++;
2106 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2107 /* IP Checksum Good */
2108 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2109 } else {
2110 stats->ipcs_bad.ev_count++;
2111 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2112 }
2113 }
2114 /* TCP/UDP/SCTP checksum */
2115 if (status & IXGBE_RXD_STAT_L4CS) {
2116 stats->l4cs.ev_count++;
2117 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2118 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2119 mp->m_pkthdr.csum_flags |= type;
2120 } else {
2121 stats->l4cs_bad.ev_count++;
2122 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2123 }
2124 }
2125 }
2126
2127
2128 /********************************************************************
2129 * Manage DMA'able memory.
2130 *******************************************************************/
2131
2132 int
2133 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2134 struct ixgbe_dma_alloc *dma, const int mapflags)
2135 {
2136 device_t dev = adapter->dev;
2137 int r, rsegs;
2138
2139 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2140 DBA_ALIGN, 0, /* alignment, bounds */
2141 size, /* maxsize */
2142 1, /* nsegments */
2143 size, /* maxsegsize */
2144 BUS_DMA_ALLOCNOW, /* flags */
2145 &dma->dma_tag);
2146 if (r != 0) {
2147 aprint_error_dev(dev,
2148 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2149 goto fail_0;
2150 }
2151
2152 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2153 size,
2154 dma->dma_tag->dt_alignment,
2155 dma->dma_tag->dt_boundary,
2156 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2157 if (r != 0) {
2158 aprint_error_dev(dev,
2159 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2160 goto fail_1;
2161 }
2162
2163 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2164 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2165 if (r != 0) {
2166 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2167 __func__, r);
2168 goto fail_2;
2169 }
2170
2171 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2172 if (r != 0) {
2173 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2174 __func__, r);
2175 goto fail_3;
2176 }
2177
2178 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2179 size,
2180 NULL,
2181 mapflags | BUS_DMA_NOWAIT);
2182 if (r != 0) {
2183 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2184 __func__, r);
2185 goto fail_4;
2186 }
2187 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2188 dma->dma_size = size;
2189 return 0;
2190 fail_4:
2191 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2192 fail_3:
2193 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2194 fail_2:
2195 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2196 fail_1:
2197 ixgbe_dma_tag_destroy(dma->dma_tag);
2198 fail_0:
2199 return r;
2200 }
2201
2202 void
2203 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2204 {
2205 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2206 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2207 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2208 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2209 ixgbe_dma_tag_destroy(dma->dma_tag);
2210 }
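/*
 * Illustrative only (not part of the driver build): a minimal sketch of
 * the ixgbe_dma_malloc()/ixgbe_dma_free() pairing.  The caller and the
 * PAGE_SIZE allocation below are hypothetical; ixgbe_allocate_queues()
 * below is the real consumer of ixgbe_dma_malloc().
 */
#if 0	/* example, not compiled */
static int
example_dma_area(struct adapter *adapter)
{
	struct ixgbe_dma_alloc dma;
	int error;

	/* One area of DMA-safe memory, allocated, mapped and loaded. */
	error = ixgbe_dma_malloc(adapter, PAGE_SIZE, &dma, BUS_DMA_NOWAIT);
	if (error != 0)
		return error;

	/* ... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus address) ... */

	ixgbe_dma_free(adapter, &dma);
	return 0;
}
#endif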
2211
2212
2213 /*********************************************************************
2214 *
2215 * Allocate memory for the transmit and receive rings, and then
2216 * the descriptors associated with each, called only once at attach.
2217 *
2218 **********************************************************************/
2219 int
2220 ixgbe_allocate_queues(struct adapter *adapter)
2221 {
2222 device_t dev = adapter->dev;
2223 struct ix_queue *que;
2224 struct tx_ring *txr;
2225 struct rx_ring *rxr;
2226 int rsize, tsize, error = IXGBE_SUCCESS;
2227 int txconf = 0, rxconf = 0;
2228 #ifdef PCI_IOV
2229 enum ixgbe_iov_mode iov_mode;
2230 #endif
2231
2232 /* First allocate the top level queue structs */
2233 if (!(adapter->queues =
2234 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2235 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2236 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2237 error = ENOMEM;
2238 goto fail;
2239 }
2240
2241 /* First allocate the TX ring struct memory */
2242 if (!(adapter->tx_rings =
2243 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2244 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2245 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2246 error = ENOMEM;
2247 goto tx_fail;
2248 }
2249
2250 /* Next allocate the RX */
2251 if (!(adapter->rx_rings =
2252 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2253 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2254 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2255 error = ENOMEM;
2256 goto rx_fail;
2257 }
2258
2259 /* For the ring itself */
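	/*
	 * Each advanced TX descriptor is 16 bytes; rounding the total up
	 * to DBA_ALIGN keeps the descriptor base address aligned as the
	 * DMA engine requires.
	 */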
2260 tsize = roundup2(adapter->num_tx_desc *
2261 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2262
2263 #ifdef PCI_IOV
2264 iov_mode = ixgbe_get_iov_mode(adapter);
2265 adapter->pool = ixgbe_max_vfs(iov_mode);
2266 #else
2267 adapter->pool = 0;
2268 #endif
2269 /*
2270 * Now set up the TX queues; txconf is needed to handle the
2271 * possibility that things fail midcourse and we need to
2272 * unwind the allocations gracefully
2273 */
2274 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2275 /* Set up some basics */
2276 txr = &adapter->tx_rings[i];
2277 txr->adapter = adapter;
2278 #ifdef PCI_IOV
2279 txr->me = ixgbe_pf_que_index(iov_mode, i);
2280 #else
2281 txr->me = i;
2282 #endif
2283 txr->num_desc = adapter->num_tx_desc;
2284
2285 /* Initialize the TX side lock */
2286 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2287 device_xname(dev), txr->me);
2288 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2289
2290 if (ixgbe_dma_malloc(adapter, tsize,
2291 &txr->txdma, BUS_DMA_NOWAIT)) {
2292 aprint_error_dev(dev,
2293 "Unable to allocate TX Descriptor memory\n");
2294 error = ENOMEM;
2295 goto err_tx_desc;
2296 }
2297 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2298 bzero((void *)txr->tx_base, tsize);
2299
2300 /* Now allocate transmit buffers for the ring */
2301 if (ixgbe_allocate_transmit_buffers(txr)) {
2302 aprint_error_dev(dev,
2303 "Critical Failure setting up transmit buffers\n");
2304 error = ENOMEM;
2305 goto err_tx_desc;
2306 }
2307 #ifndef IXGBE_LEGACY_TX
2308 /* Allocate a buf ring */
2309 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2310 if (txr->txr_interq == NULL) {
2311 aprint_error_dev(dev,
2312 "Critical Failure setting up buf ring\n");
2313 error = ENOMEM;
2314 goto err_tx_desc;
2315 }
2316 #endif
2317 }
2318
2319 /*
2320 * Next the RX queues...
2321 */
2322 rsize = roundup2(adapter->num_rx_desc *
2323 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2324 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2325 rxr = &adapter->rx_rings[i];
2326 /* Set up some basics */
2327 rxr->adapter = adapter;
2328 #ifdef PCI_IOV
2329 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2330 #else
2331 rxr->me = i;
2332 #endif
2333 rxr->num_desc = adapter->num_rx_desc;
2334
2335 /* Initialize the RX side lock */
2336 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2337 device_xname(dev), rxr->me);
2338 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2339
2340 if (ixgbe_dma_malloc(adapter, rsize,
2341 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2342 aprint_error_dev(dev,
2343 "Unable to allocate RX Descriptor memory\n");
2344 error = ENOMEM;
2345 goto err_rx_desc;
2346 }
2347 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2348 bzero((void *)rxr->rx_base, rsize);
2349
2350 /* Allocate receive buffers for the ring */
2351 if (ixgbe_allocate_receive_buffers(rxr)) {
2352 aprint_error_dev(dev,
2353 "Critical Failure setting up receive buffers\n");
2354 error = ENOMEM;
2355 goto err_rx_desc;
2356 }
2357 }
2358
2359 /*
2360 ** Finally set up the queue holding structs
2361 */
2362 for (int i = 0; i < adapter->num_queues; i++) {
2363 que = &adapter->queues[i];
2364 que->adapter = adapter;
2365 que->me = i;
2366 que->txr = &adapter->tx_rings[i];
2367 que->rxr = &adapter->rx_rings[i];
2368 }
2369
2370 return (0);
2371
2372 err_rx_desc:
2373 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2374 ixgbe_dma_free(adapter, &rxr->rxdma);
2375 err_tx_desc:
2376 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2377 ixgbe_dma_free(adapter, &txr->txdma);
2378 free(adapter->rx_rings, M_DEVBUF);
2379 rx_fail:
2380 free(adapter->tx_rings, M_DEVBUF);
2381 tx_fail:
2382 free(adapter->queues, M_DEVBUF);
2383 fail:
2384 return (error);
2385 }
2386