1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
62 /*$NetBSD: ix_txrx.c,v 1.19 2017/02/10 06:35:22 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73
74 extern int ix_crcstrip;
75 #endif
76
77 /*
78 ** HW RSC control:
79 ** this feature only works with
80 ** IPv4, and only on 82599 and later.
81 ** Also this will cause IP forwarding to
82 ** fail and that can't be controlled by
83 ** the stack as LRO can. For all these
84 ** reasons I've deemed it best to leave
85 ** this off and not bother with a tuneable
86 ** interface; enabling it requires setting
87 ** ixgbe_rsc_enable and recompiling.
88 */
89 static bool ixgbe_rsc_enable = FALSE;
90
91 #ifdef IXGBE_FDIR
92 /*
93 ** For Flow Director: this is the
94 ** number of TX packets we sample
95 ** for the filter pool; this means
96 ** every 20th packet will be probed.
97 **
98 ** This feature can be disabled by
99 ** setting this to 0.
100 */
101 static int atr_sample_rate = 20;
102 #endif
103
104 /*********************************************************************
105 * Local Function prototypes
106 *********************************************************************/
107 static void ixgbe_setup_transmit_ring(struct tx_ring *);
108 static void ixgbe_free_transmit_buffers(struct tx_ring *);
109 static int ixgbe_setup_receive_ring(struct rx_ring *);
110 static void ixgbe_free_receive_buffers(struct rx_ring *);
111
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
116 static int ixgbe_tx_ctx_setup(struct tx_ring *,
117 struct mbuf *, u32 *, u32 *);
118 static int ixgbe_tso_setup(struct tx_ring *,
119 struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126
127 static void ixgbe_setup_hw_rsc(struct rx_ring *);
128
129 /*********************************************************************
130 * Transmit entry point
131 *
132 * ixgbe_start is called by the stack to initiate a transmit.
133 * The driver will remain in this routine as long as there are
134 * packets to transmit and transmit resources are available.
135 * In case resources are not available, the stack is notified
136 * and the packet is requeued.
137 **********************************************************************/
138
139 void
140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
141 {
142 int rc;
143 struct mbuf *m_head;
144 struct adapter *adapter = txr->adapter;
145
146 IXGBE_TX_LOCK_ASSERT(txr);
147
148 if ((ifp->if_flags & IFF_RUNNING) == 0)
149 return;
150 if (!adapter->link_active)
151 return;
152
153 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
154 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
155 break;
156
157 IFQ_POLL(&ifp->if_snd, m_head);
158 if (m_head == NULL)
159 break;
160
161 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
162 break;
163 }
164 IFQ_DEQUEUE(&ifp->if_snd, m_head);
165 if (rc == EFBIG) {
166 struct mbuf *mtmp;
167
168 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
169 m_head = mtmp;
170 rc = ixgbe_xmit(txr, m_head);
171 if (rc != 0)
172 adapter->efbig2_tx_dma_setup.ev_count++;
173 } else
174 adapter->mbuf_defrag_failed.ev_count++;
175 }
176 if (rc != 0) {
177 m_freem(m_head);
178 continue;
179 }
180
181 /* Send a copy of the frame to the BPF listener */
182 bpf_mtap(ifp, m_head);
183 }
184 return;
185 }
186
187 /*
188 * Legacy TX start - called by the stack, this
189 * always uses the first tx ring, and should
190 * not be used with multiqueue tx enabled.
191 */
192 void
193 ixgbe_start(struct ifnet *ifp)
194 {
195 struct adapter *adapter = ifp->if_softc;
196 struct tx_ring *txr = adapter->tx_rings;
197
198 if (ifp->if_flags & IFF_RUNNING) {
199 IXGBE_TX_LOCK(txr);
200 ixgbe_start_locked(txr, ifp);
201 IXGBE_TX_UNLOCK(txr);
202 }
203 return;
204 }
205
206 #ifndef IXGBE_LEGACY_TX
207
208 /*
209 ** Multiqueue Transmit Entry Point
210 ** (if_transmit function)
211 */
212 int
213 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
214 {
215 struct adapter *adapter = ifp->if_softc;
216 struct tx_ring *txr;
217 int i, err = 0;
218 #ifdef RSS
219 uint32_t bucket_id;
220 #endif
221
222 /*
223 * When doing RSS, map it to the same outbound queue
224 * as the incoming flow would be mapped to.
225 *
226 * If everything is setup correctly, it should be the
227 * same bucket that the current CPU we're on is.
228 */
229 #if 0
230 #if __FreeBSD_version < 1100054
231 if (m->m_flags & M_FLOWID) {
232 #else
233 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
234 #endif
235 #ifdef RSS
236 if (rss_hash2bucket(m->m_pkthdr.flowid,
237 M_HASHTYPE_GET(m), &bucket_id) == 0) {
238 /* TODO: spit out something if bucket_id > num_queues? */
239 i = bucket_id % adapter->num_queues;
240 #ifdef IXGBE_DEBUG
241 if (bucket_id > adapter->num_queues)
242 if_printf(ifp, "bucket_id (%d) > num_queues "
243 "(%d)\n", bucket_id, adapter->num_queues);
244 #endif
245 } else
246 #endif /* RSS */
247 i = m->m_pkthdr.flowid % adapter->num_queues;
248 } else
249 #endif
250 i = cpu_index(curcpu()) % adapter->num_queues;
251
252 /* Check for a hung queue and pick alternative */
253 if (((1 << i) & adapter->active_queues) == 0)
254 i = ffs64(adapter->active_queues);
255
256 txr = &adapter->tx_rings[i];
257
258 err = pcq_put(txr->txr_interq, m);
259 if (err == false) {
260 m_freem(m);
261 txr->pcq_drops.ev_count++;
262 return (err);
263 }
264 if (IXGBE_TX_TRYLOCK(txr)) {
265 ixgbe_mq_start_locked(ifp, txr);
266 IXGBE_TX_UNLOCK(txr);
267 } else
268 softint_schedule(txr->txr_si);
269
270 return (0);
271 }
272
273 int
274 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
275 {
276 struct adapter *adapter = txr->adapter;
277 struct mbuf *next;
278 int enqueued = 0, err = 0;
279
280 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
281 adapter->link_active == 0)
282 return (ENETDOWN);
283
284 /* Process the queue */
285 while ((next = pcq_get(txr->txr_interq)) != NULL) {
286 if ((err = ixgbe_xmit(txr, next)) != 0) {
287 m_freem(next);
288 /* All errors are counted in ixgbe_xmit() */
289 break;
290 }
291 enqueued++;
292 #if 0 // this is VF-only
293 #if __FreeBSD_version >= 1100036
294 /*
295 * Since we're looking at the tx ring, we can check
296 * to see if we're a VF by examining our tail register
297 * address.
298 */
299 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
300 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
301 #endif
302 #endif /* 0 */
303 /* Send a copy of the frame to the BPF listener */
304 bpf_mtap(ifp, next);
305 if ((ifp->if_flags & IFF_RUNNING) == 0)
306 break;
307 }
308
309 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
310 ixgbe_txeof(txr);
311
312 return (err);
313 }
314
315 /*
316 * Called from a taskqueue to drain queued transmit packets.
317 */
318 void
319 ixgbe_deferred_mq_start(void *arg)
320 {
321 struct tx_ring *txr = arg;
322 struct adapter *adapter = txr->adapter;
323 struct ifnet *ifp = adapter->ifp;
324
325 IXGBE_TX_LOCK(txr);
326 if (pcq_peek(txr->txr_interq) != NULL)
327 ixgbe_mq_start_locked(ifp, txr);
328 IXGBE_TX_UNLOCK(txr);
329 }
330
331 #endif /* IXGBE_LEGACY_TX */
332
333
334 /*********************************************************************
335 *
336 * This routine maps the mbufs to tx descriptors, allowing the
337 * TX engine to transmit the packets.
338 * - return 0 on success, positive on failure
339 *
340 **********************************************************************/
341
342 static int
343 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
344 {
345 struct m_tag *mtag;
346 struct adapter *adapter = txr->adapter;
347 struct ethercom *ec = &adapter->osdep.ec;
348 u32 olinfo_status = 0, cmd_type_len;
349 int i, j, error;
350 int first;
351 bus_dmamap_t map;
352 struct ixgbe_tx_buf *txbuf;
353 union ixgbe_adv_tx_desc *txd = NULL;
354
355 /* Basic descriptor defines */
356 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
357 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
358
359 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
360 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
361
362 /*
363 * Important to capture the first descriptor
364 * used because it will contain the index of
365 * the one we tell the hardware to report back
366 */
367 first = txr->next_avail_desc;
368 txbuf = &txr->tx_buffers[first];
369 map = txbuf->map;
370
371 /*
372 * Map the packet for DMA.
373 */
374 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
375 m_head, BUS_DMA_NOWAIT);
376
377 if (__predict_false(error)) {
378
379 switch (error) {
380 case EAGAIN:
381 adapter->eagain_tx_dma_setup.ev_count++;
382 return EAGAIN;
383 case ENOMEM:
384 adapter->enomem_tx_dma_setup.ev_count++;
385 return EAGAIN;
386 case EFBIG:
387 /*
388 * XXX Try it again?
389 * do m_defrag() and retry bus_dmamap_load_mbuf().
390 */
391 adapter->efbig_tx_dma_setup.ev_count++;
392 return error;
393 case EINVAL:
394 adapter->einval_tx_dma_setup.ev_count++;
395 return error;
396 default:
397 adapter->other_tx_dma_setup.ev_count++;
398 return error;
399 }
400 }
401
402 /* Make certain there are enough descriptors */
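	/* XXX The "+ 2" appears to reserve a slot for the offload
	 *     context descriptor plus one spare slot. */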
403 if (txr->tx_avail < (map->dm_nsegs + 2)) {
404 txr->no_desc_avail.ev_count++;
405 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
406 return EAGAIN;
407 }
408
409 /*
410 * Set up the appropriate offload context
411 * this will consume the first descriptor
412 */
413 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
414 if (__predict_false(error)) {
415 return (error);
416 }
417
418 #ifdef IXGBE_FDIR
419 /* Do the flow director magic */
420 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
421 ++txr->atr_count;
422 if (txr->atr_count >= atr_sample_rate) {
423 ixgbe_atr(txr, m_head);
424 txr->atr_count = 0;
425 }
426 }
427 #endif
428
429 olinfo_status |= IXGBE_ADVTXD_CC;
430 i = txr->next_avail_desc;
431 for (j = 0; j < map->dm_nsegs; j++) {
432 bus_size_t seglen;
433 bus_addr_t segaddr;
434
435 txbuf = &txr->tx_buffers[i];
436 txd = &txr->tx_base[i];
437 seglen = map->dm_segs[j].ds_len;
438 segaddr = htole64(map->dm_segs[j].ds_addr);
439
440 txd->read.buffer_addr = segaddr;
441 txd->read.cmd_type_len = htole32(txr->txd_cmd |
442 cmd_type_len |seglen);
443 txd->read.olinfo_status = htole32(olinfo_status);
444
445 if (++i == txr->num_desc)
446 i = 0;
447 }
448
449 txd->read.cmd_type_len |=
450 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
451 txr->tx_avail -= map->dm_nsegs;
452 txr->next_avail_desc = i;
453
454 txbuf->m_head = m_head;
455 /*
456 * Here we swap the map so the last descriptor,
457 * which gets the completion interrupt, has the
458 * real map, and the first descriptor gets the
459 * unused map from this descriptor.
460 */
461 txr->tx_buffers[first].map = txbuf->map;
462 txbuf->map = map;
463 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
464 BUS_DMASYNC_PREWRITE);
465
466 /* Set the EOP descriptor that will be marked done */
467 txbuf = &txr->tx_buffers[first];
468 txbuf->eop = txd;
469
470 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
471 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
472 /*
473 * Advance the Transmit Descriptor Tail (Tdt), this tells the
474 * hardware that this frame is available to transmit.
475 */
476 ++txr->total_packets.ev_count;
477 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
478
479 /* Mark queue as having work */
480 if (txr->busy == 0)
481 txr->busy = 1;
482
483 return 0;
484 }
485
486
487 /*********************************************************************
488 *
489 * Allocate memory for tx_buffer structures. The tx_buffer stores all
490 * the information needed to transmit a packet on the wire. This is
491 * called only once at attach, setup is done every reset.
492 *
493 **********************************************************************/
494 int
495 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
496 {
497 struct adapter *adapter = txr->adapter;
498 device_t dev = adapter->dev;
499 struct ixgbe_tx_buf *txbuf;
500 int error, i;
501
502 /*
503 * Setup DMA descriptor areas.
504 */
505 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
506 1, 0, /* alignment, bounds */
507 IXGBE_TSO_SIZE, /* maxsize */
508 adapter->num_segs, /* nsegments */
509 PAGE_SIZE, /* maxsegsize */
510 0, /* flags */
511 &txr->txtag))) {
512 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
513 goto fail;
514 }
515
516 if (!(txr->tx_buffers =
517 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
518 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
519 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
520 error = ENOMEM;
521 goto fail;
522 }
523
524 /* Create the descriptor buffer dma maps */
525 txbuf = txr->tx_buffers;
526 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
527 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
528 if (error != 0) {
529 aprint_error_dev(dev,
530 "Unable to create TX DMA map (%d)\n", error);
531 goto fail;
532 }
533 }
534
535 return 0;
536 fail:
537 /* We free all; this handles the case where we failed partway through */
538 #if 0 /* XXX was FreeBSD */
539 ixgbe_free_transmit_structures(adapter);
540 #else
541 ixgbe_free_transmit_buffers(txr);
542 #endif
543 return (error);
544 }
545
546 /*********************************************************************
547 *
548 * Initialize a transmit ring.
549 *
550 **********************************************************************/
551 static void
552 ixgbe_setup_transmit_ring(struct tx_ring *txr)
553 {
554 struct adapter *adapter = txr->adapter;
555 struct ixgbe_tx_buf *txbuf;
556 #ifdef DEV_NETMAP
557 struct netmap_adapter *na = NA(adapter->ifp);
558 struct netmap_slot *slot;
559 #endif /* DEV_NETMAP */
560
561 /* Clear the old ring contents */
562 IXGBE_TX_LOCK(txr);
563 #ifdef DEV_NETMAP
564 /*
565 * (under lock): if in netmap mode, do some consistency
566 * checks and set slot to entry 0 of the netmap ring.
567 */
568 slot = netmap_reset(na, NR_TX, txr->me, 0);
569 #endif /* DEV_NETMAP */
570 bzero((void *)txr->tx_base,
571 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
572 /* Reset indices */
573 txr->next_avail_desc = 0;
574 txr->next_to_clean = 0;
575
576 /* Free any existing tx buffers. */
577 txbuf = txr->tx_buffers;
578 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
579 if (txbuf->m_head != NULL) {
580 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
581 0, txbuf->m_head->m_pkthdr.len,
582 BUS_DMASYNC_POSTWRITE);
583 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
584 m_freem(txbuf->m_head);
585 txbuf->m_head = NULL;
586 }
587 #ifdef DEV_NETMAP
588 /*
589 * In netmap mode, set the map for the packet buffer.
590 * NOTE: Some drivers (not this one) also need to set
591 * the physical buffer address in the NIC ring.
592 * Slots in the netmap ring (indexed by "si") are
593 * kring->nkr_hwofs positions "ahead" wrt the
594 * corresponding slot in the NIC ring. In some drivers
595 * (not here) nkr_hwofs can be negative. Function
596 * netmap_idx_n2k() handles wraparounds properly.
597 */
598 if (slot) {
599 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
600 netmap_load_map(na, txr->txtag,
601 txbuf->map, NMB(na, slot + si));
602 }
603 #endif /* DEV_NETMAP */
604 /* Clear the EOP descriptor pointer */
605 txbuf->eop = NULL;
606 }
607
608 #ifdef IXGBE_FDIR
609 /* Set the rate at which we sample packets */
610 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
611 txr->atr_sample = atr_sample_rate;
612 #endif
613
614 /* Set number of descriptors available */
615 txr->tx_avail = adapter->num_tx_desc;
616
617 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
618 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
619 IXGBE_TX_UNLOCK(txr);
620 }
621
622 /*********************************************************************
623 *
624 * Initialize all transmit rings.
625 *
626 **********************************************************************/
627 int
628 ixgbe_setup_transmit_structures(struct adapter *adapter)
629 {
630 struct tx_ring *txr = adapter->tx_rings;
631
632 for (int i = 0; i < adapter->num_queues; i++, txr++)
633 ixgbe_setup_transmit_ring(txr);
634
635 return (0);
636 }
637
638 /*********************************************************************
639 *
640 * Free all transmit rings.
641 *
642 **********************************************************************/
643 void
644 ixgbe_free_transmit_structures(struct adapter *adapter)
645 {
646 struct tx_ring *txr = adapter->tx_rings;
647
648 for (int i = 0; i < adapter->num_queues; i++, txr++) {
649 ixgbe_free_transmit_buffers(txr);
650 ixgbe_dma_free(adapter, &txr->txdma);
651 IXGBE_TX_LOCK_DESTROY(txr);
652 }
653 free(adapter->tx_rings, M_DEVBUF);
654 }
655
656 /*********************************************************************
657 *
658 * Free transmit ring related data structures.
659 *
660 **********************************************************************/
661 static void
662 ixgbe_free_transmit_buffers(struct tx_ring *txr)
663 {
664 struct adapter *adapter = txr->adapter;
665 struct ixgbe_tx_buf *tx_buffer;
666 int i;
667
668 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
669
670 if (txr->tx_buffers == NULL)
671 return;
672
673 tx_buffer = txr->tx_buffers;
674 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
675 if (tx_buffer->m_head != NULL) {
676 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
677 0, tx_buffer->m_head->m_pkthdr.len,
678 BUS_DMASYNC_POSTWRITE);
679 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
680 m_freem(tx_buffer->m_head);
681 tx_buffer->m_head = NULL;
682 if (tx_buffer->map != NULL) {
683 ixgbe_dmamap_destroy(txr->txtag,
684 tx_buffer->map);
685 tx_buffer->map = NULL;
686 }
687 } else if (tx_buffer->map != NULL) {
688 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
689 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
690 tx_buffer->map = NULL;
691 }
692 }
693 #ifndef IXGBE_LEGACY_TX
694 if (txr->txr_interq != NULL) {
695 struct mbuf *m;
696
697 while ((m = pcq_get(txr->txr_interq)) != NULL)
698 m_freem(m);
699 pcq_destroy(txr->txr_interq);
700 }
701 #endif
702 if (txr->tx_buffers != NULL) {
703 free(txr->tx_buffers, M_DEVBUF);
704 txr->tx_buffers = NULL;
705 }
706 if (txr->txtag != NULL) {
707 ixgbe_dma_tag_destroy(txr->txtag);
708 txr->txtag = NULL;
709 }
710 return;
711 }
712
713 /*********************************************************************
714 *
715 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
716 *
717 **********************************************************************/
718
719 static int
720 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
721 u32 *cmd_type_len, u32 *olinfo_status)
722 {
723 struct adapter *adapter = txr->adapter;
724 struct ethercom *ec = &adapter->osdep.ec;
725 struct m_tag *mtag;
726 struct ixgbe_adv_tx_context_desc *TXD;
727 struct ether_vlan_header *eh;
728 #ifdef INET
729 struct ip *ip;
730 #endif
731 #ifdef INET6
732 struct ip6_hdr *ip6;
733 #endif
734 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
735 int ehdrlen, ip_hlen = 0;
736 u16 etype;
737 u8 ipproto = 0;
738 int offload = TRUE;
739 int ctxd = txr->next_avail_desc;
740 u16 vtag = 0;
741 char *l3d;
742
743
744 /* First check if TSO is to be used */
745 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) {
746 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
747
748 if (rv != 0) {
749 ++adapter->tso_err.ev_count;
750 return rv;
751 }
752 }
753
754 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
755 offload = FALSE;
756
757 /* Indicate the whole packet as payload when not doing TSO */
758 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
759
760 /* Now ready a context descriptor */
761 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
762
763 /*
764 ** In advanced descriptors the vlan tag must
765 ** be placed into the context descriptor. Hence
766 ** we need to make one even if not doing offloads.
767 */
768 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
769 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
770 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
771 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
772 return (0);
773
774 /*
775 * Determine where frame payload starts.
776 * Jump over vlan headers if already present,
777 * helpful for QinQ too.
778 */
779 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
780 eh = mtod(mp, struct ether_vlan_header *);
781 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
782 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
783 etype = ntohs(eh->evl_proto);
784 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
785 } else {
786 etype = ntohs(eh->evl_encap_proto);
787 ehdrlen = ETHER_HDR_LEN;
788 }
789
790 /* Set the ether header length */
791 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
792
793 if (offload == FALSE)
794 goto no_offloads;
795
796 /*
797 * If the first mbuf only includes the ethernet header, jump to the next one
798 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
799 * XXX: And assumes the entire IP header is contained in one mbuf
800 */
801 if (mp->m_len == ehdrlen && mp->m_next)
802 l3d = mtod(mp->m_next, char *);
803 else
804 l3d = mtod(mp, char *) + ehdrlen;
805
806 switch (etype) {
807 #ifdef INET
808 case ETHERTYPE_IP:
809 ip = (struct ip *)(l3d);
810 ip_hlen = ip->ip_hl << 2;
811 ipproto = ip->ip_p;
812 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
813 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
814 ip->ip_sum == 0);
815 break;
816 #endif
817 #ifdef INET6
818 case ETHERTYPE_IPV6:
819 ip6 = (struct ip6_hdr *)(l3d);
820 ip_hlen = sizeof(struct ip6_hdr);
821 ipproto = ip6->ip6_nxt;
822 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
823 break;
824 #endif
825 default:
826 offload = false;
827 break;
828 }
829
830 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
831 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
832
833 vlan_macip_lens |= ip_hlen;
834
835 /* No support for offloads for non-L4 next headers */
836 switch (ipproto) {
837 case IPPROTO_TCP:
838 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
839
840 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
841 else
842 offload = false;
843 break;
844 case IPPROTO_UDP:
845 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
846 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
847 else
848 offload = false;
849 break;
850 default:
851 offload = false;
852 break;
853 }
854
855 if (offload) /* Insert L4 checksum into data descriptors */
856 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
857
858 no_offloads:
859 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
860
861 /* Now copy bits into descriptor */
862 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
863 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
864 TXD->seqnum_seed = htole32(0);
865 TXD->mss_l4len_idx = htole32(0);
866
867 /* We've consumed the first desc, adjust counters */
868 if (++ctxd == txr->num_desc)
869 ctxd = 0;
870 txr->next_avail_desc = ctxd;
871 --txr->tx_avail;
872
873 return 0;
874 }
875
876 /**********************************************************************
877 *
878 * Setup work for hardware segmentation offload (TSO) on
879 * adapters using advanced tx descriptors
880 *
881 **********************************************************************/
882 static int
883 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
884 u32 *cmd_type_len, u32 *olinfo_status)
885 {
886 struct m_tag *mtag;
887 struct adapter *adapter = txr->adapter;
888 struct ethercom *ec = &adapter->osdep.ec;
889 struct ixgbe_adv_tx_context_desc *TXD;
890 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
891 u32 mss_l4len_idx = 0, paylen;
892 u16 vtag = 0, eh_type;
893 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
894 struct ether_vlan_header *eh;
895 #ifdef INET6
896 struct ip6_hdr *ip6;
897 #endif
898 #ifdef INET
899 struct ip *ip;
900 #endif
901 struct tcphdr *th;
902
903 /*
904 * Determine where frame payload starts.
905 * Jump over vlan headers if already present
906 */
907 eh = mtod(mp, struct ether_vlan_header *);
908 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
909 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
910 eh_type = eh->evl_proto;
911 } else {
912 ehdrlen = ETHER_HDR_LEN;
913 eh_type = eh->evl_encap_proto;
914 }
915
916 switch (ntohs(eh_type)) {
917 #ifdef INET6
918 case ETHERTYPE_IPV6:
919 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
921 if (ip6->ip6_nxt != IPPROTO_TCP)
922 return (ENXIO);
923 ip_hlen = sizeof(struct ip6_hdr);
924 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
925 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
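		/*
		 * Seed th_sum with the pseudo-header checksum; the
		 * hardware completes the TCP checksum for each segment.
		 */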
926 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
927 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
928 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
929 break;
930 #endif
931 #ifdef INET
932 case ETHERTYPE_IP:
933 ip = (struct ip *)(mp->m_data + ehdrlen);
934 if (ip->ip_p != IPPROTO_TCP)
935 return (ENXIO);
936 ip->ip_sum = 0;
937 ip_hlen = ip->ip_hl << 2;
938 th = (struct tcphdr *)((char *)ip + ip_hlen);
939 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
940 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
942 /* Tell transmit desc to also do IPv4 checksum. */
943 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
944 break;
945 #endif
946 default:
947 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
948 __func__, ntohs(eh_type));
949 break;
950 }
951
952 ctxd = txr->next_avail_desc;
953 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
954
955 tcp_hlen = th->th_off << 2;
956
957 /* This is used in the transmit desc in encap */
958 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
959
960 /* VLAN MACLEN IPLEN */
961 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
962 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
963 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
964 }
965
966 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
967 vlan_macip_lens |= ip_hlen;
968 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
969
970 /* ADV DTYPE TUCMD */
971 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
972 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
973 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
974
975 /* MSS L4LEN IDX */
976 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
977 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
978 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
979
980 TXD->seqnum_seed = htole32(0);
981
982 if (++ctxd == txr->num_desc)
983 ctxd = 0;
984
985 txr->tx_avail--;
986 txr->next_avail_desc = ctxd;
987 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
988 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
989 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
990 ++txr->tso_tx.ev_count;
991 return (0);
992 }
993
994
995 /**********************************************************************
996 *
997 * Examine each tx_buffer in the used queue. If the hardware is done
998 * processing the packet then free associated resources. The
999 * tx_buffer is put back on the free queue.
1000 *
1001 **********************************************************************/
1002 void
1003 ixgbe_txeof(struct tx_ring *txr)
1004 {
1005 struct adapter *adapter = txr->adapter;
1006 struct ifnet *ifp = adapter->ifp;
1007 u32 work, processed = 0;
1008 u32 limit = adapter->tx_process_limit;
1009 struct ixgbe_tx_buf *buf;
1010 union ixgbe_adv_tx_desc *txd;
1011
1012 KASSERT(mutex_owned(&txr->tx_mtx));
1013
1014 #ifdef DEV_NETMAP
1015 if (ifp->if_capenable & IFCAP_NETMAP) {
1016 struct netmap_adapter *na = NA(ifp);
1017 struct netmap_kring *kring = &na->tx_rings[txr->me];
1018 txd = txr->tx_base;
1019 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1020 BUS_DMASYNC_POSTREAD);
1021 /*
1022 * In netmap mode, all the work is done in the context
1023 * of the client thread. Interrupt handlers only wake up
1024 * clients, which may be sleeping on individual rings
1025 * or on a global resource for all rings.
1026 * To implement tx interrupt mitigation, we wake up the client
1027 * thread roughly every half ring, even if the NIC interrupts
1028 * more frequently. This is implemented as follows:
1029 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1030 * the slot that should wake up the thread (nkr_num_slots
1031 * means the user thread should not be woken up);
1032 * - the driver ignores tx interrupts unless netmap_mitigate=0
1033 * or the slot has the DD bit set.
1034 */
1035 if (!netmap_mitigate ||
1036 (kring->nr_kflags < kring->nkr_num_slots &&
1037 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1038 netmap_tx_irq(ifp, txr->me);
1039 }
1040 return;
1041 }
1042 #endif /* DEV_NETMAP */
1043
1044 if (txr->tx_avail == txr->num_desc) {
1045 txr->busy = 0;
1046 return;
1047 }
1048
1049 /* Get work starting point */
1050 work = txr->next_to_clean;
1051 buf = &txr->tx_buffers[work];
1052 txd = &txr->tx_base[work];
1053 work -= txr->num_desc; /* The distance to ring end */
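	/*
	 * 'work' is kept as a negative offset from the end of the ring;
	 * it reaches zero exactly when the index wraps, which the loop
	 * below detects with "!work" before resetting to the ring start.
	 */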
1054 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1055 BUS_DMASYNC_POSTREAD);
1056
1057 do {
1058 union ixgbe_adv_tx_desc *eop = buf->eop;
1059 if (eop == NULL) /* No work */
1060 break;
1061
1062 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1063 break; /* I/O not complete */
1064
1065 if (buf->m_head) {
1066 txr->bytes +=
1067 buf->m_head->m_pkthdr.len;
1068 bus_dmamap_sync(txr->txtag->dt_dmat,
1069 buf->map,
1070 0, buf->m_head->m_pkthdr.len,
1071 BUS_DMASYNC_POSTWRITE);
1072 ixgbe_dmamap_unload(txr->txtag,
1073 buf->map);
1074 m_freem(buf->m_head);
1075 buf->m_head = NULL;
1076 }
1077 buf->eop = NULL;
1078 ++txr->tx_avail;
1079
1080 /* We clean the range if multi segment */
1081 while (txd != eop) {
1082 ++txd;
1083 ++buf;
1084 ++work;
1085 /* wrap the ring? */
1086 if (__predict_false(!work)) {
1087 work -= txr->num_desc;
1088 buf = txr->tx_buffers;
1089 txd = txr->tx_base;
1090 }
1091 if (buf->m_head) {
1092 txr->bytes +=
1093 buf->m_head->m_pkthdr.len;
1094 bus_dmamap_sync(txr->txtag->dt_dmat,
1095 buf->map,
1096 0, buf->m_head->m_pkthdr.len,
1097 BUS_DMASYNC_POSTWRITE);
1098 ixgbe_dmamap_unload(txr->txtag,
1099 buf->map);
1100 m_freem(buf->m_head);
1101 buf->m_head = NULL;
1102 }
1103 ++txr->tx_avail;
1104 buf->eop = NULL;
1105
1106 }
1107 ++txr->packets;
1108 ++processed;
1109 ++ifp->if_opackets;
1110
1111 /* Try the next packet */
1112 ++txd;
1113 ++buf;
1114 ++work;
1115 /* reset with a wrap */
1116 if (__predict_false(!work)) {
1117 work -= txr->num_desc;
1118 buf = txr->tx_buffers;
1119 txd = txr->tx_base;
1120 }
1121 prefetch(txd);
1122 } while (__predict_true(--limit));
1123
1124 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1125 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1126
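	/* Convert the negative offset back into a real ring index. */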
1127 work += txr->num_desc;
1128 txr->next_to_clean = work;
1129
1130 /*
1131 ** Queue Hang detection: we know there's
1132 ** work outstanding or the first return
1133 ** would have been taken, so increment busy
1134 ** if nothing managed to get cleaned; then
1135 ** in local_timer it will be checked and
1136 ** marked as HUNG if it exceeds a MAX attempt.
1137 */
1138 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1139 ++txr->busy;
1140 /*
1141 ** If anything gets cleaned we reset the state to 1;
1142 ** note this will turn off HUNG if it's set.
1143 */
1144 if (processed)
1145 txr->busy = 1;
1146
1147 if (txr->tx_avail == txr->num_desc)
1148 txr->busy = 0;
1149
1150 return;
1151 }
1152
1153
1154 #ifdef IXGBE_FDIR
1155 /*
1156 ** This routine parses packet headers so that Flow
1157 ** Director can make a hashed filter table entry
1158 ** allowing traffic flows to be identified and kept
1159 ** on the same cpu. This would be a performance
1160 ** hit, but we only do it at IXGBE_FDIR_RATE of
1161 ** packets.
1162 */
1163 static void
1164 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1165 {
1166 struct adapter *adapter = txr->adapter;
1167 struct ix_queue *que;
1168 struct ip *ip;
1169 struct tcphdr *th;
1170 struct udphdr *uh;
1171 struct ether_vlan_header *eh;
1172 union ixgbe_atr_hash_dword input = {.dword = 0};
1173 union ixgbe_atr_hash_dword common = {.dword = 0};
1174 int ehdrlen, ip_hlen;
1175 u16 etype;
1176
1177 eh = mtod(mp, struct ether_vlan_header *);
1178 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1179 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1180 etype = eh->evl_proto;
1181 } else {
1182 ehdrlen = ETHER_HDR_LEN;
1183 etype = eh->evl_encap_proto;
1184 }
1185
1186 /* Only handling IPv4 */
1187 if (etype != htons(ETHERTYPE_IP))
1188 return;
1189
1190 ip = (struct ip *)(mp->m_data + ehdrlen);
1191 ip_hlen = ip->ip_hl << 2;
1192
1193 /* check if we're UDP or TCP */
1194 switch (ip->ip_p) {
1195 case IPPROTO_TCP:
1196 th = (struct tcphdr *)((char *)ip + ip_hlen);
1197 /* src and dst are inverted */
1198 common.port.dst ^= th->th_sport;
1199 common.port.src ^= th->th_dport;
1200 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1201 break;
1202 case IPPROTO_UDP:
1203 uh = (struct udphdr *)((char *)ip + ip_hlen);
1204 /* src and dst are inverted */
1205 common.port.dst ^= uh->uh_sport;
1206 common.port.src ^= uh->uh_dport;
1207 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1208 break;
1209 default:
1210 return;
1211 }
1212
1213 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1214 if (mp->m_pkthdr.ether_vtag)
1215 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1216 else
1217 common.flex_bytes ^= etype;
1218 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1219
1220 que = &adapter->queues[txr->me];
1221 /*
1222 ** This assumes the Rx queue and Tx
1223 ** queue are bound to the same CPU
1224 */
1225 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1226 input, common, que->msix);
1227 }
1228 #endif /* IXGBE_FDIR */
1229
1230 /*
1231 ** Used to detect a descriptor that has
1232 ** been merged by Hardware RSC.
1233 */
1234 static inline u32
1235 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1236 {
1237 return (le32toh(rx->wb.lower.lo_dword.data) &
1238 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1239 }
1240
1241 /*********************************************************************
1242 *
1243 * Initialize Hardware RSC (LRO) feature on 82599
1244 * for an RX ring, this is toggled by the LRO capability
1245 * even though it is transparent to the stack.
1246 *
1247 * NOTE: since this HW feature only works with IPv4, and
1248 * our testing has shown soft LRO to be as effective,
1249 * I have decided to disable this by default.
1250 *
1251 **********************************************************************/
1252 static void
1253 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1254 {
1255 struct adapter *adapter = rxr->adapter;
1256 struct ixgbe_hw *hw = &adapter->hw;
1257 u32 rscctrl, rdrxctl;
1258
1259 /* If turning LRO/RSC off we need to disable it */
1260 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1261 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1262 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1263 return;
1264 }
1265
1266 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1267 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1268 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1269 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1270 #endif /* DEV_NETMAP */
1271 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1272 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1273 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1274
1275 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1276 rscctrl |= IXGBE_RSCCTL_RSCEN;
1277 /*
1278 ** Limit the total number of descriptors that
1279 ** can be combined, so it does not exceed 64K
1280 */
1281 if (rxr->mbuf_sz == MCLBYTES)
1282 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1283 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1284 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1285 else if (rxr->mbuf_sz == MJUM9BYTES)
1286 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1287 else /* Using 16K cluster */
1288 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1289
1290 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1291
1292 /* Enable TCP header recognition */
1293 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1294 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1295 IXGBE_PSRTYPE_TCPHDR));
1296
1297 /* Disable RSC for ACK packets */
1298 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1299 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1300
1301 rxr->hw_rsc = TRUE;
1302 }
1303
1304 /*********************************************************************
1305 *
1306 * Refresh mbuf buffers for RX descriptor rings
1307 * - now keeps its own state so discards due to resource
1308 * exhaustion are unnecessary: if an mbuf cannot be obtained
1309 * it just returns, keeping its placeholder, so it can simply
1310 * be called again later to retry.
1311 *
1312 **********************************************************************/
1313 static void
1314 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1315 {
1316 struct adapter *adapter = rxr->adapter;
1317 struct ixgbe_rx_buf *rxbuf;
1318 struct mbuf *mp;
1319 int i, j, error;
1320 bool refreshed = false;
1321
1322 i = j = rxr->next_to_refresh;
1323 /* Control the loop with 'j' running one slot beyond 'i' */
1324 if (++j == rxr->num_desc)
1325 j = 0;
1326
1327 while (j != limit) {
1328 rxbuf = &rxr->rx_buffers[i];
1329 if (rxbuf->buf == NULL) {
1330 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1331 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1332 if (mp == NULL) {
1333 rxr->no_jmbuf.ev_count++;
1334 goto update;
1335 }
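			/*
			 * ETHER_ALIGN (2 bytes) keeps the IP header
			 * 4-byte aligned when standard clusters are used.
			 */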
1336 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1337 m_adj(mp, ETHER_ALIGN);
1338 } else
1339 mp = rxbuf->buf;
1340
1341 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1342
1343 /* If we're dealing with an mbuf that was copied rather
1344 * than replaced, there's no need to go through busdma.
1345 */
1346 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1347 /* Get the memory mapping */
1348 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1349 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1350 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1351 if (error != 0) {
1352 printf("Refresh mbufs: payload dmamap load"
1353 " failure - %d\n", error);
1354 m_free(mp);
1355 rxbuf->buf = NULL;
1356 goto update;
1357 }
1358 rxbuf->buf = mp;
1359 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1360 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1361 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1362 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1363 } else {
1364 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1365 rxbuf->flags &= ~IXGBE_RX_COPY;
1366 }
1367
1368 refreshed = true;
1369 /* Next is precalculated */
1370 i = j;
1371 rxr->next_to_refresh = i;
1372 if (++j == rxr->num_desc)
1373 j = 0;
1374 }
1375 update:
1376 if (refreshed) /* Update hardware tail index */
1377 IXGBE_WRITE_REG(&adapter->hw,
1378 rxr->tail, rxr->next_to_refresh);
1379 return;
1380 }
1381
1382 /*********************************************************************
1383 *
1384 * Allocate memory for rx_buffer structures. Since we use one
1385 * rx_buffer per received packet, the maximum number of rx_buffer's
1386 * that we'll need is equal to the number of receive descriptors
1387 * that we've allocated.
1388 *
1389 **********************************************************************/
1390 int
1391 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1392 {
1393 struct adapter *adapter = rxr->adapter;
1394 device_t dev = adapter->dev;
1395 struct ixgbe_rx_buf *rxbuf;
1396 int bsize, error;
1397
1398 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1399 if (!(rxr->rx_buffers =
1400 (struct ixgbe_rx_buf *) malloc(bsize,
1401 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1402 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1403 error = ENOMEM;
1404 goto fail;
1405 }
1406
1407 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1408 1, 0, /* alignment, bounds */
1409 MJUM16BYTES, /* maxsize */
1410 1, /* nsegments */
1411 MJUM16BYTES, /* maxsegsize */
1412 0, /* flags */
1413 &rxr->ptag))) {
1414 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1415 goto fail;
1416 }
1417
1418 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1419 rxbuf = &rxr->rx_buffers[i];
1420 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1421 if (error) {
1422 aprint_error_dev(dev, "Unable to create RX dma map\n");
1423 goto fail;
1424 }
1425 }
1426
1427 return (0);
1428
1429 fail:
1430 /* Frees all, but can handle partial completion */
1431 ixgbe_free_receive_structures(adapter);
1432 return (error);
1433 }
1434
1435 static void
1436 ixgbe_free_receive_ring(struct rx_ring *rxr)
1437 {
1438 struct ixgbe_rx_buf *rxbuf;
1439
1440 for (int i = 0; i < rxr->num_desc; i++) {
1441 rxbuf = &rxr->rx_buffers[i];
1442 if (rxbuf->buf != NULL) {
1443 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1444 0, rxbuf->buf->m_pkthdr.len,
1445 BUS_DMASYNC_POSTREAD);
1446 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1447 rxbuf->buf->m_flags |= M_PKTHDR;
1448 m_freem(rxbuf->buf);
1449 rxbuf->buf = NULL;
1450 rxbuf->flags = 0;
1451 }
1452 }
1453 }
1454
1455 /*********************************************************************
1456 *
1457 * Initialize a receive ring and its buffers.
1458 *
1459 **********************************************************************/
1460 static int
1461 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1462 {
1463 struct adapter *adapter;
1464 struct ixgbe_rx_buf *rxbuf;
1465 #ifdef LRO
1466 struct ifnet *ifp;
1467 struct lro_ctrl *lro = &rxr->lro;
1468 #endif /* LRO */
1469 int rsize, error = 0;
1470 #ifdef DEV_NETMAP
1471 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1472 struct netmap_slot *slot;
1473 #endif /* DEV_NETMAP */
1474
1475 adapter = rxr->adapter;
1476 #ifdef LRO
1477 ifp = adapter->ifp;
1478 #endif /* LRO */
1479
1480 /* Clear the ring contents */
1481 IXGBE_RX_LOCK(rxr);
1482 #ifdef DEV_NETMAP
1483 /* same as in ixgbe_setup_transmit_ring() */
1484 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1485 #endif /* DEV_NETMAP */
1486 rsize = roundup2(adapter->num_rx_desc *
1487 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1488 bzero((void *)rxr->rx_base, rsize);
1489 /* Cache the size */
1490 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1491
1492 /* Free current RX buffer structs and their mbufs */
1493 ixgbe_free_receive_ring(rxr);
1494
1495 IXGBE_RX_UNLOCK(rxr);
1496
1497 /* Now reinitialize our supply of jumbo mbufs. The number
1498 * or size of jumbo mbufs may have changed.
1499 */
1500 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1501 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1502
1503 IXGBE_RX_LOCK(rxr);
1504
1505 /* Now replenish the mbufs */
1506 for (int j = 0; j != rxr->num_desc; ++j) {
1507 struct mbuf *mp;
1508
1509 rxbuf = &rxr->rx_buffers[j];
1510 #ifdef DEV_NETMAP
1511 /*
1512 * In netmap mode, fill the map and set the buffer
1513 * address in the NIC ring, considering the offset
1514 * between the netmap and NIC rings (see comment in
1515 * ixgbe_setup_transmit_ring() ). No need to allocate
1516 * an mbuf, so end the block with a continue;
1517 */
1518 if (slot) {
1519 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1520 uint64_t paddr;
1521 void *addr;
1522
1523 addr = PNMB(na, slot + sj, &paddr);
1524 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1525 /* Update descriptor and the cached value */
1526 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1527 rxbuf->addr = htole64(paddr);
1528 continue;
1529 }
1530 #endif /* DEV_NETMAP */
1531 rxbuf->flags = 0;
1532 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1533 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1534 if (rxbuf->buf == NULL) {
1535 error = ENOBUFS;
1536 goto fail;
1537 }
1538 mp = rxbuf->buf;
1539 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1540 /* Get the memory mapping */
1541 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1542 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1543 if (error != 0)
1544 goto fail;
1545 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1546 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1547 /* Update the descriptor and the cached value */
1548 rxr->rx_base[j].read.pkt_addr =
1549 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1550 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1551 }
1552
1553
1554 /* Setup our descriptor indices */
1555 rxr->next_to_check = 0;
1556 rxr->next_to_refresh = 0;
1557 rxr->lro_enabled = FALSE;
1558 rxr->rx_copies.ev_count = 0;
1559 #if 0 /* NetBSD */
1560 rxr->rx_bytes.ev_count = 0;
1561 #if 1 /* Fix inconsistency */
1562 rxr->rx_packets.ev_count = 0;
1563 #endif
1564 #endif
1565 rxr->vtag_strip = FALSE;
1566
1567 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1568 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1569
1570 /*
1571 ** Now set up the LRO interface:
1572 */
1573 if (ixgbe_rsc_enable)
1574 ixgbe_setup_hw_rsc(rxr);
1575 #ifdef LRO
1576 else if (ifp->if_capenable & IFCAP_LRO) {
1577 device_t dev = adapter->dev;
1578 int err = tcp_lro_init(lro);
1579 if (err) {
1580 device_printf(dev, "LRO Initialization failed!\n");
1581 goto fail;
1582 }
1583 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1584 rxr->lro_enabled = TRUE;
1585 lro->ifp = adapter->ifp;
1586 }
1587 #endif /* LRO */
1588
1589 IXGBE_RX_UNLOCK(rxr);
1590 return (0);
1591
1592 fail:
1593 ixgbe_free_receive_ring(rxr);
1594 IXGBE_RX_UNLOCK(rxr);
1595 return (error);
1596 }
1597
1598 /*********************************************************************
1599 *
1600 * Initialize all receive rings.
1601 *
1602 **********************************************************************/
1603 int
1604 ixgbe_setup_receive_structures(struct adapter *adapter)
1605 {
1606 struct rx_ring *rxr = adapter->rx_rings;
1607 int j;
1608
1609 for (j = 0; j < adapter->num_queues; j++, rxr++)
1610 if (ixgbe_setup_receive_ring(rxr))
1611 goto fail;
1612
1613 return (0);
1614 fail:
1615 /*
1616 * Free RX buffers allocated so far, we will only handle
1617 * the rings that completed; the failing case will have
1618 * cleaned up for itself. 'j' failed, so it's the terminus.
1619 */
1620 for (int i = 0; i < j; ++i) {
1621 rxr = &adapter->rx_rings[i];
1622 ixgbe_free_receive_ring(rxr);
1623 }
1624
1625 return (ENOBUFS);
1626 }
1627
1628
1629 /*********************************************************************
1630 *
1631 * Free all receive rings.
1632 *
1633 **********************************************************************/
1634 void
1635 ixgbe_free_receive_structures(struct adapter *adapter)
1636 {
1637 struct rx_ring *rxr = adapter->rx_rings;
1638
1639 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1640
1641 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1642 #ifdef LRO
1643 struct lro_ctrl *lro = &rxr->lro;
1644 #endif /* LRO */
1645 ixgbe_free_receive_buffers(rxr);
1646 #ifdef LRO
1647 /* Free LRO memory */
1648 tcp_lro_free(lro);
1649 #endif /* LRO */
1650 /* Free the ring memory as well */
1651 ixgbe_dma_free(adapter, &rxr->rxdma);
1652 IXGBE_RX_LOCK_DESTROY(rxr);
1653 }
1654
1655 free(adapter->rx_rings, M_DEVBUF);
1656 }
1657
1658
1659 /*********************************************************************
1660 *
1661 * Free receive ring data structures
1662 *
1663 **********************************************************************/
1664 static void
1665 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1666 {
1667 struct adapter *adapter = rxr->adapter;
1668 struct ixgbe_rx_buf *rxbuf;
1669
1670 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1671
1672 /* Cleanup any existing buffers */
1673 if (rxr->rx_buffers != NULL) {
1674 for (int i = 0; i < adapter->num_rx_desc; i++) {
1675 rxbuf = &rxr->rx_buffers[i];
1676 if (rxbuf->buf != NULL) {
1677 bus_dmamap_sync(rxr->ptag->dt_dmat,
1678 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1679 BUS_DMASYNC_POSTREAD);
1680 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1681 rxbuf->buf->m_flags |= M_PKTHDR;
1682 m_freem(rxbuf->buf);
1683 }
1684 rxbuf->buf = NULL;
1685 if (rxbuf->pmap != NULL) {
1686 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1687 rxbuf->pmap = NULL;
1688 }
1689 }
1690 if (rxr->rx_buffers != NULL) {
1691 free(rxr->rx_buffers, M_DEVBUF);
1692 rxr->rx_buffers = NULL;
1693 }
1694 }
1695
1696 if (rxr->ptag != NULL) {
1697 ixgbe_dma_tag_destroy(rxr->ptag);
1698 rxr->ptag = NULL;
1699 }
1700
1701 return;
1702 }
1703
1704 static __inline void
1705 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1706 {
1707 int s;
1708
1709 #ifdef LRO
1710 struct adapter *adapter = ifp->if_softc;
1711 struct ethercom *ec = &adapter->osdep.ec;
1712
1713 /*
1714 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1715 * been validated by hardware. The packet also must not have a VLAN tag
1716 * in its ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1717 */
1718 if (rxr->lro_enabled &&
1719 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1720 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1721 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1722 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1723 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1724 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1725 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1726 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1727 /*
1728 * Send to the stack if:
1729 ** - LRO not enabled, or
1730 ** - no LRO resources, or
1731 ** - lro enqueue fails
1732 */
1733 if (rxr->lro.lro_cnt != 0)
1734 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1735 return;
1736 }
1737 #endif /* LRO */
1738
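	/* Hand the packet to the stack without holding the RX lock. */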
1739 IXGBE_RX_UNLOCK(rxr);
1740
1741 s = splnet();
1742 if_input(ifp, m);
1743 splx(s);
1744
1745 IXGBE_RX_LOCK(rxr);
1746 }
1747
1748 static __inline void
1749 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1750 {
1751 struct ixgbe_rx_buf *rbuf;
1752
1753 rbuf = &rxr->rx_buffers[i];
1754
1755
1756 /*
1757 ** With advanced descriptors the writeback
1758 ** clobbers the buffer addrs, so it's easier
1759 ** to just free the existing mbufs and take
1760 ** the normal refresh path to get new buffers
1761 ** and mapping.
1762 */
1763
1764 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1765 rbuf->fmp->m_flags |= M_PKTHDR;
1766 m_freem(rbuf->fmp);
1767 rbuf->fmp = NULL;
1768 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1769 } else if (rbuf->buf) {
1770 m_free(rbuf->buf);
1771 rbuf->buf = NULL;
1772 }
1773 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1774
1775 rbuf->flags = 0;
1776
1777 return;
1778 }
1779
1780
1781 /*********************************************************************
1782 *
1783 * This routine executes in interrupt context. It replenishes
1784 * the mbufs in the descriptor and sends data which has been
1785 * dma'ed into host memory to upper layer.
1786 *
1787 * Return TRUE for more work, FALSE for all clean.
1788 *********************************************************************/
1789 bool
1790 ixgbe_rxeof(struct ix_queue *que)
1791 {
1792 struct adapter *adapter = que->adapter;
1793 struct rx_ring *rxr = que->rxr;
1794 struct ifnet *ifp = adapter->ifp;
1795 #ifdef LRO
1796 struct lro_ctrl *lro = &rxr->lro;
1797 #endif /* LRO */
1798 int i, nextp, processed = 0;
1799 u32 staterr = 0;
1800 u32 count = adapter->rx_process_limit;
1801 union ixgbe_adv_rx_desc *cur;
1802 struct ixgbe_rx_buf *rbuf, *nbuf;
1803 #ifdef RSS
1804 u16 pkt_info;
1805 #endif
1806
1807 IXGBE_RX_LOCK(rxr);
1808
1809 #ifdef DEV_NETMAP
1810 /* Same as the txeof routine: wakeup clients on intr. */
1811 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1812 IXGBE_RX_UNLOCK(rxr);
1813 return (FALSE);
1814 }
1815 #endif /* DEV_NETMAP */
1816
1817 for (i = rxr->next_to_check; count != 0;) {
1818 struct mbuf *sendmp, *mp;
1819 u32 rsc, ptype;
1820 u16 len;
1821 u16 vtag = 0;
1822 bool eop;
1823
1824 /* Sync the ring. */
1825 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1826 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1827
1828 cur = &rxr->rx_base[i];
1829 staterr = le32toh(cur->wb.upper.status_error);
1830 #ifdef RSS
1831 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1832 #endif
1833
1834 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1835 break;
1836 if ((ifp->if_flags & IFF_RUNNING) == 0)
1837 break;
1838
1839 count--;
1840 sendmp = NULL;
1841 nbuf = NULL;
1842 rsc = 0;
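		/* Clear the status so a stale DD bit is not re-read
		 * before this slot is refreshed. */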
1843 cur->wb.upper.status_error = 0;
1844 rbuf = &rxr->rx_buffers[i];
1845 mp = rbuf->buf;
1846
1847 len = le16toh(cur->wb.upper.length);
1848 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1849 IXGBE_RXDADV_PKTTYPE_MASK;
1850 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1851
1852 /* Make sure bad packets are discarded */
1853 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1854 #if __FreeBSD_version >= 1100036
1855 if (IXGBE_IS_VF(adapter))
1856 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1857 #endif
1858 rxr->rx_discarded.ev_count++;
1859 ixgbe_rx_discard(rxr, i);
1860 goto next_desc;
1861 }
1862
1863 /*
1864 ** On 82599 which supports a hardware
1865 ** LRO (called HW RSC), packets need
1866 ** not be fragmented across sequential
1867 ** descriptors; rather, the next descriptor
1868 ** is indicated in bits of the descriptor.
1869 ** This also means that we might process
1870 ** more than one packet at a time, something
1871 ** that has never been true before; it
1872 ** required eliminating global chain pointers
1873 ** in favor of what we are doing here. -jfv
1874 */
1875 if (!eop) {
1876 /*
1877 ** Figure out the next descriptor
1878 ** of this frame.
1879 */
1880 if (rxr->hw_rsc == TRUE) {
1881 rsc = ixgbe_rsc_count(cur);
1882 rxr->rsc_num += (rsc - 1);
1883 }
1884 if (rsc) { /* Get hardware index */
1885 nextp = ((staterr &
1886 IXGBE_RXDADV_NEXTP_MASK) >>
1887 IXGBE_RXDADV_NEXTP_SHIFT);
1888 } else { /* Just sequential */
1889 nextp = i + 1;
1890 if (nextp == adapter->num_rx_desc)
1891 nextp = 0;
1892 }
1893 nbuf = &rxr->rx_buffers[nextp];
1894 prefetch(nbuf);
1895 }
1896 /*
1897 ** Rather than using the fmp/lmp global pointers
1898 ** we now keep the head of a packet chain in the
1899 ** buffer struct and pass this along from one
1900 ** descriptor to the next, until we get EOP.
1901 */
1902 mp->m_len = len;
1903 /*
1904 ** See if a head mbuf was stored by a
1905 ** previous descriptor of this frame.
1906 */
1907 sendmp = rbuf->fmp;
1908 if (sendmp != NULL) { /* secondary frag */
1909 rbuf->buf = rbuf->fmp = NULL;
1910 mp->m_flags &= ~M_PKTHDR;
1911 sendmp->m_pkthdr.len += mp->m_len;
1912 } else {
1913 /*
1914 * Optimize. This might be a small packet,
1915 * maybe just a TCP ACK. Do a fast copy that
1916 * is cache aligned into a new mbuf, and
1917 * leave the old mbuf+cluster for re-use.
1918 */
1919 if (eop && len <= IXGBE_RX_COPY_LEN) {
1920 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1921 if (sendmp != NULL) {
1922 sendmp->m_data +=
1923 IXGBE_RX_COPY_ALIGN;
1924 ixgbe_bcopy(mp->m_data,
1925 sendmp->m_data, len);
1926 sendmp->m_len = len;
1927 rxr->rx_copies.ev_count++;
1928 rbuf->flags |= IXGBE_RX_COPY;
1929 }
1930 }
1931 if (sendmp == NULL) {
1932 rbuf->buf = rbuf->fmp = NULL;
1933 sendmp = mp;
1934 }
1935
1936 /* first desc of a non-ps chain */
1937 sendmp->m_flags |= M_PKTHDR;
1938 sendmp->m_pkthdr.len = mp->m_len;
1939 }
1940 ++processed;
1941
1942 /* Pass the head pointer on */
1943 if (eop == 0) {
1944 nbuf->fmp = sendmp;
1945 sendmp = NULL;
1946 mp->m_next = nbuf->buf;
1947 } else { /* Sending this frame */
1948 m_set_rcvif(sendmp, ifp);
1949 ifp->if_ipackets++;
1950 rxr->rx_packets.ev_count++;
1951 /* capture data for AIM */
1952 rxr->bytes += sendmp->m_pkthdr.len;
1953 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1954 /* Process vlan info */
1955 if ((rxr->vtag_strip) &&
1956 (staterr & IXGBE_RXD_STAT_VP))
1957 vtag = le16toh(cur->wb.upper.vlan);
1958 if (vtag) {
1959 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1960 printf("%s: could not apply VLAN "
1961 "tag", __func__));
1962 }
1963 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1964 ixgbe_rx_checksum(staterr, sendmp, ptype,
1965 &adapter->stats.pf);
1966 }
1967
1968 #if 0 /* FreeBSD */
1969 /*
1970 * In case of multiqueue, we have RXCSUM.PCSD bit set
1971 * and never cleared. This means we have RSS hash
1972 * available to be used.
1973 */
1974 if (adapter->num_queues > 1) {
1975 sendmp->m_pkthdr.flowid =
1976 le32toh(cur->wb.lower.hi_dword.rss);
1977 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1978 case IXGBE_RXDADV_RSSTYPE_IPV4:
1979 M_HASHTYPE_SET(sendmp,
1980 M_HASHTYPE_RSS_IPV4);
1981 break;
1982 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1983 M_HASHTYPE_SET(sendmp,
1984 M_HASHTYPE_RSS_TCP_IPV4);
1985 break;
1986 case IXGBE_RXDADV_RSSTYPE_IPV6:
1987 M_HASHTYPE_SET(sendmp,
1988 M_HASHTYPE_RSS_IPV6);
1989 break;
1990 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1991 M_HASHTYPE_SET(sendmp,
1992 M_HASHTYPE_RSS_TCP_IPV6);
1993 break;
1994 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1995 M_HASHTYPE_SET(sendmp,
1996 M_HASHTYPE_RSS_IPV6_EX);
1997 break;
1998 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1999 M_HASHTYPE_SET(sendmp,
2000 M_HASHTYPE_RSS_TCP_IPV6_EX);
2001 break;
2002 #if __FreeBSD_version > 1100000
2003 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2004 M_HASHTYPE_SET(sendmp,
2005 M_HASHTYPE_RSS_UDP_IPV4);
2006 break;
2007 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2008 M_HASHTYPE_SET(sendmp,
2009 M_HASHTYPE_RSS_UDP_IPV6);
2010 break;
2011 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2012 M_HASHTYPE_SET(sendmp,
2013 M_HASHTYPE_RSS_UDP_IPV6_EX);
2014 break;
2015 #endif
2016 default:
2017 M_HASHTYPE_SET(sendmp,
2018 M_HASHTYPE_OPAQUE_HASH);
2019 }
2020 } else {
2021 sendmp->m_pkthdr.flowid = que->msix;
2022 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2023 }
2024 #endif
2025 }
2026 next_desc:
2027 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2028 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2029
2030 /* Advance our pointers to the next descriptor. */
2031 if (++i == rxr->num_desc)
2032 i = 0;
2033
2034 /* Now send to the stack or do LRO */
2035 if (sendmp != NULL) {
2036 rxr->next_to_check = i;
2037 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2038 i = rxr->next_to_check;
2039 }
2040
2041 /* Every 8 descriptors we go to refresh mbufs */
2042 if (processed == 8) {
2043 ixgbe_refresh_mbufs(rxr, i);
2044 processed = 0;
2045 }
2046 }
2047
2048 /* Refresh any remaining buf structs */
2049 if (ixgbe_rx_unrefreshed(rxr))
2050 ixgbe_refresh_mbufs(rxr, i);
2051
2052 rxr->next_to_check = i;
2053
2054 #ifdef LRO
2055 /*
2056 * Flush any outstanding LRO work
2057 */
2058 tcp_lro_flush_all(lro);
2059 #endif /* LRO */
2060
2061 IXGBE_RX_UNLOCK(rxr);
2062
2063 /*
2064 ** Still have cleaning to do?
2065 */
2066 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2067 return true;
2068 else
2069 return false;
2070 }
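
/*
 * Illustrative sketch (not part of this file): a queue interrupt or
 * softint handler is expected to honour the TRUE/FALSE contract of
 * ixgbe_rxeof() above, typically by deferring and retrying while more
 * descriptors remain and re-arming the queue interrupt once the ring
 * is clean.  The handler and the two helpers named below are
 * hypothetical placeholders, not driver functions.
 */
#if 0
static void
example_que_work(struct ix_queue *que)
{
	bool more;

	more = ixgbe_rxeof(que);		/* cleans at most rx_process_limit */
	if (more)
		example_reschedule(que);	/* come back for the rest */
	else
		example_enable_queue_intr(que);	/* ring clean; re-arm */
}
#endif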
2071
2072
2073 /*********************************************************************
2074 *
2075 * Verify that the hardware indicated that the checksum is valid.
2076 * Inform the stack about the status of the checksum so that the
2077 * stack does not spend time verifying it again in software.
2078 *
2079 *********************************************************************/
2080 static void
2081 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2082 struct ixgbe_hw_stats *stats)
2083 {
2084 u16 status = (u16) staterr;
2085 u8 errors = (u8) (staterr >> 24);
2086 #if 0
2087 bool sctp = false;
2088
2089 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2090 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2091 sctp = true;
2092 #endif
2093
2094 /* IPv4 checksum */
2095 if (status & IXGBE_RXD_STAT_IPCS) {
2096 stats->ipcs.ev_count++;
2097 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2098 /* IP Checksum Good */
2099 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2100 } else {
2101 stats->ipcs_bad.ev_count++;
2102 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2103 }
2104 }
2105 /* TCP/UDP/SCTP checksum */
2106 if (status & IXGBE_RXD_STAT_L4CS) {
2107 stats->l4cs.ev_count++;
2108 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2109 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2110 mp->m_pkthdr.csum_flags |= type;
2111 } else {
2112 stats->l4cs_bad.ev_count++;
2113 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2114 }
2115 }
2116 }
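
/*
 * Illustrative sketch (not part of this file): how the csum_flags set
 * by ixgbe_rx_checksum() above are typically interpreted further up
 * the stack.  A good hardware verdict lets software checksumming be
 * skipped; the corresponding _BAD flag signals a failed check.
 */
#if 0
	int csum = m->m_pkthdr.csum_flags;	/* "m" is a received mbuf */

	if ((csum & M_CSUM_IPv4) != 0 && (csum & M_CSUM_IPv4_BAD) == 0) {
		/* IPv4 header checksum already verified by the hardware */
	}
	if ((csum & (M_CSUM_TCPv4 | M_CSUM_TCPv6 |
	    M_CSUM_UDPv4 | M_CSUM_UDPv6)) != 0 &&
	    (csum & M_CSUM_TCP_UDP_BAD) == 0) {
		/* L4 checksum already verified by the hardware */
	}
#endif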
2117
2118
2119 /********************************************************************
2120 * Manage DMA'able memory.
2121 *******************************************************************/
2122
2123 int
2124 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2125 struct ixgbe_dma_alloc *dma, const int mapflags)
2126 {
2127 device_t dev = adapter->dev;
2128 int r, rsegs;
2129
2130 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2131 DBA_ALIGN, 0, /* alignment, bounds */
2132 size, /* maxsize */
2133 1, /* nsegments */
2134 size, /* maxsegsize */
2135 BUS_DMA_ALLOCNOW, /* flags */
2136 &dma->dma_tag);
2137 if (r != 0) {
2138 aprint_error_dev(dev,
2139 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2140 goto fail_0;
2141 }
2142
2143 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2144 size,
2145 dma->dma_tag->dt_alignment,
2146 dma->dma_tag->dt_boundary,
2147 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2148 if (r != 0) {
2149 aprint_error_dev(dev,
2150 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2151 goto fail_1;
2152 }
2153
2154 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2155 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2156 if (r != 0) {
2157 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2158 __func__, r);
2159 goto fail_2;
2160 }
2161
2162 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2163 if (r != 0) {
2164 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2165 __func__, r);
2166 goto fail_3;
2167 }
2168
2169 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2170 size,
2171 NULL,
2172 mapflags | BUS_DMA_NOWAIT);
2173 if (r != 0) {
2174 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2175 __func__, r);
2176 goto fail_4;
2177 }
2178 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2179 dma->dma_size = size;
2180 return 0;
2181 fail_4:
2182 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2183 fail_3:
2184 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2185 fail_2:
2186 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2187 fail_1:
2188 ixgbe_dma_tag_destroy(dma->dma_tag);
2189 fail_0:
2190 return r;
2191 }
2192
2193 void
2194 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2195 {
2196 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2197 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2198 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2199 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2200 ixgbe_dma_tag_destroy(dma->dma_tag);
2201 }
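
/*
 * Illustrative sketch (not part of this file): typical pairing of
 * ixgbe_dma_malloc()/ixgbe_dma_free() above, as done for the
 * descriptor rings in ixgbe_allocate_queues() below.  "ring" and
 * "ringsize" are hypothetical names used only for this example.
 */
#if 0
	struct ixgbe_dma_alloc ring;
	int error;

	error = ixgbe_dma_malloc(adapter, ringsize, &ring, BUS_DMA_NOWAIT);
	if (error == 0) {
		/*
		 * ring.dma_vaddr is the kernel virtual address of the
		 * memory; ring.dma_paddr is the bus address to program
		 * into the device.
		 */
		ixgbe_dma_free(adapter, &ring);	/* at detach/teardown */
	}
#endif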
2202
2203
2204 /*********************************************************************
2205 *
2206 * Allocate memory for the transmit and receive rings, and then
2207 * the descriptors associated with each, called only once at attach.
2208 *
2209 **********************************************************************/
2210 int
2211 ixgbe_allocate_queues(struct adapter *adapter)
2212 {
2213 device_t dev = adapter->dev;
2214 struct ix_queue *que;
2215 struct tx_ring *txr;
2216 struct rx_ring *rxr;
2217 int rsize, tsize, error = IXGBE_SUCCESS;
2218 int txconf = 0, rxconf = 0;
2219 #ifdef PCI_IOV
2220 enum ixgbe_iov_mode iov_mode;
2221 #endif
2222
2223 /* First allocate the top level queue structs */
2224 if (!(adapter->queues =
2225 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2226 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2227 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2228 error = ENOMEM;
2229 goto fail;
2230 }
2231
2232 /* First allocate the TX ring struct memory */
2233 if (!(adapter->tx_rings =
2234 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2235 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2236 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2237 error = ENOMEM;
2238 goto tx_fail;
2239 }
2240
2241 /* Next allocate the RX */
2242 if (!(adapter->rx_rings =
2243 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2244 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2245 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2246 error = ENOMEM;
2247 goto rx_fail;
2248 }
2249
2250 /* For the ring itself */
2251 tsize = roundup2(adapter->num_tx_desc *
2252 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2253
2254 #ifdef PCI_IOV
2255 iov_mode = ixgbe_get_iov_mode(adapter);
2256 adapter->pool = ixgbe_max_vfs(iov_mode);
2257 #else
2258 adapter->pool = 0;
2259 #endif
2260 /*
2261 * Now set up the TX queues, txconf is needed to handle the
2262 * possibility that things fail midcourse and we need to
2263 * undo memory gracefully
2264 */
2265 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2266 /* Set up some basics */
2267 txr = &adapter->tx_rings[i];
2268 txr->adapter = adapter;
2269 #ifdef PCI_IOV
2270 txr->me = ixgbe_pf_que_index(iov_mode, i);
2271 #else
2272 txr->me = i;
2273 #endif
2274 txr->num_desc = adapter->num_tx_desc;
2275
2276 /* Initialize the TX side lock */
2277 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2278 device_xname(dev), txr->me);
2279 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2280
2281 if (ixgbe_dma_malloc(adapter, tsize,
2282 &txr->txdma, BUS_DMA_NOWAIT)) {
2283 aprint_error_dev(dev,
2284 "Unable to allocate TX Descriptor memory\n");
2285 error = ENOMEM;
2286 goto err_tx_desc;
2287 }
2288 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2289 bzero((void *)txr->tx_base, tsize);
2290
2291 /* Now allocate transmit buffers for the ring */
2292 if (ixgbe_allocate_transmit_buffers(txr)) {
2293 aprint_error_dev(dev,
2294 "Critical Failure setting up transmit buffers\n");
2295 error = ENOMEM;
2296 goto err_tx_desc;
2297 }
2298 #ifndef IXGBE_LEGACY_TX
2299 /* Allocate a buf ring */
2300 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2301 if (txr->txr_interq == NULL) {
2302 aprint_error_dev(dev,
2303 "Critical Failure setting up buf ring\n");
2304 error = ENOMEM;
2305 goto err_tx_desc;
2306 }
2307 #endif
2308 }
2309
2310 /*
2311 * Next the RX queues...
2312 */
2313 rsize = roundup2(adapter->num_rx_desc *
2314 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2315 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2316 rxr = &adapter->rx_rings[i];
2317 /* Set up some basics */
2318 rxr->adapter = adapter;
2319 #ifdef PCI_IOV
2320 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2321 #else
2322 rxr->me = i;
2323 #endif
2324 rxr->num_desc = adapter->num_rx_desc;
2325
2326 /* Initialize the RX side lock */
2327 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2328 device_xname(dev), rxr->me);
2329 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2330
2331 if (ixgbe_dma_malloc(adapter, rsize,
2332 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2333 aprint_error_dev(dev,
2334 "Unable to allocate RxDescriptor memory\n");
2335 error = ENOMEM;
2336 goto err_rx_desc;
2337 }
2338 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2339 bzero((void *)rxr->rx_base, rsize);
2340
2341 /* Allocate receive buffers for the ring */
2342 if (ixgbe_allocate_receive_buffers(rxr)) {
2343 aprint_error_dev(dev,
2344 "Critical Failure setting up receive buffers\n");
2345 error = ENOMEM;
2346 goto err_rx_desc;
2347 }
2348 }
2349
2350 /*
2351 ** Finally set up the queue holding structs
2352 */
2353 for (int i = 0; i < adapter->num_queues; i++) {
2354 que = &adapter->queues[i];
2355 que->adapter = adapter;
2356 que->me = i;
2357 que->txr = &adapter->tx_rings[i];
2358 que->rxr = &adapter->rx_rings[i];
2359 }
2360
2361 return (0);
2362
2363 err_rx_desc:
2364 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2365 ixgbe_dma_free(adapter, &rxr->rxdma);
2366 err_tx_desc:
2367 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2368 ixgbe_dma_free(adapter, &txr->txdma);
2369 free(adapter->rx_rings, M_DEVBUF);
2370 rx_fail:
2371 free(adapter->tx_rings, M_DEVBUF);
2372 tx_fail:
2373 free(adapter->queues, M_DEVBUF);
2374 fail:
2375 return (error);
2376 }
2377
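
/*
 * Illustrative sketch (not part of this file): ixgbe_allocate_queues()
 * is meant to be called once from the attach path, after
 * adapter->num_queues, num_tx_desc and num_rx_desc have been set.
 * The caller only needs to check the return value; the error label
 * below is a hypothetical attach-time cleanup target.
 */
#if 0
	if (ixgbe_allocate_queues(adapter) != 0) {
		aprint_error_dev(dev, "could not allocate queues\n");
		goto err_late;
	}
#endif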