1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
62 /*$NetBSD: ix_txrx.c,v 1.22 2017/03/02 05:35:01 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73
74 extern int ix_crcstrip;
75 #endif
76
77 /*
78 ** HW RSC control:
79 **  this feature only works with
80 **  IPv4, and only on 82599 and later.
81 **  It also causes IP forwarding to
82 **  fail, and that cannot be controlled by
83 **  the stack the way LRO can. For all these
84 **  reasons it is best left off, and no
85 **  tuneable interface is provided; enabling
86 **  it requires recompiling with this set
87 **  to TRUE.
88 */
89 static bool ixgbe_rsc_enable = FALSE;
90
91 #ifdef IXGBE_FDIR
92 /*
93 ** For Flow Director: this is the
94 ** sampling rate for TX packets fed to
95 ** the filter pool; at the default of 20,
96 ** every 20th packet is probed.
97 **
98 ** This feature can be disabled by
99 ** setting this to 0.
100 */
101 static int atr_sample_rate = 20;
102 #endif
103
104 /*********************************************************************
105 * Local Function prototypes
106 *********************************************************************/
107 static void ixgbe_setup_transmit_ring(struct tx_ring *);
108 static void ixgbe_free_transmit_buffers(struct tx_ring *);
109 static int ixgbe_setup_receive_ring(struct rx_ring *);
110 static void ixgbe_free_receive_buffers(struct rx_ring *);
111
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
116 static int ixgbe_tx_ctx_setup(struct tx_ring *,
117 struct mbuf *, u32 *, u32 *);
118 static int ixgbe_tso_setup(struct tx_ring *,
119 struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126
127 static void ixgbe_setup_hw_rsc(struct rx_ring *);
128
129 /*********************************************************************
130 * Transmit entry point
131 *
132 * ixgbe_start is called by the stack to initiate a transmit.
133 * The driver will remain in this routine as long as there are
134 * packets to transmit and transmit resources are available.
135  *  In case resources are not available, the stack is notified
136  *  and the packet is left on the send queue.
137 **********************************************************************/
138
139 void
140 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
141 {
142 int rc;
143 struct mbuf *m_head;
144 struct adapter *adapter = txr->adapter;
145
146 IXGBE_TX_LOCK_ASSERT(txr);
147
148 if ((ifp->if_flags & IFF_RUNNING) == 0)
149 return;
150 if (!adapter->link_active)
151 return;
152
153 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
154 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
155 break;
156
157 IFQ_POLL(&ifp->if_snd, m_head);
158 if (m_head == NULL)
159 break;
160
161 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
162 break;
163 }
164 IFQ_DEQUEUE(&ifp->if_snd, m_head);
165 if (rc != 0) {
166 m_freem(m_head);
167 continue;
168 }
169
170 /* Send a copy of the frame to the BPF listener */
171 bpf_mtap(ifp, m_head);
172 }
173 return;
174 }
175
176 /*
177  * Legacy TX start - called by the stack; this
178  * always uses the first tx ring and should
179 * not be used with multiqueue tx enabled.
180 */
181 void
182 ixgbe_start(struct ifnet *ifp)
183 {
184 struct adapter *adapter = ifp->if_softc;
185 struct tx_ring *txr = adapter->tx_rings;
186
187 if (ifp->if_flags & IFF_RUNNING) {
188 IXGBE_TX_LOCK(txr);
189 ixgbe_start_locked(txr, ifp);
190 IXGBE_TX_UNLOCK(txr);
191 }
192 return;
193 }
194
195 #ifndef IXGBE_LEGACY_TX
196
197 /*
198 ** Multiqueue Transmit Entry Point
199 ** (if_transmit function)
200 */
201 int
202 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
203 {
204 struct adapter *adapter = ifp->if_softc;
205 struct tx_ring *txr;
206 int i, err = 0;
207 #ifdef RSS
208 uint32_t bucket_id;
209 #endif
210
211 /*
212 * When doing RSS, map it to the same outbound queue
213 * as the incoming flow would be mapped to.
214 *
215 * If everything is setup correctly, it should be the
216 * same bucket that the current CPU we're on is.
217 */
218 #if 0
219 #if __FreeBSD_version < 1100054
220 if (m->m_flags & M_FLOWID) {
221 #else
222 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
223 #endif
224 #ifdef RSS
225 if (rss_hash2bucket(m->m_pkthdr.flowid,
226 M_HASHTYPE_GET(m), &bucket_id) == 0) {
227 /* TODO: spit out something if bucket_id > num_queues? */
228 i = bucket_id % adapter->num_queues;
229 #ifdef IXGBE_DEBUG
230 if (bucket_id > adapter->num_queues)
231 if_printf(ifp, "bucket_id (%d) > num_queues "
232 "(%d)\n", bucket_id, adapter->num_queues);
233 #endif
234 } else
235 #endif /* RSS */
236 i = m->m_pkthdr.flowid % adapter->num_queues;
237 } else
238 #endif
239 i = cpu_index(curcpu()) % adapter->num_queues;
240
241 /* Check for a hung queue and pick alternative */
242 if (((1 << i) & adapter->active_queues) == 0)
243 i = ffs64(adapter->active_queues);
244
245 txr = &adapter->tx_rings[i];
246
247 err = pcq_put(txr->txr_interq, m);
248 if (err == false) {
249 m_freem(m);
250 txr->pcq_drops.ev_count++;
251 		return (ENOBUFS);
252 }
253 if (IXGBE_TX_TRYLOCK(txr)) {
254 ixgbe_mq_start_locked(ifp, txr);
255 IXGBE_TX_UNLOCK(txr);
256 } else
257 softint_schedule(txr->txr_si);
258
259 return (0);
260 }
261
262 int
263 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
264 {
265 struct adapter *adapter = txr->adapter;
266 struct mbuf *next;
267 int enqueued = 0, err = 0;
268
269 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
270 adapter->link_active == 0)
271 return (ENETDOWN);
272
273 /* Process the queue */
274 while ((next = pcq_get(txr->txr_interq)) != NULL) {
275 if ((err = ixgbe_xmit(txr, next)) != 0) {
276 m_freem(next);
277 /* All errors are counted in ixgbe_xmit() */
278 break;
279 }
280 enqueued++;
281 #if 0 // this is VF-only
282 #if __FreeBSD_version >= 1100036
283 /*
284 * Since we're looking at the tx ring, we can check
285 * to see if we're a VF by examing our tail register
286 * address.
287 */
288 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
289 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
290 #endif
291 #endif /* 0 */
292 /* Send a copy of the frame to the BPF listener */
293 bpf_mtap(ifp, next);
294 if ((ifp->if_flags & IFF_RUNNING) == 0)
295 break;
296 }
297
298 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
299 ixgbe_txeof(txr);
300
301 return (err);
302 }
303
304 /*
305 * Called from a taskqueue to drain queued transmit packets.
306 */
307 void
308 ixgbe_deferred_mq_start(void *arg)
309 {
310 struct tx_ring *txr = arg;
311 struct adapter *adapter = txr->adapter;
312 struct ifnet *ifp = adapter->ifp;
313
314 IXGBE_TX_LOCK(txr);
315 if (pcq_peek(txr->txr_interq) != NULL)
316 ixgbe_mq_start_locked(ifp, txr);
317 IXGBE_TX_UNLOCK(txr);
318 }
319
320 #endif /* IXGBE_LEGACY_TX */
321
322
323 /*********************************************************************
324 *
325 * This routine maps the mbufs to tx descriptors, allowing the
326 * TX engine to transmit the packets.
327 * - return 0 on success, positive on failure
328 *
329 **********************************************************************/
330
331 static int
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
333 {
334 struct m_tag *mtag;
335 struct adapter *adapter = txr->adapter;
336 struct ethercom *ec = &adapter->osdep.ec;
337 u32 olinfo_status = 0, cmd_type_len;
338 int i, j, error;
339 int first;
340 bool remap = TRUE;
341 bus_dmamap_t map;
342 struct ixgbe_tx_buf *txbuf;
343 union ixgbe_adv_tx_desc *txd = NULL;
344
345 /* Basic descriptor defines */
346 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
347 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
348
349 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
350 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
351
352 /*
353 * Important to capture the first descriptor
354 * used because it will contain the index of
355 * the one we tell the hardware to report back
356 */
357 first = txr->next_avail_desc;
358 txbuf = &txr->tx_buffers[first];
359 map = txbuf->map;
360
361 /*
362 * Map the packet for DMA.
363 */
364 retry:
365 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
366 m_head, BUS_DMA_NOWAIT);
367
368 if (__predict_false(error)) {
369 struct mbuf *m;
370
371 switch (error) {
372 case EAGAIN:
373 adapter->eagain_tx_dma_setup.ev_count++;
374 return EAGAIN;
375 case ENOMEM:
376 adapter->enomem_tx_dma_setup.ev_count++;
377 return EAGAIN;
378 case EFBIG:
379 /* Try it again? - one try */
380 if (remap == TRUE) {
381 remap = FALSE;
382 /*
383 * XXX: m_defrag will choke on
384 * non-MCLBYTES-sized clusters
385 */
386 adapter->efbig_tx_dma_setup.ev_count++;
387 m = m_defrag(m_head, M_NOWAIT);
388 if (m == NULL) {
389 adapter->mbuf_defrag_failed.ev_count++;
390 return ENOBUFS;
391 }
392 m_head = m;
393 goto retry;
394 } else {
395 adapter->efbig2_tx_dma_setup.ev_count++;
396 return error;
397 }
398 case EINVAL:
399 adapter->einval_tx_dma_setup.ev_count++;
400 return error;
401 default:
402 adapter->other_tx_dma_setup.ev_count++;
403 return error;
404 }
405 }
406
407 /* Make certain there are enough descriptors */
408 if (txr->tx_avail < (map->dm_nsegs + 2)) {
409 txr->no_desc_avail.ev_count++;
410 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
411 return EAGAIN;
412 }
413
414 /*
415 	 * Set up the appropriate offload context;
416 	 * this will consume the first descriptor.
417 */
418 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
419 if (__predict_false(error)) {
420 return (error);
421 }
422
423 #ifdef IXGBE_FDIR
424 /* Do the flow director magic */
425 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
426 ++txr->atr_count;
427 if (txr->atr_count >= atr_sample_rate) {
428 ixgbe_atr(txr, m_head);
429 txr->atr_count = 0;
430 }
431 }
432 #endif
433
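	/*
	 * Set the Check Context bit so the data descriptors use the
	 * offload context prepared above; required at least on VF
	 * variants and harmless otherwise.
	 */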
434 olinfo_status |= IXGBE_ADVTXD_CC;
435 i = txr->next_avail_desc;
436 for (j = 0; j < map->dm_nsegs; j++) {
437 bus_size_t seglen;
438 bus_addr_t segaddr;
439
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = map->dm_segs[j].ds_len;
443 segaddr = htole64(map->dm_segs[j].ds_addr);
444
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 cmd_type_len |seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
449
450 if (++i == txr->num_desc)
451 i = 0;
452 }
453
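	/*
	 * Close out the frame: EOP marks the last descriptor and RS
	 * requests a status writeback so ixgbe_txeof() can see when
	 * the transmission has completed.
	 */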
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= map->dm_nsegs;
457 txr->next_avail_desc = i;
458
459 txbuf->m_head = m_head;
460 /*
461 * Here we swap the map so the last descriptor,
462 	 * which gets the completion interrupt, has the
463 * real map, and the first descriptor gets the
464 * unused map from this descriptor.
465 */
466 txr->tx_buffers[first].map = txbuf->map;
467 txbuf->map = map;
468 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
469 BUS_DMASYNC_PREWRITE);
470
471 /* Set the EOP descriptor that will be marked done */
472 txbuf = &txr->tx_buffers[first];
473 txbuf->eop = txd;
474
475 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
476 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
477 /*
478 	 * Advance the Transmit Descriptor Tail (Tdt); this tells the
479 * hardware that this frame is available to transmit.
480 */
481 ++txr->total_packets.ev_count;
482 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
483
484 /* Mark queue as having work */
485 if (txr->busy == 0)
486 txr->busy = 1;
487
488 return 0;
489 }
490
491
492 /*********************************************************************
493 *
494 * Allocate memory for tx_buffer structures. The tx_buffer stores all
495 * the information needed to transmit a packet on the wire. This is
496  *  called only once at attach; setup is done on every reset.
497 *
498 **********************************************************************/
499 int
500 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
501 {
502 struct adapter *adapter = txr->adapter;
503 device_t dev = adapter->dev;
504 struct ixgbe_tx_buf *txbuf;
505 int error, i;
506
507 /*
508 * Setup DMA descriptor areas.
509 */
510 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
511 1, 0, /* alignment, bounds */
512 IXGBE_TSO_SIZE, /* maxsize */
513 adapter->num_segs, /* nsegments */
514 PAGE_SIZE, /* maxsegsize */
515 0, /* flags */
516 &txr->txtag))) {
517 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
518 goto fail;
519 }
520
521 if (!(txr->tx_buffers =
522 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
523 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
524 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
525 error = ENOMEM;
526 goto fail;
527 }
528
529 /* Create the descriptor buffer dma maps */
530 txbuf = txr->tx_buffers;
531 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
532 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
533 if (error != 0) {
534 aprint_error_dev(dev,
535 "Unable to create TX DMA map (%d)\n", error);
536 goto fail;
537 }
538 }
539
540 return 0;
541 fail:
542 	/* We free everything; this handles the case where we are in the middle */
543 #if 0 /* XXX was FreeBSD */
544 ixgbe_free_transmit_structures(adapter);
545 #else
546 ixgbe_free_transmit_buffers(txr);
547 #endif
548 return (error);
549 }
550
551 /*********************************************************************
552 *
553 * Initialize a transmit ring.
554 *
555 **********************************************************************/
556 static void
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
558 {
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
561 #ifdef DEV_NETMAP
562 struct netmap_adapter *na = NA(adapter->ifp);
563 struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
565
566 /* Clear the old ring contents */
567 IXGBE_TX_LOCK(txr);
568 #ifdef DEV_NETMAP
569 /*
570 * (under lock): if in netmap mode, do some consistency
571 * checks and set slot to entry 0 of the netmap ring.
572 */
573 slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 bzero((void *)txr->tx_base,
576 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
577 /* Reset indices */
578 txr->next_avail_desc = 0;
579 txr->next_to_clean = 0;
580
581 /* Free any existing tx buffers. */
582 txbuf = txr->tx_buffers;
583 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 if (txbuf->m_head != NULL) {
585 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
586 0, txbuf->m_head->m_pkthdr.len,
587 BUS_DMASYNC_POSTWRITE);
588 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
589 m_freem(txbuf->m_head);
590 txbuf->m_head = NULL;
591 }
592 #ifdef DEV_NETMAP
593 /*
594 * In netmap mode, set the map for the packet buffer.
595 * NOTE: Some drivers (not this one) also need to set
596 * the physical buffer address in the NIC ring.
597 * Slots in the netmap ring (indexed by "si") are
598 * kring->nkr_hwofs positions "ahead" wrt the
599 * corresponding slot in the NIC ring. In some drivers
600 * (not here) nkr_hwofs can be negative. Function
601 * netmap_idx_n2k() handles wraparounds properly.
602 */
603 if (slot) {
604 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605 netmap_load_map(na, txr->txtag,
606 txbuf->map, NMB(na, slot + si));
607 }
608 #endif /* DEV_NETMAP */
609 /* Clear the EOP descriptor pointer */
610 txbuf->eop = NULL;
611 }
612
613 #ifdef IXGBE_FDIR
614 /* Set the rate at which we sample packets */
615 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
616 txr->atr_sample = atr_sample_rate;
617 #endif
618
619 /* Set number of descriptors available */
620 txr->tx_avail = adapter->num_tx_desc;
621
622 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
623 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
624 IXGBE_TX_UNLOCK(txr);
625 }
626
627 /*********************************************************************
628 *
629 * Initialize all transmit rings.
630 *
631 **********************************************************************/
632 int
633 ixgbe_setup_transmit_structures(struct adapter *adapter)
634 {
635 struct tx_ring *txr = adapter->tx_rings;
636
637 for (int i = 0; i < adapter->num_queues; i++, txr++)
638 ixgbe_setup_transmit_ring(txr);
639
640 return (0);
641 }
642
643 /*********************************************************************
644 *
645 * Free all transmit rings.
646 *
647 **********************************************************************/
648 void
649 ixgbe_free_transmit_structures(struct adapter *adapter)
650 {
651 struct tx_ring *txr = adapter->tx_rings;
652
653 for (int i = 0; i < adapter->num_queues; i++, txr++) {
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_LOCK_DESTROY(txr);
657 }
658 free(adapter->tx_rings, M_DEVBUF);
659 }
660
661 /*********************************************************************
662 *
663 * Free transmit ring related data structures.
664 *
665 **********************************************************************/
666 static void
667 ixgbe_free_transmit_buffers(struct tx_ring *txr)
668 {
669 struct adapter *adapter = txr->adapter;
670 struct ixgbe_tx_buf *tx_buffer;
671 int i;
672
673 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
674
675 if (txr->tx_buffers == NULL)
676 return;
677
678 tx_buffer = txr->tx_buffers;
679 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
680 if (tx_buffer->m_head != NULL) {
681 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
682 0, tx_buffer->m_head->m_pkthdr.len,
683 BUS_DMASYNC_POSTWRITE);
684 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
685 m_freem(tx_buffer->m_head);
686 tx_buffer->m_head = NULL;
687 if (tx_buffer->map != NULL) {
688 ixgbe_dmamap_destroy(txr->txtag,
689 tx_buffer->map);
690 tx_buffer->map = NULL;
691 }
692 } else if (tx_buffer->map != NULL) {
693 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
694 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
695 tx_buffer->map = NULL;
696 }
697 }
698 #ifndef IXGBE_LEGACY_TX
699 if (txr->txr_interq != NULL) {
700 struct mbuf *m;
701
702 while ((m = pcq_get(txr->txr_interq)) != NULL)
703 m_freem(m);
704 pcq_destroy(txr->txr_interq);
705 }
706 #endif
707 if (txr->tx_buffers != NULL) {
708 free(txr->tx_buffers, M_DEVBUF);
709 txr->tx_buffers = NULL;
710 }
711 if (txr->txtag != NULL) {
712 ixgbe_dma_tag_destroy(txr->txtag);
713 txr->txtag = NULL;
714 }
715 return;
716 }
717
718 /*********************************************************************
719 *
720 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
721 *
722 **********************************************************************/
723
724 static int
725 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
726 u32 *cmd_type_len, u32 *olinfo_status)
727 {
728 struct adapter *adapter = txr->adapter;
729 struct ethercom *ec = &adapter->osdep.ec;
730 struct m_tag *mtag;
731 struct ixgbe_adv_tx_context_desc *TXD;
732 struct ether_vlan_header *eh;
733 #ifdef INET
734 struct ip *ip;
735 #endif
736 #ifdef INET6
737 struct ip6_hdr *ip6;
738 #endif
739 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
740 int ehdrlen, ip_hlen = 0;
741 u16 etype;
742 u8 ipproto = 0;
743 int offload = TRUE;
744 int ctxd = txr->next_avail_desc;
745 u16 vtag = 0;
746 char *l3d;
747
748
749 /* First check if TSO is to be used */
750 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6)) {
751 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
752
753 if (rv != 0)
754 ++adapter->tso_err.ev_count;
755 return rv;
756 }
757
758 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
759 offload = FALSE;
760
761 /* Indicate the whole packet as payload when not doing TSO */
762 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
763
764 /* Now ready a context descriptor */
765 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
766
767 /*
768 ** In advanced descriptors the vlan tag must
769 ** be placed into the context descriptor. Hence
770 ** we need to make one even if not doing offloads.
771 */
772 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
773 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
774 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
775 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
776 return (0);
777
778 /*
779 * Determine where frame payload starts.
780 * Jump over vlan headers if already present,
781 * helpful for QinQ too.
782 */
783 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
784 eh = mtod(mp, struct ether_vlan_header *);
785 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
786 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
787 etype = ntohs(eh->evl_proto);
788 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
789 } else {
790 etype = ntohs(eh->evl_encap_proto);
791 ehdrlen = ETHER_HDR_LEN;
792 }
793
794 /* Set the ether header length */
795 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
796
797 if (offload == FALSE)
798 goto no_offloads;
799
800 /*
801 * If the first mbuf only includes the ethernet header, jump to the next one
802 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
803 * XXX: And assumes the entire IP header is contained in one mbuf
804 */
805 if (mp->m_len == ehdrlen && mp->m_next)
806 l3d = mtod(mp->m_next, char *);
807 else
808 l3d = mtod(mp, char *) + ehdrlen;
809
810 switch (etype) {
811 #ifdef INET
812 case ETHERTYPE_IP:
813 ip = (struct ip *)(l3d);
814 ip_hlen = ip->ip_hl << 2;
815 ipproto = ip->ip_p;
816 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
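		/*
		 * When the stack requests IPv4 header checksum offload it
		 * leaves ip_sum zeroed; the assertion below checks that.
		 */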
817 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
818 ip->ip_sum == 0);
819 break;
820 #endif
821 #ifdef INET6
822 case ETHERTYPE_IPV6:
823 ip6 = (struct ip6_hdr *)(l3d);
824 ip_hlen = sizeof(struct ip6_hdr);
825 ipproto = ip6->ip6_nxt;
826 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
827 break;
828 #endif
829 default:
830 offload = false;
831 break;
832 }
833
834 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
835 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
836
837 vlan_macip_lens |= ip_hlen;
838
839 /* No support for offloads for non-L4 next headers */
840 switch (ipproto) {
841 case IPPROTO_TCP:
842 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
844 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
845 else
846 offload = false;
847 break;
848 case IPPROTO_UDP:
849 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
850 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
851 else
852 offload = false;
853 break;
854 default:
855 offload = false;
856 break;
857 }
858
859 if (offload) /* Insert L4 checksum into data descriptors */
860 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
861
862 no_offloads:
863 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
864
865 /* Now copy bits into descriptor */
866 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
867 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
868 TXD->seqnum_seed = htole32(0);
869 TXD->mss_l4len_idx = htole32(0);
870
871 /* We've consumed the first desc, adjust counters */
872 if (++ctxd == txr->num_desc)
873 ctxd = 0;
874 txr->next_avail_desc = ctxd;
875 --txr->tx_avail;
876
877 return 0;
878 }
879
880 /**********************************************************************
881 *
882 * Setup work for hardware segmentation offload (TSO) on
883 * adapters using advanced tx descriptors
884 *
885 **********************************************************************/
886 static int
887 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
888 u32 *cmd_type_len, u32 *olinfo_status)
889 {
890 struct m_tag *mtag;
891 struct adapter *adapter = txr->adapter;
892 struct ethercom *ec = &adapter->osdep.ec;
893 struct ixgbe_adv_tx_context_desc *TXD;
894 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
895 u32 mss_l4len_idx = 0, paylen;
896 u16 vtag = 0, eh_type;
897 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
898 struct ether_vlan_header *eh;
899 #ifdef INET6
900 struct ip6_hdr *ip6;
901 #endif
902 #ifdef INET
903 struct ip *ip;
904 #endif
905 struct tcphdr *th;
906
907 /*
908 * Determine where frame payload starts.
909 * Jump over vlan headers if already present
910 */
911 eh = mtod(mp, struct ether_vlan_header *);
912 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
913 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
914 eh_type = eh->evl_proto;
915 } else {
916 ehdrlen = ETHER_HDR_LEN;
917 eh_type = eh->evl_encap_proto;
918 }
919
920 switch (ntohs(eh_type)) {
921 #ifdef INET6
922 case ETHERTYPE_IPV6:
923 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
924 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
925 if (ip6->ip6_nxt != IPPROTO_TCP)
926 return (ENXIO);
927 ip_hlen = sizeof(struct ip6_hdr);
928 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
929 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
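		/*
		 * For TSO the hardware expects th_sum to be seeded with the
		 * pseudo-header checksum, computed here with a zero length.
		 */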
930 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
931 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
932 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
933 break;
934 #endif
935 #ifdef INET
936 case ETHERTYPE_IP:
937 ip = (struct ip *)(mp->m_data + ehdrlen);
938 if (ip->ip_p != IPPROTO_TCP)
939 return (ENXIO);
940 ip->ip_sum = 0;
941 ip_hlen = ip->ip_hl << 2;
942 th = (struct tcphdr *)((char *)ip + ip_hlen);
943 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
944 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
945 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
946 /* Tell transmit desc to also do IPv4 checksum. */
947 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
948 break;
949 #endif
950 default:
951 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
952 __func__, ntohs(eh_type));
953 break;
954 }
955
956 ctxd = txr->next_avail_desc;
957 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
958
959 tcp_hlen = th->th_off << 2;
960
961 /* This is used in the transmit desc in encap */
962 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
963
964 /* VLAN MACLEN IPLEN */
965 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
966 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
967 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
968 }
969
970 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
971 vlan_macip_lens |= ip_hlen;
972 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
973
974 /* ADV DTYPE TUCMD */
975 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
976 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
977 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
978
979 /* MSS L4LEN IDX */
980 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
981 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
982 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
983
984 TXD->seqnum_seed = htole32(0);
985
986 if (++ctxd == txr->num_desc)
987 ctxd = 0;
988
989 txr->tx_avail--;
990 txr->next_avail_desc = ctxd;
991 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
992 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
993 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
994 ++txr->tso_tx.ev_count;
995 return (0);
996 }
997
998
999 /**********************************************************************
1000 *
1001 * Examine each tx_buffer in the used queue. If the hardware is done
1002 * processing the packet then free associated resources. The
1003 * tx_buffer is put back on the free queue.
1004 *
1005 **********************************************************************/
1006 void
1007 ixgbe_txeof(struct tx_ring *txr)
1008 {
1009 struct adapter *adapter = txr->adapter;
1010 struct ifnet *ifp = adapter->ifp;
1011 u32 work, processed = 0;
1012 u32 limit = adapter->tx_process_limit;
1013 struct ixgbe_tx_buf *buf;
1014 union ixgbe_adv_tx_desc *txd;
1015
1016 KASSERT(mutex_owned(&txr->tx_mtx));
1017
1018 #ifdef DEV_NETMAP
1019 if (ifp->if_capenable & IFCAP_NETMAP) {
1020 struct netmap_adapter *na = NA(ifp);
1021 struct netmap_kring *kring = &na->tx_rings[txr->me];
1022 txd = txr->tx_base;
1023 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1024 BUS_DMASYNC_POSTREAD);
1025 /*
1026 * In netmap mode, all the work is done in the context
1027 * of the client thread. Interrupt handlers only wake up
1028 * clients, which may be sleeping on individual rings
1029 * or on a global resource for all rings.
1030 * To implement tx interrupt mitigation, we wake up the client
1031 * thread roughly every half ring, even if the NIC interrupts
1032 * more frequently. This is implemented as follows:
1033 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1034 * the slot that should wake up the thread (nkr_num_slots
1035 * means the user thread should not be woken up);
1036 * - the driver ignores tx interrupts unless netmap_mitigate=0
1037 * or the slot has the DD bit set.
1038 */
1039 if (!netmap_mitigate ||
1040 (kring->nr_kflags < kring->nkr_num_slots &&
1041 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1042 netmap_tx_irq(ifp, txr->me);
1043 }
1044 return;
1045 }
1046 #endif /* DEV_NETMAP */
1047
1048 if (txr->tx_avail == txr->num_desc) {
1049 txr->busy = 0;
1050 return;
1051 }
1052
1053 /* Get work starting point */
1054 work = txr->next_to_clean;
1055 buf = &txr->tx_buffers[work];
1056 txd = &txr->tx_base[work];
1057 work -= txr->num_desc; /* The distance to ring end */
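	/*
	 * "work" is kept as a negative offset from the end of the ring;
	 * it reaches zero exactly when the index wraps, which is what
	 * the (!work) tests below rely on. It is converted back to a
	 * real index by adding num_desc once the loop is done.
	 */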
1058 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1059 BUS_DMASYNC_POSTREAD);
1060
1061 do {
1062 union ixgbe_adv_tx_desc *eop = buf->eop;
1063 if (eop == NULL) /* No work */
1064 break;
1065
1066 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1067 break; /* I/O not complete */
1068
1069 if (buf->m_head) {
1070 txr->bytes +=
1071 buf->m_head->m_pkthdr.len;
1072 bus_dmamap_sync(txr->txtag->dt_dmat,
1073 buf->map,
1074 0, buf->m_head->m_pkthdr.len,
1075 BUS_DMASYNC_POSTWRITE);
1076 ixgbe_dmamap_unload(txr->txtag,
1077 buf->map);
1078 m_freem(buf->m_head);
1079 buf->m_head = NULL;
1080 }
1081 buf->eop = NULL;
1082 ++txr->tx_avail;
1083
1084 /* We clean the range if multi segment */
1085 while (txd != eop) {
1086 ++txd;
1087 ++buf;
1088 ++work;
1089 /* wrap the ring? */
1090 if (__predict_false(!work)) {
1091 work -= txr->num_desc;
1092 buf = txr->tx_buffers;
1093 txd = txr->tx_base;
1094 }
1095 if (buf->m_head) {
1096 txr->bytes +=
1097 buf->m_head->m_pkthdr.len;
1098 bus_dmamap_sync(txr->txtag->dt_dmat,
1099 buf->map,
1100 0, buf->m_head->m_pkthdr.len,
1101 BUS_DMASYNC_POSTWRITE);
1102 ixgbe_dmamap_unload(txr->txtag,
1103 buf->map);
1104 m_freem(buf->m_head);
1105 buf->m_head = NULL;
1106 }
1107 ++txr->tx_avail;
1108 buf->eop = NULL;
1109
1110 }
1111 ++txr->packets;
1112 ++processed;
1113 ++ifp->if_opackets;
1114
1115 /* Try the next packet */
1116 ++txd;
1117 ++buf;
1118 ++work;
1119 /* reset with a wrap */
1120 if (__predict_false(!work)) {
1121 work -= txr->num_desc;
1122 buf = txr->tx_buffers;
1123 txd = txr->tx_base;
1124 }
1125 prefetch(txd);
1126 } while (__predict_true(--limit));
1127
1128 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1129 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1130
1131 work += txr->num_desc;
1132 txr->next_to_clean = work;
1133
1134 /*
1135 	** Queue Hang detection: we know there's
1136 	** work outstanding or the first return
1137 	** would have been taken, so increment busy
1138 	** if nothing managed to get cleaned. Then,
1139 	** in local_timer, it will be checked and
1140 	** marked as HUNG if it exceeds a MAX attempt.
1141 */
1142 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1143 ++txr->busy;
1144 /*
1145 	** If anything gets cleaned we reset the state to 1;
1146 	** note this will turn off HUNG if it's set.
1147 */
1148 if (processed)
1149 txr->busy = 1;
1150
1151 if (txr->tx_avail == txr->num_desc)
1152 txr->busy = 0;
1153
1154 return;
1155 }
1156
1157
1158 #ifdef IXGBE_FDIR
1159 /*
1160 ** This routine parses packet headers so that Flow
1161 ** Director can make a hashed filter table entry
1162 ** allowing traffic flows to be identified and kept
1163 ** on the same cpu. Doing this for every packet
1164 ** would be a performance hit, so we only do it
1165 ** for 1 in IXGBE_FDIR_RATE packets.
1166 */
1167 static void
1168 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1169 {
1170 struct adapter *adapter = txr->adapter;
1171 struct ix_queue *que;
1172 struct ip *ip;
1173 struct tcphdr *th;
1174 struct udphdr *uh;
1175 struct ether_vlan_header *eh;
1176 union ixgbe_atr_hash_dword input = {.dword = 0};
1177 union ixgbe_atr_hash_dword common = {.dword = 0};
1178 int ehdrlen, ip_hlen;
1179 u16 etype;
1180
1181 eh = mtod(mp, struct ether_vlan_header *);
1182 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1183 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1184 etype = eh->evl_proto;
1185 } else {
1186 ehdrlen = ETHER_HDR_LEN;
1187 etype = eh->evl_encap_proto;
1188 }
1189
1190 /* Only handling IPv4 */
1191 if (etype != htons(ETHERTYPE_IP))
1192 return;
1193
1194 ip = (struct ip *)(mp->m_data + ehdrlen);
1195 ip_hlen = ip->ip_hl << 2;
1196
1197 /* check if we're UDP or TCP */
1198 switch (ip->ip_p) {
1199 case IPPROTO_TCP:
1200 th = (struct tcphdr *)((char *)ip + ip_hlen);
1201 /* src and dst are inverted */
1202 common.port.dst ^= th->th_sport;
1203 common.port.src ^= th->th_dport;
1204 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1205 break;
1206 case IPPROTO_UDP:
1207 uh = (struct udphdr *)((char *)ip + ip_hlen);
1208 /* src and dst are inverted */
1209 common.port.dst ^= uh->uh_sport;
1210 common.port.src ^= uh->uh_dport;
1211 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1212 break;
1213 default:
1214 return;
1215 }
1216
1217 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1218 if (mp->m_pkthdr.ether_vtag)
1219 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1220 else
1221 common.flex_bytes ^= etype;
1222 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1223
1224 que = &adapter->queues[txr->me];
1225 /*
1226 ** This assumes the Rx queue and Tx
1227 ** queue are bound to the same CPU
1228 */
1229 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1230 input, common, que->msix);
1231 }
1232 #endif /* IXGBE_FDIR */
1233
1234 /*
1235 ** Used to detect a descriptor that has
1236 ** been merged by Hardware RSC.
1237 */
1238 static inline u32
1239 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1240 {
1241 return (le32toh(rx->wb.lower.lo_dword.data) &
1242 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1243 }
1244
1245 /*********************************************************************
1246 *
1247  *  Initialize the Hardware RSC (LRO) feature on 82599
1248  *  for an RX ring; this is toggled by the LRO capability
1249  *  even though it is transparent to the stack.
1250  *
1251  *  NOTE: since this HW feature only works with IPv4 and
1252  *  our testing has shown soft LRO to be as effective,
1253  *  it is disabled by default.
1254 *
1255 **********************************************************************/
1256 static void
1257 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1258 {
1259 struct adapter *adapter = rxr->adapter;
1260 struct ixgbe_hw *hw = &adapter->hw;
1261 u32 rscctrl, rdrxctl;
1262
1263 	/* If turning LRO/RSC off, we need to disable the engine */
1264 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1265 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1266 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write it back, otherwise RSC stays enabled */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1267 		return;
1268 }
1269
1270 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1271 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1272 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1273 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1274 #endif /* DEV_NETMAP */
1275 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1276 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1277 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1278
1279 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1280 rscctrl |= IXGBE_RSCCTL_RSCEN;
1281 /*
1282 ** Limit the total number of descriptors that
1283 ** can be combined, so it does not exceed 64K
1284 */
1285 if (rxr->mbuf_sz == MCLBYTES)
1286 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1287 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1288 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1289 else if (rxr->mbuf_sz == MJUM9BYTES)
1290 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1291 else /* Using 16K cluster */
1292 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1293
1294 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1295
1296 /* Enable TCP header recognition */
1297 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1298 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1299 IXGBE_PSRTYPE_TCPHDR));
1300
1301 /* Disable RSC for ACK packets */
1302 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1303 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1304
1305 rxr->hw_rsc = TRUE;
1306 }
1307
1308 /*********************************************************************
1309 *
1310 * Refresh mbuf buffers for RX descriptor rings
1311 * - now keeps its own state so discards due to resource
1312  *	exhaustion are unnecessary; if an mbuf cannot be obtained
1313 * it just returns, keeping its placeholder, thus it can simply
1314 * be recalled to try again.
1315 *
1316 **********************************************************************/
1317 static void
1318 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1319 {
1320 struct adapter *adapter = rxr->adapter;
1321 struct ixgbe_rx_buf *rxbuf;
1322 struct mbuf *mp;
1323 int i, j, error;
1324 bool refreshed = false;
1325
1326 i = j = rxr->next_to_refresh;
1327 /* Control the loop with one beyond */
1328 if (++j == rxr->num_desc)
1329 j = 0;
1330
1331 while (j != limit) {
1332 rxbuf = &rxr->rx_buffers[i];
1333 if (rxbuf->buf == NULL) {
1334 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1335 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1336 if (mp == NULL) {
1337 rxr->no_jmbuf.ev_count++;
1338 goto update;
1339 }
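			/*
			 * Shift the payload start by ETHER_ALIGN so the IP
			 * header is 32-bit aligned; only done while the
			 * maximum frame still fits in the cluster.
			 */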
1340 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1341 m_adj(mp, ETHER_ALIGN);
1342 } else
1343 mp = rxbuf->buf;
1344
1345 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1346
1347 /* If we're dealing with an mbuf that was copied rather
1348 * than replaced, there's no need to go through busdma.
1349 */
1350 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1351 /* Get the memory mapping */
1352 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1353 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1354 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1355 if (error != 0) {
1356 printf("Refresh mbufs: payload dmamap load"
1357 " failure - %d\n", error);
1358 m_free(mp);
1359 rxbuf->buf = NULL;
1360 goto update;
1361 }
1362 rxbuf->buf = mp;
1363 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1364 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1365 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1366 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1367 } else {
1368 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1369 rxbuf->flags &= ~IXGBE_RX_COPY;
1370 }
1371
1372 refreshed = true;
1373 /* Next is precalculated */
1374 i = j;
1375 rxr->next_to_refresh = i;
1376 if (++j == rxr->num_desc)
1377 j = 0;
1378 }
1379 update:
1380 if (refreshed) /* Update hardware tail index */
1381 IXGBE_WRITE_REG(&adapter->hw,
1382 rxr->tail, rxr->next_to_refresh);
1383 return;
1384 }
1385
1386 /*********************************************************************
1387 *
1388 * Allocate memory for rx_buffer structures. Since we use one
1389  *  rx_buffer per received packet, the maximum number of rx_buffers
1390 * that we'll need is equal to the number of receive descriptors
1391 * that we've allocated.
1392 *
1393 **********************************************************************/
1394 int
1395 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1396 {
1397 struct adapter *adapter = rxr->adapter;
1398 device_t dev = adapter->dev;
1399 struct ixgbe_rx_buf *rxbuf;
1400 int bsize, error;
1401
1402 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1403 if (!(rxr->rx_buffers =
1404 (struct ixgbe_rx_buf *) malloc(bsize,
1405 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1406 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1407 error = ENOMEM;
1408 goto fail;
1409 }
1410
1411 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1412 1, 0, /* alignment, bounds */
1413 MJUM16BYTES, /* maxsize */
1414 1, /* nsegments */
1415 MJUM16BYTES, /* maxsegsize */
1416 0, /* flags */
1417 &rxr->ptag))) {
1418 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1419 goto fail;
1420 }
1421
1422 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1423 rxbuf = &rxr->rx_buffers[i];
1424 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1425 if (error) {
1426 aprint_error_dev(dev, "Unable to create RX dma map\n");
1427 goto fail;
1428 }
1429 }
1430
1431 return (0);
1432
1433 fail:
1434 /* Frees all, but can handle partial completion */
1435 ixgbe_free_receive_structures(adapter);
1436 return (error);
1437 }
1438
1439 static void
1440 ixgbe_free_receive_ring(struct rx_ring *rxr)
1441 {
1442 struct ixgbe_rx_buf *rxbuf;
1443
1444 for (int i = 0; i < rxr->num_desc; i++) {
1445 rxbuf = &rxr->rx_buffers[i];
1446 if (rxbuf->buf != NULL) {
1447 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1448 0, rxbuf->buf->m_pkthdr.len,
1449 BUS_DMASYNC_POSTREAD);
1450 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1451 rxbuf->buf->m_flags |= M_PKTHDR;
1452 m_freem(rxbuf->buf);
1453 rxbuf->buf = NULL;
1454 rxbuf->flags = 0;
1455 }
1456 }
1457 }
1458
1459 /*********************************************************************
1460 *
1461 * Initialize a receive ring and its buffers.
1462 *
1463 **********************************************************************/
1464 static int
1465 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1466 {
1467 struct adapter *adapter;
1468 struct ixgbe_rx_buf *rxbuf;
1469 #ifdef LRO
1470 struct ifnet *ifp;
1471 struct lro_ctrl *lro = &rxr->lro;
1472 #endif /* LRO */
1473 int rsize, error = 0;
1474 #ifdef DEV_NETMAP
1475 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1476 struct netmap_slot *slot;
1477 #endif /* DEV_NETMAP */
1478
1479 adapter = rxr->adapter;
1480 #ifdef LRO
1481 ifp = adapter->ifp;
1482 #endif /* LRO */
1483
1484 /* Clear the ring contents */
1485 IXGBE_RX_LOCK(rxr);
1486 #ifdef DEV_NETMAP
1487 /* same as in ixgbe_setup_transmit_ring() */
1488 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1489 #endif /* DEV_NETMAP */
1490 rsize = roundup2(adapter->num_rx_desc *
1491 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1492 bzero((void *)rxr->rx_base, rsize);
1493 /* Cache the size */
1494 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1495
1496 /* Free current RX buffer structs and their mbufs */
1497 ixgbe_free_receive_ring(rxr);
1498
1499 IXGBE_RX_UNLOCK(rxr);
1500
1501 /* Now reinitialize our supply of jumbo mbufs. The number
1502 * or size of jumbo mbufs may have changed.
1503 */
1504 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1505 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1506
1507 IXGBE_RX_LOCK(rxr);
1508
1509 /* Now replenish the mbufs */
1510 for (int j = 0; j != rxr->num_desc; ++j) {
1511 struct mbuf *mp;
1512
1513 rxbuf = &rxr->rx_buffers[j];
1514 #ifdef DEV_NETMAP
1515 /*
1516 * In netmap mode, fill the map and set the buffer
1517 * address in the NIC ring, considering the offset
1518 * between the netmap and NIC rings (see comment in
1519 * ixgbe_setup_transmit_ring() ). No need to allocate
1520 * an mbuf, so end the block with a continue;
1521 */
1522 if (slot) {
1523 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1524 uint64_t paddr;
1525 void *addr;
1526
1527 addr = PNMB(na, slot + sj, &paddr);
1528 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1529 /* Update descriptor and the cached value */
1530 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1531 rxbuf->addr = htole64(paddr);
1532 continue;
1533 }
1534 #endif /* DEV_NETMAP */
1535 rxbuf->flags = 0;
1536 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1537 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1538 if (rxbuf->buf == NULL) {
1539 error = ENOBUFS;
1540 goto fail;
1541 }
1542 mp = rxbuf->buf;
1543 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1544 /* Get the memory mapping */
1545 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1546 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1547 if (error != 0)
1548 goto fail;
1549 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1550 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1551 /* Update the descriptor and the cached value */
1552 rxr->rx_base[j].read.pkt_addr =
1553 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1554 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1555 }
1556
1557
1558 /* Setup our descriptor indices */
1559 rxr->next_to_check = 0;
1560 rxr->next_to_refresh = 0;
1561 rxr->lro_enabled = FALSE;
1562 rxr->rx_copies.ev_count = 0;
1563 #if 0 /* NetBSD */
1564 rxr->rx_bytes.ev_count = 0;
1565 #if 1 /* Fix inconsistency */
1566 rxr->rx_packets.ev_count = 0;
1567 #endif
1568 #endif
1569 rxr->vtag_strip = FALSE;
1570
1571 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1572 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1573
1574 /*
1575 ** Now set up the LRO interface:
1576 */
1577 if (ixgbe_rsc_enable)
1578 ixgbe_setup_hw_rsc(rxr);
1579 #ifdef LRO
1580 else if (ifp->if_capenable & IFCAP_LRO) {
1581 device_t dev = adapter->dev;
1582 int err = tcp_lro_init(lro);
1583 if (err) {
1584 device_printf(dev, "LRO Initialization failed!\n");
1585 goto fail;
1586 }
1587 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1588 rxr->lro_enabled = TRUE;
1589 lro->ifp = adapter->ifp;
1590 }
1591 #endif /* LRO */
1592
1593 IXGBE_RX_UNLOCK(rxr);
1594 return (0);
1595
1596 fail:
1597 ixgbe_free_receive_ring(rxr);
1598 IXGBE_RX_UNLOCK(rxr);
1599 return (error);
1600 }
1601
1602 /*********************************************************************
1603 *
1604 * Initialize all receive rings.
1605 *
1606 **********************************************************************/
1607 int
1608 ixgbe_setup_receive_structures(struct adapter *adapter)
1609 {
1610 struct rx_ring *rxr = adapter->rx_rings;
1611 int j;
1612
1613 for (j = 0; j < adapter->num_queues; j++, rxr++)
1614 if (ixgbe_setup_receive_ring(rxr))
1615 goto fail;
1616
1617 return (0);
1618 fail:
1619 /*
1620 	 * Free RX buffers allocated so far; we will only handle
1621 	 * the rings that completed, since the failing case will have
1622 	 * cleaned up for itself. 'j' failed, so it is the terminus.
1623 */
1624 for (int i = 0; i < j; ++i) {
1625 rxr = &adapter->rx_rings[i];
1626 ixgbe_free_receive_ring(rxr);
1627 }
1628
1629 return (ENOBUFS);
1630 }
1631
1632
1633 /*********************************************************************
1634 *
1635 * Free all receive rings.
1636 *
1637 **********************************************************************/
1638 void
1639 ixgbe_free_receive_structures(struct adapter *adapter)
1640 {
1641 struct rx_ring *rxr = adapter->rx_rings;
1642
1643 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1644
1645 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1646 #ifdef LRO
1647 struct lro_ctrl *lro = &rxr->lro;
1648 #endif /* LRO */
1649 ixgbe_free_receive_buffers(rxr);
1650 #ifdef LRO
1651 /* Free LRO memory */
1652 tcp_lro_free(lro);
1653 #endif /* LRO */
1654 /* Free the ring memory as well */
1655 ixgbe_dma_free(adapter, &rxr->rxdma);
1656 IXGBE_RX_LOCK_DESTROY(rxr);
1657 }
1658
1659 free(adapter->rx_rings, M_DEVBUF);
1660 }
1661
1662
1663 /*********************************************************************
1664 *
1665 * Free receive ring data structures
1666 *
1667 **********************************************************************/
1668 static void
1669 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1670 {
1671 struct adapter *adapter = rxr->adapter;
1672 struct ixgbe_rx_buf *rxbuf;
1673
1674 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1675
1676 /* Cleanup any existing buffers */
1677 if (rxr->rx_buffers != NULL) {
1678 for (int i = 0; i < adapter->num_rx_desc; i++) {
1679 rxbuf = &rxr->rx_buffers[i];
1680 if (rxbuf->buf != NULL) {
1681 bus_dmamap_sync(rxr->ptag->dt_dmat,
1682 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1683 BUS_DMASYNC_POSTREAD);
1684 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1685 rxbuf->buf->m_flags |= M_PKTHDR;
1686 m_freem(rxbuf->buf);
1687 }
1688 rxbuf->buf = NULL;
1689 if (rxbuf->pmap != NULL) {
1690 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1691 rxbuf->pmap = NULL;
1692 }
1693 }
1694 if (rxr->rx_buffers != NULL) {
1695 free(rxr->rx_buffers, M_DEVBUF);
1696 rxr->rx_buffers = NULL;
1697 }
1698 }
1699
1700 if (rxr->ptag != NULL) {
1701 ixgbe_dma_tag_destroy(rxr->ptag);
1702 rxr->ptag = NULL;
1703 }
1704
1705 return;
1706 }
1707
1708 static __inline void
1709 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1710 {
1711 struct adapter *adapter = ifp->if_softc;
1712
1713 #ifdef LRO
1714 struct ethercom *ec = &adapter->osdep.ec;
1715
1716 /*
1717 	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1718 	 * of the packet must have been computed by hardware. The packet must also
1719 	 * carry no VLAN tag in its ethernet header. For IPv6 we do not yet support ext. hdrs.
1720 */
1721 if (rxr->lro_enabled &&
1722 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1723 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1724 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1725 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1726 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1727 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1728 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1729 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1730 /*
1731 * Send to the stack if:
1732 ** - LRO not enabled, or
1733 ** - no LRO resources, or
1734 ** - lro enqueue fails
1735 */
1736 if (rxr->lro.lro_cnt != 0)
1737 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1738 return;
1739 }
1740 #endif /* LRO */
1741
1742 IXGBE_RX_UNLOCK(rxr);
1743
1744 if_percpuq_enqueue(adapter->ipq, m);
1745
1746 IXGBE_RX_LOCK(rxr);
1747 }
1748
1749 static __inline void
1750 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1751 {
1752 struct ixgbe_rx_buf *rbuf;
1753
1754 rbuf = &rxr->rx_buffers[i];
1755
1756
1757 /*
1758 ** With advanced descriptors the writeback
1759 	** clobbers the buffer addrs, so it's easier
1760 ** to just free the existing mbufs and take
1761 ** the normal refresh path to get new buffers
1762 ** and mapping.
1763 */
1764
1765 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1766 rbuf->fmp->m_flags |= M_PKTHDR;
1767 m_freem(rbuf->fmp);
1768 rbuf->fmp = NULL;
1769 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1770 } else if (rbuf->buf) {
1771 m_free(rbuf->buf);
1772 rbuf->buf = NULL;
1773 }
1774 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1775
1776 rbuf->flags = 0;
1777
1778 return;
1779 }
1780
1781
1782 /*********************************************************************
1783 *
1784 * This routine executes in interrupt context. It replenishes
1785  *  the mbufs in the descriptor ring and sends data which has been
1786  *  dma'ed into host memory to the upper layer.
1787 *
1788 * Return TRUE for more work, FALSE for all clean.
1789 *********************************************************************/
1790 bool
1791 ixgbe_rxeof(struct ix_queue *que)
1792 {
1793 struct adapter *adapter = que->adapter;
1794 struct rx_ring *rxr = que->rxr;
1795 struct ifnet *ifp = adapter->ifp;
1796 #ifdef LRO
1797 struct lro_ctrl *lro = &rxr->lro;
1798 #endif /* LRO */
1799 int i, nextp, processed = 0;
1800 u32 staterr = 0;
1801 u32 count = adapter->rx_process_limit;
1802 union ixgbe_adv_rx_desc *cur;
1803 struct ixgbe_rx_buf *rbuf, *nbuf;
1804 #ifdef RSS
1805 u16 pkt_info;
1806 #endif
1807
1808 IXGBE_RX_LOCK(rxr);
1809
1810 #ifdef DEV_NETMAP
1811 /* Same as the txeof routine: wakeup clients on intr. */
1812 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1813 IXGBE_RX_UNLOCK(rxr);
1814 return (FALSE);
1815 }
1816 #endif /* DEV_NETMAP */
1817
1818 for (i = rxr->next_to_check; count != 0;) {
1819 struct mbuf *sendmp, *mp;
1820 u32 rsc, ptype;
1821 u16 len;
1822 u16 vtag = 0;
1823 bool eop;
1824
1825 /* Sync the ring. */
1826 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1827 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1828
1829 cur = &rxr->rx_base[i];
1830 staterr = le32toh(cur->wb.upper.status_error);
1831 #ifdef RSS
1832 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1833 #endif
1834
1835 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1836 break;
1837 if ((ifp->if_flags & IFF_RUNNING) == 0)
1838 break;
1839
1840 count--;
1841 sendmp = NULL;
1842 nbuf = NULL;
1843 rsc = 0;
1844 cur->wb.upper.status_error = 0;
1845 rbuf = &rxr->rx_buffers[i];
1846 mp = rbuf->buf;
1847
1848 len = le16toh(cur->wb.upper.length);
1849 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1850 IXGBE_RXDADV_PKTTYPE_MASK;
1851 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1852
1853 /* Make sure bad packets are discarded */
1854 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1855 #if __FreeBSD_version >= 1100036
1856 if (IXGBE_IS_VF(adapter))
1857 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1858 #endif
1859 rxr->rx_discarded.ev_count++;
1860 ixgbe_rx_discard(rxr, i);
1861 goto next_desc;
1862 }
1863
1864 /*
1865 		** On 82599, which supports hardware
1866 		** LRO (called HW RSC), packets need
1867 		** not be fragmented across sequential
1868 		** descriptors; rather, the next descriptor
1869 		** is indicated in bits of the descriptor.
1870 		** This also means that we might process
1871 		** more than one packet at a time, something
1872 		** that was never true before; it
1873 		** required eliminating global chain pointers
1874 		** in favor of what we are doing here. -jfv
1875 */
1876 if (!eop) {
1877 /*
1878 ** Figure out the next descriptor
1879 ** of this frame.
1880 */
1881 if (rxr->hw_rsc == TRUE) {
1882 rsc = ixgbe_rsc_count(cur);
1883 rxr->rsc_num += (rsc - 1);
1884 }
1885 if (rsc) { /* Get hardware index */
1886 nextp = ((staterr &
1887 IXGBE_RXDADV_NEXTP_MASK) >>
1888 IXGBE_RXDADV_NEXTP_SHIFT);
1889 } else { /* Just sequential */
1890 nextp = i + 1;
1891 if (nextp == adapter->num_rx_desc)
1892 nextp = 0;
1893 }
1894 nbuf = &rxr->rx_buffers[nextp];
1895 prefetch(nbuf);
1896 }
1897 /*
1898 ** Rather than using the fmp/lmp global pointers
1899 ** we now keep the head of a packet chain in the
1900 ** buffer struct and pass this along from one
1901 ** descriptor to the next, until we get EOP.
1902 */
1903 mp->m_len = len;
1904 /*
1905 ** See if a head for this frame was stored
1906 ** by the previous descriptor.
1907 */
1908 sendmp = rbuf->fmp;
1909 if (sendmp != NULL) { /* secondary frag */
1910 rbuf->buf = rbuf->fmp = NULL;
1911 mp->m_flags &= ~M_PKTHDR;
1912 sendmp->m_pkthdr.len += mp->m_len;
1913 } else {
1914 /*
1915 * Optimize. This might be a small packet,
1916 * maybe just a TCP ACK. Do a fast copy that
1917 * is cache aligned into a new mbuf, and
1918 * leave the old mbuf+cluster for re-use.
1919 */
1920 if (eop && len <= IXGBE_RX_COPY_LEN) {
1921 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1922 if (sendmp != NULL) {
1923 sendmp->m_data +=
1924 IXGBE_RX_COPY_ALIGN;
1925 ixgbe_bcopy(mp->m_data,
1926 sendmp->m_data, len);
1927 sendmp->m_len = len;
1928 rxr->rx_copies.ev_count++;
1929 rbuf->flags |= IXGBE_RX_COPY;
1930 }
1931 }
1932 if (sendmp == NULL) {
1933 rbuf->buf = rbuf->fmp = NULL;
1934 sendmp = mp;
1935 }
1936
1937 /* first desc of a non-ps chain */
1938 sendmp->m_flags |= M_PKTHDR;
1939 sendmp->m_pkthdr.len = mp->m_len;
1940 }
1941 ++processed;
1942
1943 /* Pass the head pointer on */
1944 if (eop == 0) {
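			/*
			** Not EOP yet: stash the head in the next buffer
			** struct and pre-link this mbuf to the next buffer's
			** mbuf, so the chain is already wired up when that
			** descriptor completes.
			*/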
1945 nbuf->fmp = sendmp;
1946 sendmp = NULL;
1947 mp->m_next = nbuf->buf;
1948 } else { /* Sending this frame */
1949 m_set_rcvif(sendmp, ifp);
1950 ifp->if_ipackets++;
1951 rxr->rx_packets.ev_count++;
1952 /* capture data for AIM */
1953 rxr->bytes += sendmp->m_pkthdr.len;
1954 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1955 /* Process vlan info */
1956 if ((rxr->vtag_strip) &&
1957 (staterr & IXGBE_RXD_STAT_VP))
1958 vtag = le16toh(cur->wb.upper.vlan);
1959 if (vtag) {
1960 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1961 printf("%s: could not apply VLAN "
1962 "tag", __func__));
1963 }
1964 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1965 ixgbe_rx_checksum(staterr, sendmp, ptype,
1966 &adapter->stats.pf);
1967 }
1968
1969 #if 0 /* FreeBSD */
1970 /*
1971 * In case of multiqueue, we have RXCSUM.PCSD bit set
1972 * and never cleared. This means we have RSS hash
1973 * available to be used.
1974 */
1975 if (adapter->num_queues > 1) {
1976 sendmp->m_pkthdr.flowid =
1977 le32toh(cur->wb.lower.hi_dword.rss);
1978 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1979 case IXGBE_RXDADV_RSSTYPE_IPV4:
1980 M_HASHTYPE_SET(sendmp,
1981 M_HASHTYPE_RSS_IPV4);
1982 break;
1983 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1984 M_HASHTYPE_SET(sendmp,
1985 M_HASHTYPE_RSS_TCP_IPV4);
1986 break;
1987 case IXGBE_RXDADV_RSSTYPE_IPV6:
1988 M_HASHTYPE_SET(sendmp,
1989 M_HASHTYPE_RSS_IPV6);
1990 break;
1991 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1992 M_HASHTYPE_SET(sendmp,
1993 M_HASHTYPE_RSS_TCP_IPV6);
1994 break;
1995 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1996 M_HASHTYPE_SET(sendmp,
1997 M_HASHTYPE_RSS_IPV6_EX);
1998 break;
1999 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2000 M_HASHTYPE_SET(sendmp,
2001 M_HASHTYPE_RSS_TCP_IPV6_EX);
2002 break;
2003 #if __FreeBSD_version > 1100000
2004 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2005 M_HASHTYPE_SET(sendmp,
2006 M_HASHTYPE_RSS_UDP_IPV4);
2007 break;
2008 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2009 M_HASHTYPE_SET(sendmp,
2010 M_HASHTYPE_RSS_UDP_IPV6);
2011 break;
2012 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2013 M_HASHTYPE_SET(sendmp,
2014 M_HASHTYPE_RSS_UDP_IPV6_EX);
2015 break;
2016 #endif
2017 default:
2018 M_HASHTYPE_SET(sendmp,
2019 M_HASHTYPE_OPAQUE_HASH);
2020 }
2021 } else {
2022 sendmp->m_pkthdr.flowid = que->msix;
2023 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2024 }
2025 #endif
2026 }
2027 next_desc:
2028 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2029 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2030
2031 /* Advance our pointers to the next descriptor. */
2032 if (++i == rxr->num_desc)
2033 i = 0;
2034
2035 /* Now send to the stack or do LRO */
2036 if (sendmp != NULL) {
2037 rxr->next_to_check = i;
2038 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2039 i = rxr->next_to_check;
2040 }
2041
2042 /* Every 8 descriptors we go to refresh mbufs */
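		/*
		** Each refresh allocates replacement mbufs and advances the
		** RX tail register, so batching every eight descriptors
		** amortizes that cost.
		*/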
2043 if (processed == 8) {
2044 ixgbe_refresh_mbufs(rxr, i);
2045 processed = 0;
2046 }
2047 }
2048
2049 /* Refresh any remaining buf structs */
2050 if (ixgbe_rx_unrefreshed(rxr))
2051 ixgbe_refresh_mbufs(rxr, i);
2052
2053 rxr->next_to_check = i;
2054
2055 #ifdef LRO
2056 /*
2057 * Flush any outstanding LRO work
2058 */
2059 tcp_lro_flush_all(lro);
2060 #endif /* LRO */
2061
2062 IXGBE_RX_UNLOCK(rxr);
2063
2064 /*
2065 ** Still have cleaning to do?
2066 */
2067 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2068 return true;
2069 else
2070 return false;
2071 }
2072
2073
2074 /*********************************************************************
2075 *
2076 * Verify that the hardware indicated that the checksum is valid.
2077 * Inform the stack about the status of the checksum so that the
2078 * stack doesn't spend time verifying it.
2079 *
2080 *********************************************************************/
2081 static void
2082 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2083 struct ixgbe_hw_stats *stats)
2084 {
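	/* Split staterr: status lives in the low bits, error bits in the top byte */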
2085 u16 status = (u16) staterr;
2086 u8 errors = (u8) (staterr >> 24);
2087 #if 0
2088 bool sctp = false;
2089
2090 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2091 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2092 sctp = true;
2093 #endif
2094
2095 /* IPv4 checksum */
2096 if (status & IXGBE_RXD_STAT_IPCS) {
2097 stats->ipcs.ev_count++;
2098 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2099 /* IP Checksum Good */
2100 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2101 } else {
2102 stats->ipcs_bad.ev_count++;
2103 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2104 }
2105 }
2106 /* TCP/UDP/SCTP checksum */
2107 if (status & IXGBE_RXD_STAT_L4CS) {
2108 stats->l4cs.ev_count++;
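		/*
		** The L4CS status alone does not say whether this was TCP or
		** UDP, IPv4 or IPv6 (ptype is not decoded here), so claim all
		** four flavors and let the stack pick the one that applies.
		*/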
2109 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2110 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2111 mp->m_pkthdr.csum_flags |= type;
2112 } else {
2113 stats->l4cs_bad.ev_count++;
2114 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2115 }
2116 }
2117 }
2118
2119
2120 /********************************************************************
2121 * Manage DMA'able memory.
2122 *******************************************************************/
2123
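/*
 * Allocate DMA-safe memory for a descriptor ring.  The usual NetBSD
 * bus_dma sequence is followed: create a tag, allocate the segment,
 * map it into kernel virtual space, create a map, and load the map so
 * the device-visible address (dma_paddr) becomes known.
 */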
2124 int
2125 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2126 struct ixgbe_dma_alloc *dma, const int mapflags)
2127 {
2128 device_t dev = adapter->dev;
2129 int r, rsegs;
2130
2131 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2132 DBA_ALIGN, 0, /* alignment, bounds */
2133 size, /* maxsize */
2134 1, /* nsegments */
2135 size, /* maxsegsize */
2136 BUS_DMA_ALLOCNOW, /* flags */
2137 &dma->dma_tag);
2138 if (r != 0) {
2139 aprint_error_dev(dev,
2140 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2141 goto fail_0;
2142 }
2143
2144 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2145 size,
2146 dma->dma_tag->dt_alignment,
2147 dma->dma_tag->dt_boundary,
2148 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2149 if (r != 0) {
2150 aprint_error_dev(dev,
2151 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2152 goto fail_1;
2153 }
2154
2155 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2156 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2157 if (r != 0) {
2158 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2159 __func__, r);
2160 goto fail_2;
2161 }
2162
2163 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2164 if (r != 0) {
2165 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2166 __func__, r);
2167 goto fail_3;
2168 }
2169
2170 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2171 size,
2172 NULL,
2173 mapflags | BUS_DMA_NOWAIT);
2174 if (r != 0) {
2175 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2176 __func__, r);
2177 goto fail_4;
2178 }
2179 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2180 dma->dma_size = size;
2181 return 0;
2182 fail_4:
2183 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2184 fail_3:
2185 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2186 fail_2:
2187 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2188 fail_1:
2189 ixgbe_dma_tag_destroy(dma->dma_tag);
2190 fail_0:
2191 return r;
2192 }
2193
2194 void
2195 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2196 {
2197 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2198 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2199 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2200 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2201 ixgbe_dma_tag_destroy(dma->dma_tag);
2202 }
2203
2204
2205 /*********************************************************************
2206 *
2207 * Allocate memory for the transmit and receive rings, and then
2208 * the descriptors associated with each, called only once at attach.
2209 *
2210 **********************************************************************/
2211 int
2212 ixgbe_allocate_queues(struct adapter *adapter)
2213 {
2214 device_t dev = adapter->dev;
2215 struct ix_queue *que;
2216 struct tx_ring *txr;
2217 struct rx_ring *rxr;
2218 int rsize, tsize, error = IXGBE_SUCCESS;
2219 int txconf = 0, rxconf = 0;
2220 #ifdef PCI_IOV
2221 enum ixgbe_iov_mode iov_mode;
2222 #endif
2223
2224 /* First allocate the top level queue structs */
2225 if (!(adapter->queues =
2226 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2227 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2228 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2229 error = ENOMEM;
2230 goto fail;
2231 }
2232
2233 /* First allocate the TX ring struct memory */
2234 if (!(adapter->tx_rings =
2235 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2236 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2237 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2238 error = ENOMEM;
2239 goto tx_fail;
2240 }
2241
2242 /* Next allocate the RX */
2243 if (!(adapter->rx_rings =
2244 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2245 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2246 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2247 error = ENOMEM;
2248 goto rx_fail;
2249 }
2250
2251 /* For the ring itself */
2252 tsize = roundup2(adapter->num_tx_desc *
2253 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2254
2255 #ifdef PCI_IOV
2256 iov_mode = ixgbe_get_iov_mode(adapter);
2257 adapter->pool = ixgbe_max_vfs(iov_mode);
2258 #else
2259 adapter->pool = 0;
2260 #endif
2261 /*
2262 * Now set up the TX queues.  txconf is needed to handle the
2263 * possibility that things fail midcourse and we need to
2264 * undo memory gracefully.
2265 */
2266 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2267 /* Set up some basics */
2268 txr = &adapter->tx_rings[i];
2269 txr->adapter = adapter;
2270 #ifdef PCI_IOV
2271 txr->me = ixgbe_pf_que_index(iov_mode, i);
2272 #else
2273 txr->me = i;
2274 #endif
2275 txr->num_desc = adapter->num_tx_desc;
2276
2277 /* Initialize the TX side lock */
2278 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2279 device_xname(dev), txr->me);
2280 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2281
2282 if (ixgbe_dma_malloc(adapter, tsize,
2283 &txr->txdma, BUS_DMA_NOWAIT)) {
2284 aprint_error_dev(dev,
2285 "Unable to allocate TX Descriptor memory\n");
2286 error = ENOMEM;
2287 goto err_tx_desc;
2288 }
2289 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2290 bzero((void *)txr->tx_base, tsize);
2291
2292 /* Now allocate transmit buffers for the ring */
2293 if (ixgbe_allocate_transmit_buffers(txr)) {
2294 aprint_error_dev(dev,
2295 "Critical Failure setting up transmit buffers\n");
2296 error = ENOMEM;
2297 goto err_tx_desc;
2298 }
2299 #ifndef IXGBE_LEGACY_TX
2300 /* Allocate a software transmit queue (pcq), the NetBSD analogue of FreeBSD's buf_ring */
2301 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2302 if (txr->txr_interq == NULL) {
2303 aprint_error_dev(dev,
2304 "Critical Failure setting up buf ring\n");
2305 error = ENOMEM;
2306 goto err_tx_desc;
2307 }
2308 #endif
2309 }
2310
2311 /*
2312 * Next the RX queues...
2313 */
2314 rsize = roundup2(adapter->num_rx_desc *
2315 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2316 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2317 rxr = &adapter->rx_rings[i];
2318 /* Set up some basics */
2319 rxr->adapter = adapter;
2320 #ifdef PCI_IOV
2321 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2322 #else
2323 rxr->me = i;
2324 #endif
2325 rxr->num_desc = adapter->num_rx_desc;
2326
2327 /* Initialize the RX side lock */
2328 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2329 device_xname(dev), rxr->me);
2330 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2331
2332 if (ixgbe_dma_malloc(adapter, rsize,
2333 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2334 aprint_error_dev(dev,
2335 "Unable to allocate RxDescriptor memory\n");
2336 error = ENOMEM;
2337 goto err_rx_desc;
2338 }
2339 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2340 bzero((void *)rxr->rx_base, rsize);
2341
2342 /* Allocate receive buffers for the ring*/
2343 if (ixgbe_allocate_receive_buffers(rxr)) {
2344 aprint_error_dev(dev,
2345 "Critical Failure setting up receive buffers\n");
2346 error = ENOMEM;
2347 goto err_rx_desc;
2348 }
2349 }
2350
2351 /*
2352 ** Finally set up the queue holding structs
2353 */
2354 for (int i = 0; i < adapter->num_queues; i++) {
2355 que = &adapter->queues[i];
2356 que->adapter = adapter;
2357 que->me = i;
2358 que->txr = &adapter->tx_rings[i];
2359 que->rxr = &adapter->rx_rings[i];
2360 }
2361
2362 return (0);
2363
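	/*
	** Unwind in reverse: rxconf and txconf count how many descriptor
	** areas were successfully allocated above, so only those are freed.
	*/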
2364 err_rx_desc:
2365 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2366 ixgbe_dma_free(adapter, &rxr->rxdma);
2367 err_tx_desc:
2368 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2369 ixgbe_dma_free(adapter, &txr->txdma);
2370 free(adapter->rx_rings, M_DEVBUF);
2371 rx_fail:
2372 free(adapter->tx_rings, M_DEVBUF);
2373 tx_fail:
2374 free(adapter->queues, M_DEVBUF);
2375 fail:
2376 return (error);
2377 }
2378