1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 292751 2015-12-26 17:27:48Z bz $*/
62 /*$NetBSD: ix_txrx.c,v 1.9 2016/12/02 12:14:37 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73
74 extern int ix_crcstrip;
75 #endif
76
77 /*
78 ** HW RSC control:
79 ** this feature only works with
80 ** IPv4, and only on 82599 and later.
81 ** It also causes IP forwarding to fail,
82 ** and unlike LRO that cannot be controlled
83 ** by the stack. For all these reasons
84 ** it is best left off, without a tunable
85 ** interface; enabling it requires changing
86 ** this setting here and recompiling the
87 ** driver.
88 */
89 static bool ixgbe_rsc_enable = FALSE;
90
91 #ifdef IXGBE_FDIR
92 /*
93 ** For Flow Director: this is the
94 ** number of TX packets we sample
95 ** for the filter pool; this means
96 ** every 20th packet will be probed.
97 **
98 ** This feature can be disabled by
99 ** setting this to 0.
100 */
101 static int atr_sample_rate = 20;
102 #endif
103
104 /*********************************************************************
105 * Local Function prototypes
106 *********************************************************************/
107 static void ixgbe_setup_transmit_ring(struct tx_ring *);
108 static void ixgbe_free_transmit_buffers(struct tx_ring *);
109 static int ixgbe_setup_receive_ring(struct rx_ring *);
110 static void ixgbe_free_receive_buffers(struct rx_ring *);
111
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
116 static int ixgbe_tx_ctx_setup(struct tx_ring *,
117 struct mbuf *, u32 *, u32 *);
118 static int ixgbe_tso_setup(struct tx_ring *,
119 struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126
127 static void ixgbe_setup_hw_rsc(struct rx_ring *);
128
129 #ifdef IXGBE_LEGACY_TX
130 /*********************************************************************
131 * Transmit entry point
132 *
133 * ixgbe_start is called by the stack to initiate a transmit.
134 * The driver will remain in this routine as long as there are
135 * packets to transmit and transmit resources are available.
136 * In case resources are not available, the stack is notified
137 * and the packet is requeued.
138 **********************************************************************/
139
140 void
141 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
142 {
143 int rc;
144 struct mbuf *m_head;
145 struct adapter *adapter = txr->adapter;
146
147 IXGBE_TX_LOCK_ASSERT(txr);
148
149 if ((ifp->if_flags & IFF_RUNNING) == 0)
150 return;
151 if (!adapter->link_active)
152 return;
153
154 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
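/* Stop when the ring is nearly full; packets stay on if_snd and we retry on the next call */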
155 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
156 break;
157
158 IFQ_POLL(&ifp->if_snd, m_head);
159 if (m_head == NULL)
160 break;
161
162 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
163 break;
164 }
165 IFQ_DEQUEUE(&ifp->if_snd, m_head);
166 if (rc == EFBIG) {
167 struct mbuf *mtmp;
168
169 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
170 m_head = mtmp;
171 rc = ixgbe_xmit(txr, m_head);
172 if (rc != 0)
173 adapter->efbig2_tx_dma_setup.ev_count++;
174 } else
175 adapter->m_defrag_failed.ev_count++;
176 }
177 if (rc != 0) {
178 m_freem(m_head);
179 continue;
180 }
181
182 /* Send a copy of the frame to the BPF listener */
183 bpf_mtap(ifp, m_head);
184 }
185 return;
186 }
187
188 /*
189 * Legacy TX start - called by the stack, this
190 * always uses the first tx ring, and should
191 * not be used with multiqueue tx enabled.
192 */
193 void
194 ixgbe_start(struct ifnet *ifp)
195 {
196 struct adapter *adapter = ifp->if_softc;
197 struct tx_ring *txr = adapter->tx_rings;
198
199 if (ifp->if_flags & IFF_RUNNING) {
200 IXGBE_TX_LOCK(txr);
201 ixgbe_start_locked(txr, ifp);
202 IXGBE_TX_UNLOCK(txr);
203 }
204 return;
205 }
206
207 #else /* ! IXGBE_LEGACY_TX */
208
209 /*
210 ** Multiqueue Transmit Entry Point
211 ** (if_transmit function)
212 */
213 int
214 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
215 {
216 struct adapter *adapter = ifp->if_softc;
217 struct ix_queue *que;
218 struct tx_ring *txr;
219 int i, err = 0;
220 #ifdef RSS
221 uint32_t bucket_id;
222 #endif
223
224 /*
225 * When doing RSS, map it to the same outbound queue
226 * as the incoming flow would be mapped to.
227 *
228 * If everything is set up correctly, it should be the
229 * same bucket as the CPU we are currently running on.
230 */
231 #if __FreeBSD_version < 1100054
232 if (m->m_flags & M_FLOWID) {
233 #else
234 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
235 #endif
236 #ifdef RSS
237 if (rss_hash2bucket(m->m_pkthdr.flowid,
238 M_HASHTYPE_GET(m), &bucket_id) == 0) {
239 /* TODO: spit out something if bucket_id > num_queues? */
240 i = bucket_id % adapter->num_queues;
241 #ifdef IXGBE_DEBUG
242 if (bucket_id > adapter->num_queues)
243 if_printf(ifp, "bucket_id (%d) > num_queues "
244 "(%d)\n", bucket_id, adapter->num_queues);
245 #endif
246 } else
247 #endif
248 i = m->m_pkthdr.flowid % adapter->num_queues;
249 } else
250 i = curcpu % adapter->num_queues;
251
252 /* Check for a hung queue and pick alternative */
253 if (((1 << i) & adapter->active_queues) == 0)
254 i = ffsl(adapter->active_queues);
255
256 txr = &adapter->tx_rings[i];
257 que = &adapter->queues[i];
258
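/* Enqueue to this ring's buf_ring; transmit now if the lock is free, otherwise defer to the per-queue softint */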
259 err = drbr_enqueue(ifp, txr->br, m);
260 if (err)
261 return (err);
262 if (IXGBE_TX_TRYLOCK(txr)) {
263 ixgbe_mq_start_locked(ifp, txr);
264 IXGBE_TX_UNLOCK(txr);
265 } else
266 softint_schedule(txr->txq_si);
267
268 return (0);
269 }
270
271 int
272 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
273 {
274 struct adapter *adapter = txr->adapter;
275 struct mbuf *next;
276 int enqueued = 0, err = 0;
277
278 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
279 adapter->link_active == 0)
280 return (ENETDOWN);
281
282 /* Process the queue */
283 #if __FreeBSD_version < 901504
284 next = drbr_dequeue(ifp, txr->br);
285 while (next != NULL) {
286 if ((err = ixgbe_xmit(txr, &next)) != 0) {
287 if (next != NULL)
288 err = drbr_enqueue(ifp, txr->br, next);
289 #else
290 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
291 if ((err = ixgbe_xmit(txr, &next)) != 0) {
292 if (next == NULL) {
293 drbr_advance(ifp, txr->br);
294 } else {
295 drbr_putback(ifp, txr->br, next);
296 }
297 #endif
298 break;
299 }
300 #if __FreeBSD_version >= 901504
301 drbr_advance(ifp, txr->br);
302 #endif
303 enqueued++;
304 #if 0 // this is VF-only
305 #if __FreeBSD_version >= 1100036
306 /*
307 * Since we're looking at the tx ring, we can check
308 * to see if we're a VF by examining our tail register
309 * address.
310 */
311 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
312 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
313 #endif
314 #endif
315 /* Send a copy of the frame to the BPF listener */
316 bpf_mtap(ifp, next);
317 if ((ifp->if_flags & IFF_RUNNING) == 0)
318 break;
319 #if __FreeBSD_version < 901504
320 next = drbr_dequeue(ifp, txr->br);
321 #endif
322 }
323
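/* Reclaim completed descriptors when the ring is running low */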
324 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
325 ixgbe_txeof(txr);
326
327 return (err);
328 }
329
330 /*
331 * Called from a taskqueue to drain queued transmit packets.
332 */
333 void
334 ixgbe_deferred_mq_start(void *arg, int pending)
335 {
336 struct tx_ring *txr = arg;
337 struct adapter *adapter = txr->adapter;
338 struct ifnet *ifp = adapter->ifp;
339
340 IXGBE_TX_LOCK(txr);
341 if (!drbr_empty(ifp, txr->br))
342 ixgbe_mq_start_locked(ifp, txr);
343 IXGBE_TX_UNLOCK(txr);
344 }
345
346 /*
347 * Flush all ring buffers
348 */
349 void
350 ixgbe_qflush(struct ifnet *ifp)
351 {
352 struct adapter *adapter = ifp->if_softc;
353 struct tx_ring *txr = adapter->tx_rings;
354 struct mbuf *m;
355
356 for (int i = 0; i < adapter->num_queues; i++, txr++) {
357 IXGBE_TX_LOCK(txr);
358 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
359 m_freem(m);
360 IXGBE_TX_UNLOCK(txr);
361 }
362 if_qflush(ifp);
363 }
364 #endif /* IXGBE_LEGACY_TX */
365
366
367 /*********************************************************************
368 *
369 * This routine maps the mbufs to tx descriptors, allowing the
370 * TX engine to transmit the packets.
371 * - return 0 on success, positive on failure
372 *
373 **********************************************************************/
374
375 static int
376 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
377 {
378 struct m_tag *mtag;
379 struct adapter *adapter = txr->adapter;
380 struct ethercom *ec = &adapter->osdep.ec;
381 u32 olinfo_status = 0, cmd_type_len;
382 int i, j, error;
383 int first;
384 bus_dmamap_t map;
385 struct ixgbe_tx_buf *txbuf;
386 union ixgbe_adv_tx_desc *txd = NULL;
387
388 /* Basic descriptor defines */
389 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
390 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
391
392 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
393 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
394
395 /*
396 * Important to capture the first descriptor
397 * used because it will contain the index of
398 * the one we tell the hardware to report back
399 */
400 first = txr->next_avail_desc;
401 txbuf = &txr->tx_buffers[first];
402 map = txbuf->map;
403
404 /*
405 * Map the packet for DMA.
406 */
407 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
408 m_head, BUS_DMA_NOWAIT);
409
410 if (__predict_false(error)) {
411
412 switch (error) {
413 case EAGAIN:
414 adapter->eagain_tx_dma_setup.ev_count++;
415 return EAGAIN;
416 case ENOMEM:
417 adapter->enomem_tx_dma_setup.ev_count++;
418 return EAGAIN;
419 case EFBIG:
420 /*
421 * XXX Try it again?
422 * do m_defrag() and retry bus_dmamap_load_mbuf().
423 */
424 adapter->efbig_tx_dma_setup.ev_count++;
425 return error;
426 case EINVAL:
427 adapter->einval_tx_dma_setup.ev_count++;
428 return error;
429 default:
430 adapter->other_tx_dma_setup.ev_count++;
431 return error;
432 }
433 }
434
435 /* Make certain there are enough descriptors */
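/* Hold back two slots: the offload context descriptor may consume one, plus one of slack */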
436 if (map->dm_nsegs > txr->tx_avail - 2) {
437 txr->no_desc_avail.ev_count++;
438 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
439 return EAGAIN;
440 }
441
442 /*
443 * Set up the appropriate offload context
444 * this will consume the first descriptor
445 */
446 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
447 if (__predict_false(error)) {
448 return (error);
449 }
450
451 #ifdef IXGBE_FDIR
452 /* Do the flow director magic */
453 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
454 ++txr->atr_count;
455 if (txr->atr_count >= atr_sample_rate) {
456 ixgbe_atr(txr, m_head);
457 txr->atr_count = 0;
458 }
459 }
460 #endif
461
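/* CC ("check context") associates the data descriptors with the offload context set up above */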
462 olinfo_status |= IXGBE_ADVTXD_CC;
463 i = txr->next_avail_desc;
464 for (j = 0; j < map->dm_nsegs; j++) {
465 bus_size_t seglen;
466 bus_addr_t segaddr;
467
468 txbuf = &txr->tx_buffers[i];
469 txd = &txr->tx_base[i];
470 seglen = map->dm_segs[j].ds_len;
471 segaddr = htole64(map->dm_segs[j].ds_addr);
472
473 txd->read.buffer_addr = segaddr;
474 txd->read.cmd_type_len = htole32(txr->txd_cmd |
475 cmd_type_len |seglen);
476 txd->read.olinfo_status = htole32(olinfo_status);
477
478 if (++i == txr->num_desc)
479 i = 0;
480 }
481
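/* Last segment: EOP ends the frame, RS asks the hardware to report completion status */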
482 txd->read.cmd_type_len |=
483 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
484 txr->tx_avail -= map->dm_nsegs;
485 txr->next_avail_desc = i;
486
487 txbuf->m_head = m_head;
488 /*
489 * Here we swap the map so the last descriptor,
490 * which gets the completion interrupt, has the
491 * real map, and the first descriptor gets the
492 * unused map from this descriptor.
493 */
494 txr->tx_buffers[first].map = txbuf->map;
495 txbuf->map = map;
496 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
497 BUS_DMASYNC_PREWRITE);
498
499 /* Set the EOP descriptor that will be marked done */
500 txbuf = &txr->tx_buffers[first];
501 txbuf->eop = txd;
502
503 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
504 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
505 /*
506 * Advance the Transmit Descriptor Tail (TDT); this tells the
507 * hardware that this frame is available to transmit.
508 */
509 ++txr->total_packets.ev_count;
510 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
511
512 /* Mark queue as having work */
513 if (txr->busy == 0)
514 txr->busy = 1;
515
516 return 0;
517 }
518
519 /*********************************************************************
520 *
521 * Allocate memory for tx_buffer structures. The tx_buffer stores all
522 * the information needed to transmit a packet on the wire. This is
523 * called only once at attach; setup is done on every reset.
524 *
525 **********************************************************************/
526 int
527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
528 {
529 struct adapter *adapter = txr->adapter;
530 device_t dev = adapter->dev;
531 struct ixgbe_tx_buf *txbuf;
532 int error, i;
533
534 /*
535 * Setup DMA descriptor areas.
536 */
537 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
538 1, 0, /* alignment, bounds */
539 IXGBE_TSO_SIZE, /* maxsize */
540 adapter->num_segs, /* nsegments */
541 PAGE_SIZE, /* maxsegsize */
542 0, /* flags */
543 &txr->txtag))) {
544 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
545 goto fail;
546 }
547
548 if (!(txr->tx_buffers =
549 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
550 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
551 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
552 error = ENOMEM;
553 goto fail;
554 }
555
556 /* Create the descriptor buffer dma maps */
557 txbuf = txr->tx_buffers;
558 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
559 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
560 if (error != 0) {
561 aprint_error_dev(dev,
562 "Unable to create TX DMA map (%d)\n", error);
563 goto fail;
564 }
565 }
566
567 return 0;
568 fail:
569 /* We free everything; this handles the case where we failed partway through */
570 ixgbe_free_transmit_structures(adapter);
571 return (error);
572 }
573
574 /*********************************************************************
575 *
576 * Initialize a transmit ring.
577 *
578 **********************************************************************/
579 static void
580 ixgbe_setup_transmit_ring(struct tx_ring *txr)
581 {
582 struct adapter *adapter = txr->adapter;
583 struct ixgbe_tx_buf *txbuf;
584 #ifdef DEV_NETMAP
585 struct netmap_adapter *na = NA(adapter->ifp);
586 struct netmap_slot *slot;
587 #endif /* DEV_NETMAP */
588
589 /* Clear the old ring contents */
590 IXGBE_TX_LOCK(txr);
591 #ifdef DEV_NETMAP
592 /*
593 * (under lock): if in netmap mode, do some consistency
594 * checks and set slot to entry 0 of the netmap ring.
595 */
596 slot = netmap_reset(na, NR_TX, txr->me, 0);
597 #endif /* DEV_NETMAP */
598 bzero((void *)txr->tx_base,
599 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
600 /* Reset indices */
601 txr->next_avail_desc = 0;
602 txr->next_to_clean = 0;
603
604 /* Free any existing tx buffers. */
605 txbuf = txr->tx_buffers;
606 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
607 if (txbuf->m_head != NULL) {
608 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
609 0, txbuf->m_head->m_pkthdr.len,
610 BUS_DMASYNC_POSTWRITE);
611 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
612 m_freem(txbuf->m_head);
613 txbuf->m_head = NULL;
614 }
615 #ifdef DEV_NETMAP
616 /*
617 * In netmap mode, set the map for the packet buffer.
618 * NOTE: Some drivers (not this one) also need to set
619 * the physical buffer address in the NIC ring.
620 * Slots in the netmap ring (indexed by "si") are
621 * kring->nkr_hwofs positions "ahead" wrt the
622 * corresponding slot in the NIC ring. In some drivers
623 * (not here) nkr_hwofs can be negative. Function
624 * netmap_idx_n2k() handles wraparounds properly.
625 */
626 if (slot) {
627 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
628 netmap_load_map(na, txr->txtag,
629 txbuf->map, NMB(na, slot + si));
630 }
631 #endif /* DEV_NETMAP */
632 /* Clear the EOP descriptor pointer */
633 txbuf->eop = NULL;
634 }
635
636 #ifdef IXGBE_FDIR
637 /* Set the rate at which we sample packets */
638 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
639 txr->atr_sample = atr_sample_rate;
640 #endif
641
642 /* Set number of descriptors available */
643 txr->tx_avail = adapter->num_tx_desc;
644
645 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
646 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
647 IXGBE_TX_UNLOCK(txr);
648 }
649
650 /*********************************************************************
651 *
652 * Initialize all transmit rings.
653 *
654 **********************************************************************/
655 int
656 ixgbe_setup_transmit_structures(struct adapter *adapter)
657 {
658 struct tx_ring *txr = adapter->tx_rings;
659
660 for (int i = 0; i < adapter->num_queues; i++, txr++)
661 ixgbe_setup_transmit_ring(txr);
662
663 return (0);
664 }
665
666 /*********************************************************************
667 *
668 * Free all transmit rings.
669 *
670 **********************************************************************/
671 void
672 ixgbe_free_transmit_structures(struct adapter *adapter)
673 {
674 struct tx_ring *txr = adapter->tx_rings;
675
676 for (int i = 0; i < adapter->num_queues; i++, txr++) {
677 ixgbe_free_transmit_buffers(txr);
678 ixgbe_dma_free(adapter, &txr->txdma);
679 IXGBE_TX_LOCK_DESTROY(txr);
680 }
681 free(adapter->tx_rings, M_DEVBUF);
682 }
683
684 /*********************************************************************
685 *
686 * Free transmit ring related data structures.
687 *
688 **********************************************************************/
689 static void
690 ixgbe_free_transmit_buffers(struct tx_ring *txr)
691 {
692 struct adapter *adapter = txr->adapter;
693 struct ixgbe_tx_buf *tx_buffer;
694 int i;
695
696 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
697
698 if (txr->tx_buffers == NULL)
699 return;
700
701 tx_buffer = txr->tx_buffers;
702 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
703 if (tx_buffer->m_head != NULL) {
704 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
705 0, tx_buffer->m_head->m_pkthdr.len,
706 BUS_DMASYNC_POSTWRITE);
707 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
708 m_freem(tx_buffer->m_head);
709 tx_buffer->m_head = NULL;
710 if (tx_buffer->map != NULL) {
711 ixgbe_dmamap_destroy(txr->txtag,
712 tx_buffer->map);
713 tx_buffer->map = NULL;
714 }
715 } else if (tx_buffer->map != NULL) {
716 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
717 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
718 tx_buffer->map = NULL;
719 }
720 }
721 #ifndef IXGBE_LEGACY_TX
722 if (txr->br != NULL)
723 buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725 if (txr->tx_buffers != NULL) {
726 free(txr->tx_buffers, M_DEVBUF);
727 txr->tx_buffers = NULL;
728 }
729 if (txr->txtag != NULL) {
730 ixgbe_dma_tag_destroy(txr->txtag);
731 txr->txtag = NULL;
732 }
733 return;
734 }
735
736 /*********************************************************************
737 *
738 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
739 *
740 **********************************************************************/
741
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744 u32 *cmd_type_len, u32 *olinfo_status)
745 {
746 struct adapter *adapter = txr->adapter;
747 struct ethercom *ec = &adapter->osdep.ec;
748 struct m_tag *mtag;
749 struct ixgbe_adv_tx_context_desc *TXD;
750 struct ether_vlan_header *eh;
751 #ifdef INET
752 struct ip *ip;
753 #endif
754 #ifdef INET6
755 struct ip6_hdr *ip6;
756 #endif
757 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
758 int ehdrlen, ip_hlen = 0;
759 u16 etype;
760 u8 ipproto = 0;
761 int offload = TRUE;
762 int ctxd = txr->next_avail_desc;
763 u16 vtag = 0;
764 char *l3d;
765
766
767 /* First check if TSO is to be used */
768 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
769 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
770
771 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
772 offload = FALSE;
773
774 /* Indicate the whole packet as payload when not doing TSO */
775 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
776
777 /* Now ready a context descriptor */
778 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
779
780 /*
781 ** In advanced descriptors the vlan tag must
782 ** be placed into the context descriptor. Hence
783 ** we need to make one even if not doing offloads.
784 */
785 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
786 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
787 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
788 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
789 return (0);
790
791 /*
792 * Determine where frame payload starts.
793 * Jump over vlan headers if already present,
794 * helpful for QinQ too.
795 */
796 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
797 eh = mtod(mp, struct ether_vlan_header *);
798 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
799 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
800 etype = ntohs(eh->evl_proto);
801 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
802 } else {
803 etype = ntohs(eh->evl_encap_proto);
804 ehdrlen = ETHER_HDR_LEN;
805 }
806
807 /* Set the ether header length */
808 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
809
810 if (offload == FALSE)
811 goto no_offloads;
812
813 /*
814 * If the first mbuf only includes the ethernet header, jump to the next one
815 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
816 * XXX: And assumes the entire IP header is contained in one mbuf
817 */
818 if (mp->m_len == ehdrlen && mp->m_next)
819 l3d = mtod(mp->m_next, char *);
820 else
821 l3d = mtod(mp, char *) + ehdrlen;
822
823 switch (etype) {
824 #ifdef INET
825 case ETHERTYPE_IP:
826 ip = (struct ip *)(l3d);
827 ip_hlen = ip->ip_hl << 2;
828 ipproto = ip->ip_p;
829 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
830 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
831 ip->ip_sum == 0);
832 break;
833 #endif
834 #ifdef INET6
835 case ETHERTYPE_IPV6:
836 ip6 = (struct ip6_hdr *)(l3d);
837 ip_hlen = sizeof(struct ip6_hdr);
838 ipproto = ip6->ip6_nxt;
839 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
840 break;
841 #endif
842 default:
843 break;
844 }
845
846 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
847 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
848
849 vlan_macip_lens |= ip_hlen;
850
851 /* No support for offloads for non-L4 next headers */
852 switch (ipproto) {
853 case IPPROTO_TCP:
854 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
856 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
857 else
858 offload = false;
859 break;
860 case IPPROTO_UDP:
861 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
862 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
863 else
864 offload = false;
865 break;
866 }
867
868 if (offload) /* Insert L4 checksum into data descriptors */
869 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
870
871 no_offloads:
872 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
873
874 /* Now copy bits into descriptor */
875 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
876 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
877 TXD->seqnum_seed = htole32(0);
878 TXD->mss_l4len_idx = htole32(0);
879
880 /* We've consumed the first desc, adjust counters */
881 if (++ctxd == txr->num_desc)
882 ctxd = 0;
883 txr->next_avail_desc = ctxd;
884 --txr->tx_avail;
885
886 return 0;
887 }
888
889 /**********************************************************************
890 *
891 * Setup work for hardware segmentation offload (TSO) on
892 * adapters using advanced tx descriptors
893 *
894 **********************************************************************/
895 static int
896 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
897 u32 *cmd_type_len, u32 *olinfo_status)
898 {
899 struct m_tag *mtag;
900 struct adapter *adapter = txr->adapter;
901 struct ethercom *ec = &adapter->osdep.ec;
902 struct ixgbe_adv_tx_context_desc *TXD;
903 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
904 u32 mss_l4len_idx = 0, paylen;
905 u16 vtag = 0, eh_type;
906 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
907 struct ether_vlan_header *eh;
908 #ifdef INET6
909 struct ip6_hdr *ip6;
910 #endif
911 #ifdef INET
912 struct ip *ip;
913 #endif
914 struct tcphdr *th;
915
916 /*
917 * Determine where frame payload starts.
918 * Jump over vlan headers if already present
919 */
920 eh = mtod(mp, struct ether_vlan_header *);
921 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
922 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
923 eh_type = eh->evl_proto;
924 } else {
925 ehdrlen = ETHER_HDR_LEN;
926 eh_type = eh->evl_encap_proto;
927 }
928
929 switch (ntohs(eh_type)) {
930 #ifdef INET6
931 case ETHERTYPE_IPV6:
932 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
933 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
934 if (ip6->ip6_nxt != IPPROTO_TCP)
935 return (ENXIO);
936 ip_hlen = sizeof(struct ip6_hdr);
937 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
938 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
939 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
940 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
942 break;
943 #endif
944 #ifdef INET
945 case ETHERTYPE_IP:
946 ip = (struct ip *)(mp->m_data + ehdrlen);
947 if (ip->ip_p != IPPROTO_TCP)
948 return (ENXIO);
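/* Zero the IP checksum and seed th_sum with the pseudo-header sum; hardware completes both for every segment */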
949 ip->ip_sum = 0;
950 ip_hlen = ip->ip_hl << 2;
951 th = (struct tcphdr *)((char *)ip + ip_hlen);
952 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
953 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
954 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
955 /* Tell transmit desc to also do IPv4 checksum. */
956 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
957 break;
958 #endif
959 default:
960 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
961 __func__, ntohs(eh_type));
962 break;
963 }
964
965 ctxd = txr->next_avail_desc;
966 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
967
968 tcp_hlen = th->th_off << 2;
969
970 /* This is used in the transmit desc in encap */
971 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
972
973 /* VLAN MACLEN IPLEN */
974 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
975 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
976 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
977 }
978
979 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
980 vlan_macip_lens |= ip_hlen;
981 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
982
983 /* ADV DTYPE TUCMD */
984 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
985 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
986 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
987
988 /* MSS L4LEN IDX */
989 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
990 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
991 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
992
993 TXD->seqnum_seed = htole32(0);
994
995 if (++ctxd == txr->num_desc)
996 ctxd = 0;
997
998 txr->tx_avail--;
999 txr->next_avail_desc = ctxd;
1000 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1001 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1002 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1003 ++txr->tso_tx.ev_count;
1004 return (0);
1005 }
1006
1007
1008 /**********************************************************************
1009 *
1010 * Examine each tx_buffer in the used queue. If the hardware is done
1011 * processing the packet then free associated resources. The
1012 * tx_buffer is put back on the free queue.
1013 *
1014 **********************************************************************/
1015 void
1016 ixgbe_txeof(struct tx_ring *txr)
1017 {
1018 struct adapter *adapter = txr->adapter;
1019 struct ifnet *ifp = adapter->ifp;
1020 u32 work, processed = 0;
1021 u32 limit = adapter->tx_process_limit;
1022 struct ixgbe_tx_buf *buf;
1023 union ixgbe_adv_tx_desc *txd;
1024
1025 KASSERT(mutex_owned(&txr->tx_mtx));
1026
1027 #ifdef DEV_NETMAP
1028 if (ifp->if_capenable & IFCAP_NETMAP) {
1029 struct netmap_adapter *na = NA(ifp);
1030 struct netmap_kring *kring = &na->tx_rings[txr->me];
1031 txd = txr->tx_base;
1032 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1033 BUS_DMASYNC_POSTREAD);
1034 /*
1035 * In netmap mode, all the work is done in the context
1036 * of the client thread. Interrupt handlers only wake up
1037 * clients, which may be sleeping on individual rings
1038 * or on a global resource for all rings.
1039 * To implement tx interrupt mitigation, we wake up the client
1040 * thread roughly every half ring, even if the NIC interrupts
1041 * more frequently. This is implemented as follows:
1042 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1043 * the slot that should wake up the thread (nkr_num_slots
1044 * means the user thread should not be woken up);
1045 * - the driver ignores tx interrupts unless netmap_mitigate=0
1046 * or the slot has the DD bit set.
1047 */
1048 if (!netmap_mitigate ||
1049 (kring->nr_kflags < kring->nkr_num_slots &&
1050 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1051 netmap_tx_irq(ifp, txr->me);
1052 }
1053 return;
1054 }
1055 #endif /* DEV_NETMAP */
1056
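/* Ring is completely clean: nothing to do, and the queue cannot be hung */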
1057 if (txr->tx_avail == txr->num_desc) {
1058 txr->busy = 0;
1059 return;
1060 }
1061
1062 /* Get work starting point */
1063 work = txr->next_to_clean;
1064 buf = &txr->tx_buffers[work];
1065 txd = &txr->tx_base[work];
1066 work -= txr->num_desc; /* The distance to ring end */
1067 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1068 BUS_DMASYNC_POSTREAD);
1069
1070 do {
1071 union ixgbe_adv_tx_desc *eop = buf->eop;
1072 if (eop == NULL) /* No work */
1073 break;
1074
1075 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1076 break; /* I/O not complete */
1077
1078 if (buf->m_head) {
1079 txr->bytes +=
1080 buf->m_head->m_pkthdr.len;
1081 bus_dmamap_sync(txr->txtag->dt_dmat,
1082 buf->map,
1083 0, buf->m_head->m_pkthdr.len,
1084 BUS_DMASYNC_POSTWRITE);
1085 ixgbe_dmamap_unload(txr->txtag,
1086 buf->map);
1087 m_freem(buf->m_head);
1088 buf->m_head = NULL;
1089 }
1090 buf->eop = NULL;
1091 ++txr->tx_avail;
1092
1093 /* We clean the range if multi segment */
1094 while (txd != eop) {
1095 ++txd;
1096 ++buf;
1097 ++work;
1098 /* wrap the ring? */
1099 if (__predict_false(!work)) {
1100 work -= txr->num_desc;
1101 buf = txr->tx_buffers;
1102 txd = txr->tx_base;
1103 }
1104 if (buf->m_head) {
1105 txr->bytes +=
1106 buf->m_head->m_pkthdr.len;
1107 bus_dmamap_sync(txr->txtag->dt_dmat,
1108 buf->map,
1109 0, buf->m_head->m_pkthdr.len,
1110 BUS_DMASYNC_POSTWRITE);
1111 ixgbe_dmamap_unload(txr->txtag,
1112 buf->map);
1113 m_freem(buf->m_head);
1114 buf->m_head = NULL;
1115 }
1116 ++txr->tx_avail;
1117 buf->eop = NULL;
1118
1119 }
1120 ++txr->packets;
1121 ++processed;
1122 ++ifp->if_opackets;
1123
1124 /* Try the next packet */
1125 ++txd;
1126 ++buf;
1127 ++work;
1128 /* reset with a wrap */
1129 if (__predict_false(!work)) {
1130 work -= txr->num_desc;
1131 buf = txr->tx_buffers;
1132 txd = txr->tx_base;
1133 }
1134 prefetch(txd);
1135 } while (__predict_true(--limit));
1136
1137 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1138 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1139
1140 work += txr->num_desc;
1141 txr->next_to_clean = work;
1142
1143 /*
1144 ** Queue hang detection: we know there is work
1145 ** outstanding or the first return above would
1146 ** have been taken, so increment busy if nothing
1147 ** managed to get cleaned. local_timer then checks
1148 ** this counter and marks the queue HUNG if it
1149 ** exceeds a maximum number of attempts.
1150 */
1151 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1152 ++txr->busy;
1153 /*
1154 ** If anything gets cleaned we reset the state to 1;
1155 ** note this will clear HUNG if it's set.
1156 */
1157 if (processed)
1158 txr->busy = 1;
1159
1160 if (txr->tx_avail == txr->num_desc)
1161 txr->busy = 0;
1162
1163 return;
1164 }
1165
1166
1167 #ifdef IXGBE_FDIR
1168 /*
1169 ** This routine parses packet headers so that Flow
1170 ** Director can make a hashed filter table entry
1171 ** allowing traffic flows to be identified and kept
1172 ** on the same cpu. This would be a performance
1173 ** hit, but we only do it at IXGBE_FDIR_RATE of
1174 ** packets.
1175 */
1176 static void
1177 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1178 {
1179 struct adapter *adapter = txr->adapter;
1180 struct ix_queue *que;
1181 struct ip *ip;
1182 struct tcphdr *th;
1183 struct udphdr *uh;
1184 struct ether_vlan_header *eh;
1185 union ixgbe_atr_hash_dword input = {.dword = 0};
1186 union ixgbe_atr_hash_dword common = {.dword = 0};
1187 int ehdrlen, ip_hlen;
1188 u16 etype;
1189
1190 eh = mtod(mp, struct ether_vlan_header *);
1191 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1192 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1193 etype = eh->evl_proto;
1194 } else {
1195 ehdrlen = ETHER_HDR_LEN;
1196 etype = eh->evl_encap_proto;
1197 }
1198
1199 /* Only handling IPv4 */
1200 if (etype != htons(ETHERTYPE_IP))
1201 return;
1202
1203 ip = (struct ip *)(mp->m_data + ehdrlen);
1204 ip_hlen = ip->ip_hl << 2;
1205
1206 /* check if we're UDP or TCP */
1207 switch (ip->ip_p) {
1208 case IPPROTO_TCP:
1209 th = (struct tcphdr *)((char *)ip + ip_hlen);
1210 /* src and dst are inverted */
1211 common.port.dst ^= th->th_sport;
1212 common.port.src ^= th->th_dport;
1213 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1214 break;
1215 case IPPROTO_UDP:
1216 uh = (struct udphdr *)((char *)ip + ip_hlen);
1217 /* src and dst are inverted */
1218 common.port.dst ^= uh->uh_sport;
1219 common.port.src ^= uh->uh_dport;
1220 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1221 break;
1222 default:
1223 return;
1224 }
1225
1226 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1227 if (mp->m_pkthdr.ether_vtag)
1228 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1229 else
1230 common.flex_bytes ^= etype;
1231 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1232
1233 que = &adapter->queues[txr->me];
1234 /*
1235 ** This assumes the Rx queue and Tx
1236 ** queue are bound to the same CPU
1237 */
1238 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1239 input, common, que->msix);
1240 }
1241 #endif /* IXGBE_FDIR */
1242
1243 /*
1244 ** Used to detect a descriptor that has
1245 ** been merged by Hardware RSC.
1246 */
1247 static inline u32
1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1249 {
1250 return (le32toh(rx->wb.lower.lo_dword.data) &
1251 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1252 }
1253
1254 /*********************************************************************
1255 *
1256 * Initialize Hardware RSC (LRO) feature on 82599
1257 * for an RX ring, this is toggled by the LRO capability
1258 * even though it is transparent to the stack.
1259 *
1260 * NOTE: since this HW feature only works with IPv4,
1261 * and our testing has shown soft LRO to be as effective,
1262 * I have decided to disable this by default.
1263 *
1264 **********************************************************************/
1265 static void
1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1267 {
1268 struct adapter *adapter = rxr->adapter;
1269 struct ixgbe_hw *hw = &adapter->hw;
1270 u32 rscctrl, rdrxctl;
1271
1272 /* If turning LRO/RSC off we need to disable it */
1273 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1274 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1275 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
/* Write the cleared enable bit back, otherwise RSC stays enabled */
IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1276 return;
1277 }
1278
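/* RSC relies on hardware CRC stripping (netmap may override below); also set RSCACKC while RSC is in use */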
1279 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1280 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1281 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1282 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1283 #endif /* DEV_NETMAP */
1284 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1285 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1286 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1287
1288 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1289 rscctrl |= IXGBE_RSCCTL_RSCEN;
1290 /*
1291 ** Limit the total number of descriptors that
1292 ** can be combined, so it does not exceed 64K
1293 */
1294 if (rxr->mbuf_sz == MCLBYTES)
1295 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1296 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1297 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1298 else if (rxr->mbuf_sz == MJUM9BYTES)
1299 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1300 else /* Using 16K cluster */
1301 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1302
1303 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1304
1305 /* Enable TCP header recognition */
1306 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1307 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1308 IXGBE_PSRTYPE_TCPHDR));
1309
1310 /* Disable RSC for ACK packets */
1311 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1312 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1313
1314 rxr->hw_rsc = TRUE;
1315 }
1316
1317 /*********************************************************************
1318 *
1319 * Refresh mbuf buffers for RX descriptor rings
1320 * - now keeps its own state, so discards due to resource
1321 * exhaustion are unnecessary; if an mbuf cannot be obtained
1322 * it just returns, keeping its placeholder, so it can simply
1323 * be called again later to retry.
1324 *
1325 **********************************************************************/
1326 static void
1327 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1328 {
1329 struct adapter *adapter = rxr->adapter;
1330 struct ixgbe_rx_buf *rxbuf;
1331 struct mbuf *mp;
1332 int i, j, error;
1333 bool refreshed = false;
1334
1335 i = j = rxr->next_to_refresh;
1336 /* Control the loop with one beyond */
1337 if (++j == rxr->num_desc)
1338 j = 0;
1339
1340 while (j != limit) {
1341 rxbuf = &rxr->rx_buffers[i];
1342 if (rxbuf->buf == NULL) {
1343 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1344 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1345 if (mp == NULL) {
1346 rxr->no_jmbuf.ev_count++;
1347 goto update;
1348 }
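/* For frames that fit a standard cluster, shift the payload so the IP header is 32-bit aligned */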
1349 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1350 m_adj(mp, ETHER_ALIGN);
1351 } else
1352 mp = rxbuf->buf;
1353
1354 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1355
1356 /* If we're dealing with an mbuf that was copied rather
1357 * than replaced, there's no need to go through busdma.
1358 */
1359 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1360 /* Get the memory mapping */
1361 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1362 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1363 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1364 if (error != 0) {
1365 printf("Refresh mbufs: payload dmamap load"
1366 " failure - %d\n", error);
1367 m_free(mp);
1368 rxbuf->buf = NULL;
1369 goto update;
1370 }
1371 rxbuf->buf = mp;
1372 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1373 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1374 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1375 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1376 } else {
1377 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1378 rxbuf->flags &= ~IXGBE_RX_COPY;
1379 }
1380
1381 refreshed = true;
1382 /* Next is precalculated */
1383 i = j;
1384 rxr->next_to_refresh = i;
1385 if (++j == rxr->num_desc)
1386 j = 0;
1387 }
1388 update:
1389 if (refreshed) /* Update hardware tail index */
1390 IXGBE_WRITE_REG(&adapter->hw,
1391 rxr->tail, rxr->next_to_refresh);
1392 return;
1393 }
1394
1395 /*********************************************************************
1396 *
1397 * Allocate memory for rx_buffer structures. Since we use one
1398 * rx_buffer per received packet, the maximum number of rx_buffer's
1399 * that we'll need is equal to the number of receive descriptors
1400 * that we've allocated.
1401 *
1402 **********************************************************************/
1403 int
1404 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1405 {
1406 struct adapter *adapter = rxr->adapter;
1407 device_t dev = adapter->dev;
1408 struct ixgbe_rx_buf *rxbuf;
1409 int bsize, error;
1410
1411 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1412 if (!(rxr->rx_buffers =
1413 (struct ixgbe_rx_buf *) malloc(bsize,
1414 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1415 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1416 error = ENOMEM;
1417 goto fail;
1418 }
1419
1420 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1421 1, 0, /* alignment, bounds */
1422 MJUM16BYTES, /* maxsize */
1423 1, /* nsegments */
1424 MJUM16BYTES, /* maxsegsize */
1425 0, /* flags */
1426 &rxr->ptag))) {
1427 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1428 goto fail;
1429 }
1430
1431 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1432 rxbuf = &rxr->rx_buffers[i];
1433 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1434 if (error) {
1435 aprint_error_dev(dev, "Unable to create RX dma map\n");
1436 goto fail;
1437 }
1438 }
1439
1440 return (0);
1441
1442 fail:
1443 /* Frees all, but can handle partial completion */
1444 ixgbe_free_receive_structures(adapter);
1445 return (error);
1446 }
1447
1448 static void
1449 ixgbe_free_receive_ring(struct rx_ring *rxr)
1450 {
1451 struct ixgbe_rx_buf *rxbuf;
1452
1453 for (int i = 0; i < rxr->num_desc; i++) {
1454 rxbuf = &rxr->rx_buffers[i];
1455 if (rxbuf->buf != NULL) {
1456 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1457 0, rxbuf->buf->m_pkthdr.len,
1458 BUS_DMASYNC_POSTREAD);
1459 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1460 rxbuf->buf->m_flags |= M_PKTHDR;
1461 m_freem(rxbuf->buf);
1462 rxbuf->buf = NULL;
1463 rxbuf->flags = 0;
1464 }
1465 }
1466 }
1467
1468 /*********************************************************************
1469 *
1470 * Initialize a receive ring and its buffers.
1471 *
1472 **********************************************************************/
1473 static int
1474 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1475 {
1476 struct adapter *adapter;
1477 struct ixgbe_rx_buf *rxbuf;
1478 #ifdef LRO
1479 struct ifnet *ifp;
1480 struct lro_ctrl *lro = &rxr->lro;
1481 #endif /* LRO */
1482 int rsize, error = 0;
1483 #ifdef DEV_NETMAP
1484 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1485 struct netmap_slot *slot;
1486 #endif /* DEV_NETMAP */
1487
1488 adapter = rxr->adapter;
1489 #ifdef LRO
1490 ifp = adapter->ifp;
1491 #endif /* LRO */
1492
1493 /* Clear the ring contents */
1494 IXGBE_RX_LOCK(rxr);
1495 #ifdef DEV_NETMAP
1496 /* same as in ixgbe_setup_transmit_ring() */
1497 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1498 #endif /* DEV_NETMAP */
1499 rsize = roundup2(adapter->num_rx_desc *
1500 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1501 bzero((void *)rxr->rx_base, rsize);
1502 /* Cache the size */
1503 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1504
1505 /* Free current RX buffer structs and their mbufs */
1506 ixgbe_free_receive_ring(rxr);
1507
1508 IXGBE_RX_UNLOCK(rxr);
1509
1510 /* Now reinitialize our supply of jumbo mbufs. The number
1511 * or size of jumbo mbufs may have changed.
1512 */
1513 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1514 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1515
1516 IXGBE_RX_LOCK(rxr);
1517
1518 /* Now replenish the mbufs */
1519 for (int j = 0; j != rxr->num_desc; ++j) {
1520 struct mbuf *mp;
1521
1522 rxbuf = &rxr->rx_buffers[j];
1523 #ifdef DEV_NETMAP
1524 /*
1525 * In netmap mode, fill the map and set the buffer
1526 * address in the NIC ring, considering the offset
1527 * between the netmap and NIC rings (see comment in
1528 * ixgbe_setup_transmit_ring() ). No need to allocate
1529 * an mbuf, so end the block with a continue;
1530 */
1531 if (slot) {
1532 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1533 uint64_t paddr;
1534 void *addr;
1535
1536 addr = PNMB(na, slot + sj, &paddr);
1537 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1538 /* Update descriptor and the cached value */
1539 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1540 rxbuf->addr = htole64(paddr);
1541 continue;
1542 }
1543 #endif /* DEV_NETMAP */
1544 rxbuf->flags = 0;
1545 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1546 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1547 if (rxbuf->buf == NULL) {
1548 error = ENOBUFS;
1549 goto fail;
1550 }
1551 mp = rxbuf->buf;
1552 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1553 /* Get the memory mapping */
1554 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1555 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1556 if (error != 0)
1557 goto fail;
1558 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1559 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1560 /* Update the descriptor and the cached value */
1561 rxr->rx_base[j].read.pkt_addr =
1562 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1563 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1564 }
1565
1566
1567 /* Setup our descriptor indices */
1568 rxr->next_to_check = 0;
1569 rxr->next_to_refresh = 0;
1570 rxr->lro_enabled = FALSE;
1571 rxr->rx_copies.ev_count = 0;
1572 rxr->rx_bytes.ev_count = 0;
1573 rxr->vtag_strip = FALSE;
1574
1575 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1576 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1577
1578 /*
1579 ** Now set up the LRO interface:
1580 */
1581 if (ixgbe_rsc_enable)
1582 ixgbe_setup_hw_rsc(rxr);
1583 #ifdef LRO
1584 else if (ifp->if_capenable & IFCAP_LRO) {
1585 device_t dev = adapter->dev;
1586 int err = tcp_lro_init(lro);
1587 if (err) {
1588 device_printf(dev, "LRO Initialization failed!\n");
1589 goto fail;
1590 }
1591 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1592 rxr->lro_enabled = TRUE;
1593 lro->ifp = adapter->ifp;
1594 }
1595 #endif /* LRO */
1596
1597 IXGBE_RX_UNLOCK(rxr);
1598 return (0);
1599
1600 fail:
1601 ixgbe_free_receive_ring(rxr);
1602 IXGBE_RX_UNLOCK(rxr);
1603 return (error);
1604 }
1605
1606 /*********************************************************************
1607 *
1608 * Initialize all receive rings.
1609 *
1610 **********************************************************************/
1611 int
1612 ixgbe_setup_receive_structures(struct adapter *adapter)
1613 {
1614 struct rx_ring *rxr = adapter->rx_rings;
1615 int j;
1616
1617 for (j = 0; j < adapter->num_queues; j++, rxr++)
1618 if (ixgbe_setup_receive_ring(rxr))
1619 goto fail;
1620
1621 return (0);
1622 fail:
1623 /*
1624 * Free RX buffers allocated so far; we only handle
1625 * the rings that completed, since the failing case will
1626 * have cleaned up after itself. Ring 'j' failed, so it is the terminus.
1627 */
1628 for (int i = 0; i < j; ++i) {
1629 rxr = &adapter->rx_rings[i];
1630 ixgbe_free_receive_ring(rxr);
1631 }
1632
1633 return (ENOBUFS);
1634 }
1635
1636
1637 /*********************************************************************
1638 *
1639 * Free all receive rings.
1640 *
1641 **********************************************************************/
1642 void
1643 ixgbe_free_receive_structures(struct adapter *adapter)
1644 {
1645 struct rx_ring *rxr = adapter->rx_rings;
1646
1647 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1648
1649 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1650 #ifdef LRO
1651 struct lro_ctrl *lro = &rxr->lro;
1652 #endif /* LRO */
1653 ixgbe_free_receive_buffers(rxr);
1654 #ifdef LRO
1655 /* Free LRO memory */
1656 tcp_lro_free(lro);
1657 #endif /* LRO */
1658 /* Free the ring memory as well */
1659 ixgbe_dma_free(adapter, &rxr->rxdma);
1660 IXGBE_RX_LOCK_DESTROY(rxr);
1661 }
1662
1663 free(adapter->rx_rings, M_DEVBUF);
1664 }
1665
1666
1667 /*********************************************************************
1668 *
1669 * Free receive ring data structures
1670 *
1671 **********************************************************************/
1672 static void
1673 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1674 {
1675 struct adapter *adapter = rxr->adapter;
1676 struct ixgbe_rx_buf *rxbuf;
1677
1678 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1679
1680 /* Cleanup any existing buffers */
1681 if (rxr->rx_buffers != NULL) {
1682 for (int i = 0; i < adapter->num_rx_desc; i++) {
1683 rxbuf = &rxr->rx_buffers[i];
1684 if (rxbuf->buf != NULL) {
1685 bus_dmamap_sync(rxr->ptag->dt_dmat,
1686 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1687 BUS_DMASYNC_POSTREAD);
1688 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1689 rxbuf->buf->m_flags |= M_PKTHDR;
1690 m_freem(rxbuf->buf);
1691 }
1692 rxbuf->buf = NULL;
1693 if (rxbuf->pmap != NULL) {
1694 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1695 rxbuf->pmap = NULL;
1696 }
1697 }
1698 if (rxr->rx_buffers != NULL) {
1699 free(rxr->rx_buffers, M_DEVBUF);
1700 rxr->rx_buffers = NULL;
1701 }
1702 }
1703
1704 if (rxr->ptag != NULL) {
1705 ixgbe_dma_tag_destroy(rxr->ptag);
1706 rxr->ptag = NULL;
1707 }
1708
1709 return;
1710 }
1711
1712 static __inline void
1713 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1714 {
1715 int s;
1716
1717 #ifdef LRO
1718 struct adapter *adapter = ifp->if_softc;
1719 struct ethercom *ec = &adapter->osdep.ec;
1720
1721 /*
1722 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1723 * of the packet should be computed by hardware. Also it must not have a
1724 * VLAN tag in its ethernet header. For IPv6 we do not yet support ext. hdrs.
1725 */
1726 if (rxr->lro_enabled &&
1727 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1728 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1729 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1730 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1731 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1732 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1733 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1734 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1735 /*
1736 * Send to the stack if:
1737 ** - LRO not enabled, or
1738 ** - no LRO resources, or
1739 ** - lro enqueue fails
1740 */
1741 if (rxr->lro.lro_cnt != 0)
1742 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1743 return;
1744 }
1745 #endif /* LRO */
1746
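/* Drop the RX lock while handing the packet to the stack; it is re-taken below */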
1747 IXGBE_RX_UNLOCK(rxr);
1748
1749 s = splnet();
1750 /* Pass this up to any BPF listeners. */
1751 bpf_mtap(ifp, m);
1752 if_input(ifp, m);
1753 splx(s);
1754
1755 IXGBE_RX_LOCK(rxr);
1756 }
1757
1758 static __inline void
1759 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1760 {
1761 struct ixgbe_rx_buf *rbuf;
1762
1763 rbuf = &rxr->rx_buffers[i];
1764
1765
1766 /*
1767 ** With advanced descriptors the writeback
1768 ** clobbers the buffer addrs, so it's easier
1769 ** to just free the existing mbufs and take
1770 ** the normal refresh path to get new buffers
1771 ** and mapping.
1772 */
1773
1774 if (rbuf->fmp != NULL) {/* Partial chain ? */
1775 rbuf->fmp->m_flags |= M_PKTHDR;
1776 m_freem(rbuf->fmp);
1777 rbuf->fmp = NULL;
1778 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1779 } else if (rbuf->buf) {
1780 m_free(rbuf->buf);
1781 rbuf->buf = NULL;
1782 }
1783 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1784
1785 rbuf->flags = 0;
1786
1787 return;
1788 }
1789
1790
1791 /*********************************************************************
1792 *
1793 * This routine executes in interrupt context. It replenishes
1794 * the mbufs in the descriptor ring and sends data which has been
1795 * DMA'ed into host memory to the upper layer.
1796 *
1797 * Return TRUE for more work, FALSE for all clean.
1798 *********************************************************************/
1799 bool
1800 ixgbe_rxeof(struct ix_queue *que)
1801 {
1802 struct adapter *adapter = que->adapter;
1803 struct rx_ring *rxr = que->rxr;
1804 struct ifnet *ifp = adapter->ifp;
1805 #ifdef LRO
1806 struct lro_ctrl *lro = &rxr->lro;
1807 struct lro_entry *queued;
1808 #endif /* LRO */
1809 int i, nextp, processed = 0;
1810 u32 staterr = 0;
1811 u32 count = adapter->rx_process_limit;
1812 union ixgbe_adv_rx_desc *cur;
1813 struct ixgbe_rx_buf *rbuf, *nbuf;
1814 #ifdef RSS
1815 u16 pkt_info;
1816 #endif
1817
1818 IXGBE_RX_LOCK(rxr);
1819
1820 #ifdef DEV_NETMAP
1821 /* Same as the txeof routine: wakeup clients on intr. */
1822 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1823 IXGBE_RX_UNLOCK(rxr);
1824 return (FALSE);
1825 }
1826 #endif /* DEV_NETMAP */
1827
1828 for (i = rxr->next_to_check; count != 0;) {
1829 struct mbuf *sendmp, *mp;
1830 u32 rsc, ptype;
1831 u16 len;
1832 u16 vtag = 0;
1833 bool eop;
1834
1835 /* Sync the ring. */
1836 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1837 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1838
1839 cur = &rxr->rx_base[i];
1840 staterr = le32toh(cur->wb.upper.status_error);
1841 #ifdef RSS
1842 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1843 #endif
1844
1845 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1846 break;
1847 if ((ifp->if_flags & IFF_RUNNING) == 0)
1848 break;
1849
1850 count--;
1851 sendmp = NULL;
1852 nbuf = NULL;
1853 rsc = 0;
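/* Zero the status so this descriptor is not mistaken for a completed one before it is refreshed */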
1854 cur->wb.upper.status_error = 0;
1855 rbuf = &rxr->rx_buffers[i];
1856 mp = rbuf->buf;
1857
1858 len = le16toh(cur->wb.upper.length);
1859 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1860 IXGBE_RXDADV_PKTTYPE_MASK;
1861 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1862
1863 /* Make sure bad packets are discarded */
1864 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1865 #if __FreeBSD_version >= 1100036
1866 if (IXGBE_IS_VF(adapter))
1867 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1868 #endif
1869 rxr->rx_discarded.ev_count++;
1870 ixgbe_rx_discard(rxr, i);
1871 goto next_desc;
1872 }
1873
1874 		/*
1875 		** On the 82599, which supports a hardware
1876 		** LRO (called HW RSC), the fragments of a
1877 		** packet need not sit in sequential
1878 		** descriptors; instead the next descriptor
1879 		** is indicated in bits of this descriptor.
1880 		** This also means that we might process
1881 		** more than one packet at a time, something
1882 		** that was never true before; it required
1883 		** eliminating global chain pointers
1884 		** in favor of what we are doing here. -jfv
1885 		*/
1886 if (!eop) {
1887 /*
1888 ** Figure out the next descriptor
1889 ** of this frame.
1890 */
1891 if (rxr->hw_rsc == TRUE) {
1892 rsc = ixgbe_rsc_count(cur);
1893 rxr->rsc_num += (rsc - 1);
1894 }
1895 if (rsc) { /* Get hardware index */
1896 nextp = ((staterr &
1897 IXGBE_RXDADV_NEXTP_MASK) >>
1898 IXGBE_RXDADV_NEXTP_SHIFT);
1899 } else { /* Just sequential */
1900 nextp = i + 1;
1901 if (nextp == adapter->num_rx_desc)
1902 nextp = 0;
1903 }
1904 nbuf = &rxr->rx_buffers[nextp];
1905 prefetch(nbuf);
1906 }
1907 /*
1908 ** Rather than using the fmp/lmp global pointers
1909 ** we now keep the head of a packet chain in the
1910 ** buffer struct and pass this along from one
1911 ** descriptor to the next, until we get EOP.
1912 */
1913 mp->m_len = len;
1914 		/*
1915 		** See if there is a stored head; if so, this
1916 		** descriptor continues an earlier frame.
1917 		*/
1918 sendmp = rbuf->fmp;
1919 if (sendmp != NULL) { /* secondary frag */
1920 rbuf->buf = rbuf->fmp = NULL;
1921 mp->m_flags &= ~M_PKTHDR;
1922 sendmp->m_pkthdr.len += mp->m_len;
1923 } else {
1924 /*
1925 * Optimize. This might be a small packet,
1926 * maybe just a TCP ACK. Do a fast copy that
1927 * is cache aligned into a new mbuf, and
1928 * leave the old mbuf+cluster for re-use.
1929 */
1930 if (eop && len <= IXGBE_RX_COPY_LEN) {
1931 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1932 if (sendmp != NULL) {
1933 sendmp->m_data +=
1934 IXGBE_RX_COPY_ALIGN;
1935 ixgbe_bcopy(mp->m_data,
1936 sendmp->m_data, len);
1937 sendmp->m_len = len;
1938 rxr->rx_copies.ev_count++;
1939 rbuf->flags |= IXGBE_RX_COPY;
1940 }
1941 }
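			/*
			** Note: on a successful copy rbuf->buf is left
			** attached to the descriptor; the IXGBE_RX_COPY
			** flag is intended to let the refresh path reuse
			** the existing cluster rather than allocate a
			** new one.
			*/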
1942 if (sendmp == NULL) {
1943 rbuf->buf = rbuf->fmp = NULL;
1944 sendmp = mp;
1945 }
1946
1947 			/* first desc of a non-packet-split chain */
1948 sendmp->m_flags |= M_PKTHDR;
1949 sendmp->m_pkthdr.len = mp->m_len;
1950 }
1951 ++processed;
1952
1953 /* Pass the head pointer on */
1954 if (eop == 0) {
1955 nbuf->fmp = sendmp;
1956 sendmp = NULL;
1957 mp->m_next = nbuf->buf;
1958 } else { /* Sending this frame */
1959 m_set_rcvif(sendmp, ifp);
1960 ifp->if_ipackets++;
1961 rxr->rx_packets.ev_count++;
1962 /* capture data for AIM */
1963 rxr->bytes += sendmp->m_pkthdr.len;
1964 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1965 /* Process vlan info */
1966 if ((rxr->vtag_strip) &&
1967 (staterr & IXGBE_RXD_STAT_VP))
1968 vtag = le16toh(cur->wb.upper.vlan);
1969 if (vtag) {
1970 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1971 printf("%s: could not apply VLAN "
1972 "tag", __func__));
1973 }
1974 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1975 ixgbe_rx_checksum(staterr, sendmp, ptype,
1976 &adapter->stats.pf);
1977 }
1978
1979 #if 0 /* FreeBSD */
1980 /*
1981 * In case of multiqueue, we have RXCSUM.PCSD bit set
1982 * and never cleared. This means we have RSS hash
1983 * available to be used.
1984 */
1985 if (adapter->num_queues > 1) {
1986 sendmp->m_pkthdr.flowid =
1987 le32toh(cur->wb.lower.hi_dword.rss);
1988 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1989 case IXGBE_RXDADV_RSSTYPE_IPV4:
1990 M_HASHTYPE_SET(sendmp,
1991 M_HASHTYPE_RSS_IPV4);
1992 break;
1993 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1994 M_HASHTYPE_SET(sendmp,
1995 M_HASHTYPE_RSS_TCP_IPV4);
1996 break;
1997 case IXGBE_RXDADV_RSSTYPE_IPV6:
1998 M_HASHTYPE_SET(sendmp,
1999 M_HASHTYPE_RSS_IPV6);
2000 break;
2001 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2002 M_HASHTYPE_SET(sendmp,
2003 M_HASHTYPE_RSS_TCP_IPV6);
2004 break;
2005 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2006 M_HASHTYPE_SET(sendmp,
2007 M_HASHTYPE_RSS_IPV6_EX);
2008 break;
2009 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2010 M_HASHTYPE_SET(sendmp,
2011 M_HASHTYPE_RSS_TCP_IPV6_EX);
2012 break;
2013 #if __FreeBSD_version > 1100000
2014 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_RSS_UDP_IPV4);
2017 break;
2018 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2019 M_HASHTYPE_SET(sendmp,
2020 M_HASHTYPE_RSS_UDP_IPV6);
2021 break;
2022 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2023 M_HASHTYPE_SET(sendmp,
2024 M_HASHTYPE_RSS_UDP_IPV6_EX);
2025 break;
2026 #endif
2027 default:
2028 M_HASHTYPE_SET(sendmp,
2029 M_HASHTYPE_OPAQUE);
2030 }
2031 } else {
2032 sendmp->m_pkthdr.flowid = que->msix;
2033 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2034 }
2035 #endif
2036 }
2037 next_desc:
2038 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2039 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2040
2041 /* Advance our pointers to the next descriptor. */
2042 if (++i == rxr->num_desc)
2043 i = 0;
2044
2045 /* Now send to the stack or do LRO */
2046 if (sendmp != NULL) {
2047 rxr->next_to_check = i;
2048 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2049 i = rxr->next_to_check;
2050 }
2051
2052 /* Every 8 descriptors we go to refresh mbufs */
2053 if (processed == 8) {
2054 ixgbe_refresh_mbufs(rxr, i);
2055 processed = 0;
2056 }
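		/*
		** Refreshing in batches of eight amortizes the cost of
		** ixgbe_refresh_mbufs() (which, among other things,
		** updates the hardware tail pointer) instead of paying
		** it for every packet.
		*/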
2057 }
2058
2059 /* Refresh any remaining buf structs */
2060 if (ixgbe_rx_unrefreshed(rxr))
2061 ixgbe_refresh_mbufs(rxr, i);
2062
2063 rxr->next_to_check = i;
2064
2065 #ifdef LRO
2066 /*
2067 * Flush any outstanding LRO work
2068 */
2069 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2070 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2071 tcp_lro_flush(lro, queued);
2072 }
2073 #endif /* LRO */
2074
2075 IXGBE_RX_UNLOCK(rxr);
2076
2077 /*
2078 ** Still have cleaning to do?
2079 */
2080 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2081 return true;
2082 else
2083 return false;
2084 }
2085
2086
2087 /*********************************************************************
2088 *
2089  *  Verify that the hardware indicated that the checksum is valid.
2090  *  Inform the stack about the status of the checksum so that the
2091  *  stack doesn't spend time verifying it.
2092 *
2093 *********************************************************************/
2094 static void
2095 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2096 struct ixgbe_hw_stats *stats)
2097 {
2098 u16 status = (u16) staterr;
2099 u8 errors = (u8) (staterr >> 24);
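	/*
	** staterr is split into a 16-bit status portion and the top
	** error byte so the IXGBE_RXD_STAT_* and IXGBE_RXD_ERR_* bit
	** definitions can be tested against it directly below.
	*/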
2100 #if 0
2101 bool sctp = false;
2102
2103 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2104 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2105 sctp = true;
2106 #endif
2107
2108 /* IPv4 checksum */
2109 if (status & IXGBE_RXD_STAT_IPCS) {
2110 stats->ipcs.ev_count++;
2111 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2112 /* IP Checksum Good */
2113 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2114 } else {
2115 stats->ipcs_bad.ev_count++;
2116 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2117 }
2118 }
2119 /* TCP/UDP/SCTP checksum */
2120 if (status & IXGBE_RXD_STAT_L4CS) {
2121 stats->l4cs.ev_count++;
2122 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2123 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2124 mp->m_pkthdr.csum_flags |= type;
2125 } else {
2126 stats->l4cs_bad.ev_count++;
2127 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2128 }
2129 }
2130 }
2131
2132
2133 /********************************************************************
2134 * Manage DMA'able memory.
2135 *******************************************************************/
2136
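/*
 * ixgbe_dma_malloc() follows the usual NetBSD bus_dma sequence: create a
 * DMA tag, allocate DMA-safe memory, map it into kernel virtual address
 * space, create a DMA map, and finally load the map.  Each failure label
 * unwinds exactly the steps that have already succeeded.
 */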
2137 int
2138 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2139 struct ixgbe_dma_alloc *dma, const int mapflags)
2140 {
2141 device_t dev = adapter->dev;
2142 int r, rsegs;
2143
2144 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2145 DBA_ALIGN, 0, /* alignment, bounds */
2146 size, /* maxsize */
2147 1, /* nsegments */
2148 size, /* maxsegsize */
2149 BUS_DMA_ALLOCNOW, /* flags */
2150 &dma->dma_tag);
2151 if (r != 0) {
2152 aprint_error_dev(dev,
2153 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2154 goto fail_0;
2155 }
2156
2157 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2158 size,
2159 dma->dma_tag->dt_alignment,
2160 dma->dma_tag->dt_boundary,
2161 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2162 if (r != 0) {
2163 aprint_error_dev(dev,
2164 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2165 goto fail_1;
2166 }
2167
2168 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2169 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2170 if (r != 0) {
2171 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2172 __func__, r);
2173 goto fail_2;
2174 }
2175
2176 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2177 if (r != 0) {
2178 		aprint_error_dev(dev,
2179 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2180 goto fail_3;
2181 }
2182
2183 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2184 size,
2185 NULL,
2186 mapflags | BUS_DMA_NOWAIT);
2187 if (r != 0) {
2188 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2189 __func__, r);
2190 goto fail_4;
2191 }
2192 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2193 dma->dma_size = size;
2194 return 0;
2195 fail_4:
2196 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2197 fail_3:
2198 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2199 fail_2:
2200 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2201 fail_1:
2202 ixgbe_dma_tag_destroy(dma->dma_tag);
2203 fail_0:
2204 return r;
2205 }
2206
2207 void
2208 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2209 {
2210 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2211 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2212 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2213 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2214 ixgbe_dma_tag_destroy(dma->dma_tag);
2215 }
2216
2217
2218 /*********************************************************************
2219 *
2220  *  Allocate memory for the transmit and receive rings, and then
2221  *  the descriptors associated with each; called only once at attach.
2222 *
2223 **********************************************************************/
2224 int
2225 ixgbe_allocate_queues(struct adapter *adapter)
2226 {
2227 device_t dev = adapter->dev;
2228 struct ix_queue *que;
2229 struct tx_ring *txr;
2230 struct rx_ring *rxr;
2231 int rsize, tsize, error = IXGBE_SUCCESS;
2232 int txconf = 0, rxconf = 0;
2233 #ifdef PCI_IOV
2234 enum ixgbe_iov_mode iov_mode;
2235 #endif
2236
2237 /* First allocate the top level queue structs */
2238 if (!(adapter->queues =
2239 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2240 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2241 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2242 error = ENOMEM;
2243 goto fail;
2244 }
2245
2246 	/* Next allocate the TX ring struct memory */
2247 if (!(adapter->tx_rings =
2248 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2249 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2250 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2251 error = ENOMEM;
2252 goto tx_fail;
2253 }
2254
2255 	/* Next allocate the RX ring struct memory */
2256 if (!(adapter->rx_rings =
2257 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2258 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2259 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2260 error = ENOMEM;
2261 goto rx_fail;
2262 }
2263
2264 	/* Size of the TX descriptor ring itself */
2265 tsize = roundup2(adapter->num_tx_desc *
2266 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2267
2268 #ifdef PCI_IOV
2269 iov_mode = ixgbe_get_iov_mode(adapter);
2270 adapter->pool = ixgbe_max_vfs(iov_mode);
2271 #else
2272 adapter->pool = 0;
2273 #endif
2274 	/*
2275 	 * Now set up the TX queues; txconf is needed to handle the
2276 	 * possibility that things fail midcourse and we need to
2277 	 * undo the allocated memory gracefully
2278 	 */
2279 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2280 /* Set up some basics */
2281 txr = &adapter->tx_rings[i];
2282 txr->adapter = adapter;
2283 #ifdef PCI_IOV
2284 txr->me = ixgbe_pf_que_index(iov_mode, i);
2285 #else
2286 txr->me = i;
2287 #endif
2288 txr->num_desc = adapter->num_tx_desc;
2289
2290 /* Initialize the TX side lock */
2291 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2292 device_xname(dev), txr->me);
2293 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2294
2295 if (ixgbe_dma_malloc(adapter, tsize,
2296 &txr->txdma, BUS_DMA_NOWAIT)) {
2297 aprint_error_dev(dev,
2298 "Unable to allocate TX Descriptor memory\n");
2299 error = ENOMEM;
2300 goto err_tx_desc;
2301 }
2302 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2303 bzero((void *)txr->tx_base, tsize);
2304
2305 /* Now allocate transmit buffers for the ring */
2306 if (ixgbe_allocate_transmit_buffers(txr)) {
2307 aprint_error_dev(dev,
2308 "Critical Failure setting up transmit buffers\n");
2309 error = ENOMEM;
2310 goto err_tx_desc;
2311 }
2312 #ifndef IXGBE_LEGACY_TX
2313 /* Allocate a buf ring */
2314 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2315 M_WAITOK, &txr->tx_mtx);
2316 if (txr->br == NULL) {
2317 aprint_error_dev(dev,
2318 "Critical Failure setting up buf ring\n");
2319 error = ENOMEM;
2320 goto err_tx_desc;
2321 }
2322 #endif
2323 }
2324
2325 /*
2326 * Next the RX queues...
2327 */
2328 rsize = roundup2(adapter->num_rx_desc *
2329 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2330 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2331 rxr = &adapter->rx_rings[i];
2332 /* Set up some basics */
2333 rxr->adapter = adapter;
2334 #ifdef PCI_IOV
2335 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2336 #else
2337 rxr->me = i;
2338 #endif
2339 rxr->num_desc = adapter->num_rx_desc;
2340
2341 /* Initialize the RX side lock */
2342 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2343 device_xname(dev), rxr->me);
2344 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2345
2346 if (ixgbe_dma_malloc(adapter, rsize,
2347 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2348 aprint_error_dev(dev,
2349 			    "Unable to allocate RX Descriptor memory\n");
2350 error = ENOMEM;
2351 goto err_rx_desc;
2352 }
2353 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2354 bzero((void *)rxr->rx_base, rsize);
2355
2356 		/* Allocate receive buffers for the ring */
2357 if (ixgbe_allocate_receive_buffers(rxr)) {
2358 aprint_error_dev(dev,
2359 "Critical Failure setting up receive buffers\n");
2360 error = ENOMEM;
2361 goto err_rx_desc;
2362 }
2363 }
2364
2365 /*
2366 ** Finally set up the queue holding structs
2367 */
2368 for (int i = 0; i < adapter->num_queues; i++) {
2369 que = &adapter->queues[i];
2370 que->adapter = adapter;
2371 que->me = i;
2372 que->txr = &adapter->tx_rings[i];
2373 que->rxr = &adapter->rx_rings[i];
2374 }
2375
2376 return (0);
2377
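	/*
	 * Error unwind: rxconf and txconf count how many rings were fully
	 * set up, so only those descriptor areas are freed before the ring
	 * arrays and the queue array themselves are released.
	 */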
2378 err_rx_desc:
2379 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2380 ixgbe_dma_free(adapter, &rxr->rxdma);
2381 err_tx_desc:
2382 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2383 ixgbe_dma_free(adapter, &txr->txdma);
2384 free(adapter->rx_rings, M_DEVBUF);
2385 rx_fail:
2386 free(adapter->tx_rings, M_DEVBUF);
2387 tx_fail:
2388 free(adapter->queues, M_DEVBUF);
2389 fail:
2390 return (error);
2391 }
2392
2393