1 /******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 301538 2016-06-07 04:51:50Z sephe $*/
62 /*$NetBSD: ix_txrx.c,v 1.13 2016/12/16 08:24:40 msaitoh Exp $*/
63
64 #include "opt_inet.h"
65 #include "opt_inet6.h"
66
67 #include "ixgbe.h"
68
69 #ifdef DEV_NETMAP
70 #include <net/netmap.h>
71 #include <sys/selinfo.h>
72 #include <dev/netmap/netmap_kern.h>
73
74 extern int ix_crcstrip;
75 #endif
76
77 /*
78 ** HW RSC control:
79 ** this feature only works with
80 ** IPv4, and only on 82599 and later.
81 ** Also this will cause IP forwarding to
82 ** fail and that can't be controlled by
83 ** the stack as LRO can. For all these
84 ** reasons I've deemed it best to leave
85 ** this off and not bother with a tunable
86 ** interface; this would need to be compiled
87 ** in to enable.
88 */
89 static bool ixgbe_rsc_enable = FALSE;
90
91 #ifdef IXGBE_FDIR
92 /*
93 ** For Flow Director: this is the
94 ** number of TX packets we sample
95 ** for the filter pool; this means
96 ** every 20th packet will be probed.
97 **
98 ** This feature can be disabled by
99 ** setting this to 0.
100 */
101 static int atr_sample_rate = 20;
102 #endif
103
104 /*********************************************************************
105 * Local Function prototypes
106 *********************************************************************/
107 static void ixgbe_setup_transmit_ring(struct tx_ring *);
108 static void ixgbe_free_transmit_buffers(struct tx_ring *);
109 static int ixgbe_setup_receive_ring(struct rx_ring *);
110 static void ixgbe_free_receive_buffers(struct rx_ring *);
111
112 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
113 struct ixgbe_hw_stats *);
114 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
115 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
116 static int ixgbe_tx_ctx_setup(struct tx_ring *,
117 struct mbuf *, u32 *, u32 *);
118 static int ixgbe_tso_setup(struct tx_ring *,
119 struct mbuf *, u32 *, u32 *);
120 #ifdef IXGBE_FDIR
121 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
122 #endif
123 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
124 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
125 struct mbuf *, u32);
126
127 static void ixgbe_setup_hw_rsc(struct rx_ring *);
128
129 #ifdef IXGBE_LEGACY_TX
130 /*********************************************************************
131 * Transmit entry point
132 *
133 * ixgbe_start is called by the stack to initiate a transmit.
134 * The driver will remain in this routine as long as there are
135 * packets to transmit and transmit resources are available.
136 * In case resources are not available, the stack is notified
137 * and the packet is requeued.
138 **********************************************************************/
139
140 void
141 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
142 {
143 int rc;
144 struct mbuf *m_head;
145 struct adapter *adapter = txr->adapter;
146
147 IXGBE_TX_LOCK_ASSERT(txr);
148
149 if ((ifp->if_flags & IFF_RUNNING) == 0)
150 return;
151 if (!adapter->link_active)
152 return;
153
154 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
155 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
156 break;
157
158 IFQ_POLL(&ifp->if_snd, m_head);
159 if (m_head == NULL)
160 break;
161
162 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
163 break;
164 }
165 IFQ_DEQUEUE(&ifp->if_snd, m_head);
166 if (rc == EFBIG) {
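/*
 * EFBIG from the DMA setup means the mbuf chain has too many
 * segments; defragment it into fewer clusters and retry the
 * transmit once.
 */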
167 struct mbuf *mtmp;
168
169 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
170 m_head = mtmp;
171 rc = ixgbe_xmit(txr, m_head);
172 if (rc != 0)
173 adapter->efbig2_tx_dma_setup.ev_count++;
174 } else
175 adapter->m_defrag_failed.ev_count++;
176 }
177 if (rc != 0) {
178 m_freem(m_head);
179 continue;
180 }
181
182 /* Send a copy of the frame to the BPF listener */
183 bpf_mtap(ifp, m_head);
184 }
185 return;
186 }
187
188 /*
189 * Legacy TX start - called by the stack, this
190 * always uses the first tx ring, and should
191 * not be used with multiqueue tx enabled.
192 */
193 void
194 ixgbe_start(struct ifnet *ifp)
195 {
196 struct adapter *adapter = ifp->if_softc;
197 struct tx_ring *txr = adapter->tx_rings;
198
199 if (ifp->if_flags & IFF_RUNNING) {
200 IXGBE_TX_LOCK(txr);
201 ixgbe_start_locked(txr, ifp);
202 IXGBE_TX_UNLOCK(txr);
203 }
204 return;
205 }
206
207 #else /* ! IXGBE_LEGACY_TX */
208
209 /*
210 ** Multiqueue Transmit Entry Point
211 ** (if_transmit function)
212 */
213 int
214 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
215 {
216 struct adapter *adapter = ifp->if_softc;
217 struct ix_queue *que;
218 struct tx_ring *txr;
219 int i, err = 0;
220 #ifdef RSS
221 uint32_t bucket_id;
222 #endif
223
224 /*
225 * When doing RSS, map it to the same outbound queue
226 * as the incoming flow would be mapped to.
227 *
228 * If everything is setup correctly, it should be the
229 * same bucket that the current CPU we're on is.
230 */
231 #if __FreeBSD_version < 1100054
232 if (m->m_flags & M_FLOWID) {
233 #else
234 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
235 #endif
236 #ifdef RSS
237 if (rss_hash2bucket(m->m_pkthdr.flowid,
238 M_HASHTYPE_GET(m), &bucket_id) == 0) {
239 /* TODO: spit out something if bucket_id > num_queues? */
240 i = bucket_id % adapter->num_queues;
241 #ifdef IXGBE_DEBUG
242 if (bucket_id > adapter->num_queues)
243 if_printf(ifp, "bucket_id (%d) > num_queues "
244 "(%d)\n", bucket_id, adapter->num_queues);
245 #endif
246 } else
247 #endif
248 i = m->m_pkthdr.flowid % adapter->num_queues;
249 } else
250 i = curcpu % adapter->num_queues;
251
252 /* Check for a hung queue and pick alternative */
253 if (((1 << i) & adapter->active_queues) == 0)
254 i = ffsl(adapter->active_queues);
255
256 txr = &adapter->tx_rings[i];
257 que = &adapter->queues[i];
258
259 err = drbr_enqueue(ifp, txr->br, m);
260 if (err)
261 return (err);
262 if (IXGBE_TX_TRYLOCK(txr)) {
263 ixgbe_mq_start_locked(ifp, txr);
264 IXGBE_TX_UNLOCK(txr);
265 } else
266 softint_schedule(txr->txq_si);
267
268 return (0);
269 }
270
271 int
272 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
273 {
274 struct adapter *adapter = txr->adapter;
275 struct mbuf *next;
276 int enqueued = 0, err = 0;
277
278 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
279 adapter->link_active == 0)
280 return (ENETDOWN);
281
282 /* Process the queue */
283 #if __FreeBSD_version < 901504
284 next = drbr_dequeue(ifp, txr->br);
285 while (next != NULL) {
286 if ((err = ixgbe_xmit(txr, &next)) != 0) {
287 if (next != NULL)
288 err = drbr_enqueue(ifp, txr->br, next);
289 #else
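/*
 * drbr_peek() leaves the packet on the ring: on success we
 * drbr_advance() past it below; on failure we put it back,
 * or advance if ixgbe_xmit() already consumed the mbuf.
 */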
290 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
291 if ((err = ixgbe_xmit(txr, &next)) != 0) {
292 if (next == NULL) {
293 drbr_advance(ifp, txr->br);
294 } else {
295 drbr_putback(ifp, txr->br, next);
296 }
297 #endif
298 break;
299 }
300 #if __FreeBSD_version >= 901504
301 drbr_advance(ifp, txr->br);
302 #endif
303 enqueued++;
304 #if 0 // this is VF-only
305 #if __FreeBSD_version >= 1100036
306 /*
307 * Since we're looking at the tx ring, we can check
308 * to see if we're a VF by examining our tail register
309 * address.
310 */
311 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
312 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
313 #endif
314 #endif
315 /* Send a copy of the frame to the BPF listener */
316 bpf_mtap(ifp, next);
317 if ((ifp->if_flags & IFF_RUNNING) == 0)
318 break;
319 #if __FreeBSD_version < 901504
320 next = drbr_dequeue(ifp, txr->br);
321 #endif
322 }
323
324 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
325 ixgbe_txeof(txr);
326
327 return (err);
328 }
329
330 /*
331 * Called from a taskqueue to drain queued transmit packets.
332 */
333 void
334 ixgbe_deferred_mq_start(void *arg, int pending)
335 {
336 struct tx_ring *txr = arg;
337 struct adapter *adapter = txr->adapter;
338 struct ifnet *ifp = adapter->ifp;
339
340 IXGBE_TX_LOCK(txr);
341 if (!drbr_empty(ifp, txr->br))
342 ixgbe_mq_start_locked(ifp, txr);
343 IXGBE_TX_UNLOCK(txr);
344 }
345
346 /*
347 * Flush all ring buffers
348 */
349 void
350 ixgbe_qflush(struct ifnet *ifp)
351 {
352 struct adapter *adapter = ifp->if_softc;
353 struct tx_ring *txr = adapter->tx_rings;
354 struct mbuf *m;
355
356 for (int i = 0; i < adapter->num_queues; i++, txr++) {
357 IXGBE_TX_LOCK(txr);
358 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
359 m_freem(m);
360 IXGBE_TX_UNLOCK(txr);
361 }
362 if_qflush(ifp);
363 }
364 #endif /* IXGBE_LEGACY_TX */
365
366
367 /*********************************************************************
368 *
369 * This routine maps the mbufs to tx descriptors, allowing the
370 * TX engine to transmit the packets.
371 * - return 0 on success, positive on failure
372 *
373 **********************************************************************/
374
375 static int
376 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
377 {
378 struct m_tag *mtag;
379 struct adapter *adapter = txr->adapter;
380 struct ethercom *ec = &adapter->osdep.ec;
381 u32 olinfo_status = 0, cmd_type_len;
382 int i, j, error;
383 int first;
384 bus_dmamap_t map;
385 struct ixgbe_tx_buf *txbuf;
386 union ixgbe_adv_tx_desc *txd = NULL;
387
388 /* Basic descriptor defines */
389 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
390 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
391
392 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
393 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
394
395 /*
396 * Important to capture the first descriptor
397 * used because it will contain the index of
398 * the one we tell the hardware to report back
399 */
400 first = txr->next_avail_desc;
401 txbuf = &txr->tx_buffers[first];
402 map = txbuf->map;
403
404 /*
405 * Map the packet for DMA.
406 */
407 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
408 m_head, BUS_DMA_NOWAIT);
409
410 if (__predict_false(error)) {
411
412 switch (error) {
413 case EAGAIN:
414 adapter->eagain_tx_dma_setup.ev_count++;
415 return EAGAIN;
416 case ENOMEM:
417 adapter->enomem_tx_dma_setup.ev_count++;
418 return EAGAIN;
419 case EFBIG:
420 /*
421 * XXX Try it again?
422 * do m_defrag() and retry bus_dmamap_load_mbuf().
423 */
424 adapter->efbig_tx_dma_setup.ev_count++;
425 return error;
426 case EINVAL:
427 adapter->einval_tx_dma_setup.ev_count++;
428 return error;
429 default:
430 adapter->other_tx_dma_setup.ev_count++;
431 return error;
432 }
433 }
434
435 /* Make certain there are enough descriptors */
436 if (txr->tx_avail < (map->dm_nsegs + 2)) {
437 txr->no_desc_avail.ev_count++;
438 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
439 return EAGAIN;
440 }
441
442 /*
443 * Set up the appropriate offload context
444 * this will consume the first descriptor
445 */
446 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
447 if (__predict_false(error)) {
448 return (error);
449 }
450
451 #ifdef IXGBE_FDIR
452 /* Do the flow director magic */
453 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
454 ++txr->atr_count;
455 if (txr->atr_count >= atr_sample_rate) {
456 ixgbe_atr(txr, m_head);
457 txr->atr_count = 0;
458 }
459 }
460 #endif
461
462 olinfo_status |= IXGBE_ADVTXD_CC;
463 i = txr->next_avail_desc;
464 for (j = 0; j < map->dm_nsegs; j++) {
465 bus_size_t seglen;
466 bus_addr_t segaddr;
467
468 txbuf = &txr->tx_buffers[i];
469 txd = &txr->tx_base[i];
470 seglen = map->dm_segs[j].ds_len;
471 segaddr = htole64(map->dm_segs[j].ds_addr);
472
473 txd->read.buffer_addr = segaddr;
474 txd->read.cmd_type_len = htole32(txr->txd_cmd |
475 cmd_type_len | seglen);
476 txd->read.olinfo_status = htole32(olinfo_status);
477
478 if (++i == txr->num_desc)
479 i = 0;
480 }
481
482 txd->read.cmd_type_len |=
483 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
484 txr->tx_avail -= map->dm_nsegs;
485 txr->next_avail_desc = i;
486
487 txbuf->m_head = m_head;
488 /*
489 * Here we swap the map so the last descriptor,
490 * which gets the completion interrupt, has the
491 * real map, and the first descriptor gets the
492 * unused map from this descriptor.
493 */
494 txr->tx_buffers[first].map = txbuf->map;
495 txbuf->map = map;
496 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
497 BUS_DMASYNC_PREWRITE);
498
499 /* Set the EOP descriptor that will be marked done */
500 txbuf = &txr->tx_buffers[first];
501 txbuf->eop = txd;
502
503 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
504 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
505 /*
506 * Advance the Transmit Descriptor Tail (Tdt), this tells the
507 * hardware that this frame is available to transmit.
508 */
509 ++txr->total_packets.ev_count;
510 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
511
512 /* Mark queue as having work */
513 if (txr->busy == 0)
514 txr->busy = 1;
515
516 return 0;
517 }
518
519 /*********************************************************************
520 *
521 * Allocate memory for tx_buffer structures. The tx_buffer stores all
522 * the information needed to transmit a packet on the wire. This is
523 * called only once at attach, setup is done every reset.
524 *
525 **********************************************************************/
526 int
527 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
528 {
529 struct adapter *adapter = txr->adapter;
530 device_t dev = adapter->dev;
531 struct ixgbe_tx_buf *txbuf;
532 int error, i;
533
534 /*
535 * Setup DMA descriptor areas.
536 */
537 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
538 1, 0, /* alignment, bounds */
539 IXGBE_TSO_SIZE, /* maxsize */
540 adapter->num_segs, /* nsegments */
541 PAGE_SIZE, /* maxsegsize */
542 0, /* flags */
543 &txr->txtag))) {
544 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
545 goto fail;
546 }
547
548 if (!(txr->tx_buffers =
549 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
550 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
551 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
552 error = ENOMEM;
553 goto fail;
554 }
555
556 /* Create the descriptor buffer dma maps */
557 txbuf = txr->tx_buffers;
558 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
559 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
560 if (error != 0) {
561 aprint_error_dev(dev,
562 "Unable to create TX DMA map (%d)\n", error);
563 goto fail;
564 }
565 }
566
567 return 0;
568 fail:
569 /* We free all, it handles case where we are in the middle */
570 ixgbe_free_transmit_structures(adapter);
571 return (error);
572 }
573
574 /*********************************************************************
575 *
576 * Initialize a transmit ring.
577 *
578 **********************************************************************/
579 static void
580 ixgbe_setup_transmit_ring(struct tx_ring *txr)
581 {
582 struct adapter *adapter = txr->adapter;
583 struct ixgbe_tx_buf *txbuf;
584 #ifdef DEV_NETMAP
585 struct netmap_adapter *na = NA(adapter->ifp);
586 struct netmap_slot *slot;
587 #endif /* DEV_NETMAP */
588
589 /* Clear the old ring contents */
590 IXGBE_TX_LOCK(txr);
591 #ifdef DEV_NETMAP
592 /*
593 * (under lock): if in netmap mode, do some consistency
594 * checks and set slot to entry 0 of the netmap ring.
595 */
596 slot = netmap_reset(na, NR_TX, txr->me, 0);
597 #endif /* DEV_NETMAP */
598 bzero((void *)txr->tx_base,
599 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
600 /* Reset indices */
601 txr->next_avail_desc = 0;
602 txr->next_to_clean = 0;
603
604 /* Free any existing tx buffers. */
605 txbuf = txr->tx_buffers;
606 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
607 if (txbuf->m_head != NULL) {
608 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
609 0, txbuf->m_head->m_pkthdr.len,
610 BUS_DMASYNC_POSTWRITE);
611 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
612 m_freem(txbuf->m_head);
613 txbuf->m_head = NULL;
614 }
615 #ifdef DEV_NETMAP
616 /*
617 * In netmap mode, set the map for the packet buffer.
618 * NOTE: Some drivers (not this one) also need to set
619 * the physical buffer address in the NIC ring.
620 * Slots in the netmap ring (indexed by "si") are
621 * kring->nkr_hwofs positions "ahead" wrt the
622 * corresponding slot in the NIC ring. In some drivers
623 * (not here) nkr_hwofs can be negative. Function
624 * netmap_idx_n2k() handles wraparounds properly.
625 */
626 if (slot) {
627 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
628 netmap_load_map(na, txr->txtag,
629 txbuf->map, NMB(na, slot + si));
630 }
631 #endif /* DEV_NETMAP */
632 /* Clear the EOP descriptor pointer */
633 txbuf->eop = NULL;
634 }
635
636 #ifdef IXGBE_FDIR
637 /* Set the rate at which we sample packets */
638 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
639 txr->atr_sample = atr_sample_rate;
640 #endif
641
642 /* Set number of descriptors available */
643 txr->tx_avail = adapter->num_tx_desc;
644
645 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
646 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
647 IXGBE_TX_UNLOCK(txr);
648 }
649
650 /*********************************************************************
651 *
652 * Initialize all transmit rings.
653 *
654 **********************************************************************/
655 int
656 ixgbe_setup_transmit_structures(struct adapter *adapter)
657 {
658 struct tx_ring *txr = adapter->tx_rings;
659
660 for (int i = 0; i < adapter->num_queues; i++, txr++)
661 ixgbe_setup_transmit_ring(txr);
662
663 return (0);
664 }
665
666 /*********************************************************************
667 *
668 * Free all transmit rings.
669 *
670 **********************************************************************/
671 void
672 ixgbe_free_transmit_structures(struct adapter *adapter)
673 {
674 struct tx_ring *txr = adapter->tx_rings;
675
676 for (int i = 0; i < adapter->num_queues; i++, txr++) {
677 ixgbe_free_transmit_buffers(txr);
678 ixgbe_dma_free(adapter, &txr->txdma);
679 IXGBE_TX_LOCK_DESTROY(txr);
680 }
681 free(adapter->tx_rings, M_DEVBUF);
682 }
683
684 /*********************************************************************
685 *
686 * Free transmit ring related data structures.
687 *
688 **********************************************************************/
689 static void
690 ixgbe_free_transmit_buffers(struct tx_ring *txr)
691 {
692 struct adapter *adapter = txr->adapter;
693 struct ixgbe_tx_buf *tx_buffer;
694 int i;
695
696 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
697
698 if (txr->tx_buffers == NULL)
699 return;
700
701 tx_buffer = txr->tx_buffers;
702 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
703 if (tx_buffer->m_head != NULL) {
704 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
705 0, tx_buffer->m_head->m_pkthdr.len,
706 BUS_DMASYNC_POSTWRITE);
707 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
708 m_freem(tx_buffer->m_head);
709 tx_buffer->m_head = NULL;
710 if (tx_buffer->map != NULL) {
711 ixgbe_dmamap_destroy(txr->txtag,
712 tx_buffer->map);
713 tx_buffer->map = NULL;
714 }
715 } else if (tx_buffer->map != NULL) {
716 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
717 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
718 tx_buffer->map = NULL;
719 }
720 }
721 #ifndef IXGBE_LEGACY_TX
722 if (txr->br != NULL)
723 buf_ring_free(txr->br, M_DEVBUF);
724 #endif
725 if (txr->tx_buffers != NULL) {
726 free(txr->tx_buffers, M_DEVBUF);
727 txr->tx_buffers = NULL;
728 }
729 if (txr->txtag != NULL) {
730 ixgbe_dma_tag_destroy(txr->txtag);
731 txr->txtag = NULL;
732 }
733 return;
734 }
735
736 /*********************************************************************
737 *
738 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
739 *
740 **********************************************************************/
741
742 static int
743 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
744 u32 *cmd_type_len, u32 *olinfo_status)
745 {
746 struct adapter *adapter = txr->adapter;
747 struct ethercom *ec = &adapter->osdep.ec;
748 struct m_tag *mtag;
749 struct ixgbe_adv_tx_context_desc *TXD;
750 struct ether_vlan_header *eh;
751 #ifdef INET
752 struct ip *ip;
753 #endif
754 #ifdef INET6
755 struct ip6_hdr *ip6;
756 #endif
757 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
758 int ehdrlen, ip_hlen = 0;
759 u16 etype;
760 u8 ipproto = 0;
761 int offload = TRUE;
762 int ctxd = txr->next_avail_desc;
763 u16 vtag = 0;
764 char *l3d;
765
766
767 /* First check if TSO is to be used */
768 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
769 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
770
771 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
772 offload = FALSE;
773
774 /* Indicate the whole packet as payload when not doing TSO */
775 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
776
777 /* Now ready a context descriptor */
778 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
779
780 /*
781 ** In advanced descriptors the vlan tag must
782 ** be placed into the context descriptor. Hence
783 ** we need to make one even if not doing offloads.
784 */
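/*
** Note: with no VLAN tag and no offload work requested, the
** context descriptor is skipped entirely below, except on the
** X550 VF where one is always built.
*/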
785 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
786 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
787 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
788 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
789 return (0);
790
791 /*
792 * Determine where frame payload starts.
793 * Jump over vlan headers if already present,
794 * helpful for QinQ too.
795 */
796 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
797 eh = mtod(mp, struct ether_vlan_header *);
798 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
799 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
800 etype = ntohs(eh->evl_proto);
801 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
802 } else {
803 etype = ntohs(eh->evl_encap_proto);
804 ehdrlen = ETHER_HDR_LEN;
805 }
806
807 /* Set the ether header length */
808 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
809
810 if (offload == FALSE)
811 goto no_offloads;
812
813 /*
814 * If the first mbuf only includes the ethernet header, jump to the next one
815 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
816 * XXX: And assumes the entire IP header is contained in one mbuf
817 */
818 if (mp->m_len == ehdrlen && mp->m_next)
819 l3d = mtod(mp->m_next, char *);
820 else
821 l3d = mtod(mp, char *) + ehdrlen;
822
823 switch (etype) {
824 #ifdef INET
825 case ETHERTYPE_IP:
826 ip = (struct ip *)(l3d);
827 ip_hlen = ip->ip_hl << 2;
828 ipproto = ip->ip_p;
829 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
830 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
831 ip->ip_sum == 0);
832 break;
833 #endif
834 #ifdef INET6
835 case ETHERTYPE_IPV6:
836 ip6 = (struct ip6_hdr *)(l3d);
837 ip_hlen = sizeof(struct ip6_hdr);
838 ipproto = ip6->ip6_nxt;
839 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
840 break;
841 #endif
842 default:
843 offload = false;
844 break;
845 }
846
847 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
848 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
849
850 vlan_macip_lens |= ip_hlen;
851
852 /* No support for offloads for non-L4 next headers */
853 switch (ipproto) {
854 case IPPROTO_TCP:
855 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6))
856
857 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
858 else
859 offload = false;
860 break;
861 case IPPROTO_UDP:
862 if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6))
863 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
864 else
865 offload = false;
866 break;
867 default:
868 offload = false;
869 break;
870 }
871
872 if (offload) /* Insert L4 checksum into data descriptors */
873 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
874
875 no_offloads:
876 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
877
878 /* Now copy bits into descriptor */
879 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
880 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
881 TXD->seqnum_seed = htole32(0);
882 TXD->mss_l4len_idx = htole32(0);
883
884 /* We've consumed the first desc, adjust counters */
885 if (++ctxd == txr->num_desc)
886 ctxd = 0;
887 txr->next_avail_desc = ctxd;
888 --txr->tx_avail;
889
890 return 0;
891 }
892
893 /**********************************************************************
894 *
895 * Setup work for hardware segmentation offload (TSO) on
896 * adapters using advanced tx descriptors
897 *
898 **********************************************************************/
899 static int
900 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
901 u32 *cmd_type_len, u32 *olinfo_status)
902 {
903 struct m_tag *mtag;
904 struct adapter *adapter = txr->adapter;
905 struct ethercom *ec = &adapter->osdep.ec;
906 struct ixgbe_adv_tx_context_desc *TXD;
907 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
908 u32 mss_l4len_idx = 0, paylen;
909 u16 vtag = 0, eh_type;
910 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
911 struct ether_vlan_header *eh;
912 #ifdef INET6
913 struct ip6_hdr *ip6;
914 #endif
915 #ifdef INET
916 struct ip *ip;
917 #endif
918 struct tcphdr *th;
919
920 /*
921 * Determine where frame payload starts.
922 * Jump over vlan headers if already present
923 */
924 eh = mtod(mp, struct ether_vlan_header *);
925 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
926 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
927 eh_type = eh->evl_proto;
928 } else {
929 ehdrlen = ETHER_HDR_LEN;
930 eh_type = eh->evl_encap_proto;
931 }
932
933 switch (ntohs(eh_type)) {
934 #ifdef INET6
935 case ETHERTYPE_IPV6:
936 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
937 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
938 if (ip6->ip6_nxt != IPPROTO_TCP)
939 return (ENXIO);
940 ip_hlen = sizeof(struct ip6_hdr);
941 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
942 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
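/*
 * Seed th_sum with the IPv6 pseudo-header checksum (length
 * omitted); the hardware completes the TCP checksum for each
 * segment it generates.
 */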
943 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
944 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
945 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
946 break;
947 #endif
948 #ifdef INET
949 case ETHERTYPE_IP:
950 ip = (struct ip *)(mp->m_data + ehdrlen);
951 if (ip->ip_p != IPPROTO_TCP)
952 return (ENXIO);
953 ip->ip_sum = 0;
954 ip_hlen = ip->ip_hl << 2;
955 th = (struct tcphdr *)((char *)ip + ip_hlen);
956 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
957 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
958 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
959 /* Tell transmit desc to also do IPv4 checksum. */
960 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
961 break;
962 #endif
963 default:
964 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
965 __func__, ntohs(eh_type));
966 break;
967 }
968
969 ctxd = txr->next_avail_desc;
970 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
971
972 tcp_hlen = th->th_off << 2;
973
974 /* This is used in the transmit desc in encap */
975 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
976
977 /* VLAN MACLEN IPLEN */
978 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
979 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
980 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
981 }
982
983 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
984 vlan_macip_lens |= ip_hlen;
985 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
986
987 /* ADV DTYPE TUCMD */
988 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
989 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
990 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
991
992 /* MSS L4LEN IDX */
993 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
994 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
995 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
996
997 TXD->seqnum_seed = htole32(0);
998
999 if (++ctxd == txr->num_desc)
1000 ctxd = 0;
1001
1002 txr->tx_avail--;
1003 txr->next_avail_desc = ctxd;
1004 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1005 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1006 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1007 ++txr->tso_tx.ev_count;
1008 return (0);
1009 }
1010
1011
1012 /**********************************************************************
1013 *
1014 * Examine each tx_buffer in the used queue. If the hardware is done
1015 * processing the packet then free associated resources. The
1016 * tx_buffer is put back on the free queue.
1017 *
1018 **********************************************************************/
1019 void
1020 ixgbe_txeof(struct tx_ring *txr)
1021 {
1022 struct adapter *adapter = txr->adapter;
1023 struct ifnet *ifp = adapter->ifp;
1024 u32 work, processed = 0;
1025 u32 limit = adapter->tx_process_limit;
1026 struct ixgbe_tx_buf *buf;
1027 union ixgbe_adv_tx_desc *txd;
1028
1029 KASSERT(mutex_owned(&txr->tx_mtx));
1030
1031 #ifdef DEV_NETMAP
1032 if (ifp->if_capenable & IFCAP_NETMAP) {
1033 struct netmap_adapter *na = NA(ifp);
1034 struct netmap_kring *kring = &na->tx_rings[txr->me];
1035 txd = txr->tx_base;
1036 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1037 BUS_DMASYNC_POSTREAD);
1038 /*
1039 * In netmap mode, all the work is done in the context
1040 * of the client thread. Interrupt handlers only wake up
1041 * clients, which may be sleeping on individual rings
1042 * or on a global resource for all rings.
1043 * To implement tx interrupt mitigation, we wake up the client
1044 * thread roughly every half ring, even if the NIC interrupts
1045 * more frequently. This is implemented as follows:
1046 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1047 * the slot that should wake up the thread (nkr_num_slots
1048 * means the user thread should not be woken up);
1049 * - the driver ignores tx interrupts unless netmap_mitigate=0
1050 * or the slot has the DD bit set.
1051 */
1052 if (!netmap_mitigate ||
1053 (kring->nr_kflags < kring->nkr_num_slots &&
1054 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1055 netmap_tx_irq(ifp, txr->me);
1056 }
1057 return;
1058 }
1059 #endif /* DEV_NETMAP */
1060
1061 if (txr->tx_avail == txr->num_desc) {
1062 txr->busy = 0;
1063 return;
1064 }
1065
1066 /* Get work starting point */
1067 work = txr->next_to_clean;
1068 buf = &txr->tx_buffers[work];
1069 txd = &txr->tx_base[work];
1070 work -= txr->num_desc; /* The distance to ring end */
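/*
 * 'work' is kept as a negative offset from the end of the ring,
 * so the wrap checks below reduce to "work == 0"; it is turned
 * back into a real index once the loop finishes.
 */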
1071 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1072 BUS_DMASYNC_POSTREAD);
1073
1074 do {
1075 union ixgbe_adv_tx_desc *eop = buf->eop;
1076 if (eop == NULL) /* No work */
1077 break;
1078
1079 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1080 break; /* I/O not complete */
1081
1082 if (buf->m_head) {
1083 txr->bytes +=
1084 buf->m_head->m_pkthdr.len;
1085 bus_dmamap_sync(txr->txtag->dt_dmat,
1086 buf->map,
1087 0, buf->m_head->m_pkthdr.len,
1088 BUS_DMASYNC_POSTWRITE);
1089 ixgbe_dmamap_unload(txr->txtag,
1090 buf->map);
1091 m_freem(buf->m_head);
1092 buf->m_head = NULL;
1093 }
1094 buf->eop = NULL;
1095 ++txr->tx_avail;
1096
1097 /* We clean the range if multi segment */
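/*
 * Advance through the remaining descriptors of this packet, up
 * to and including its EOP descriptor, unmapping and freeing
 * any buffers attached to them along the way.
 */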
1098 while (txd != eop) {
1099 ++txd;
1100 ++buf;
1101 ++work;
1102 /* wrap the ring? */
1103 if (__predict_false(!work)) {
1104 work -= txr->num_desc;
1105 buf = txr->tx_buffers;
1106 txd = txr->tx_base;
1107 }
1108 if (buf->m_head) {
1109 txr->bytes +=
1110 buf->m_head->m_pkthdr.len;
1111 bus_dmamap_sync(txr->txtag->dt_dmat,
1112 buf->map,
1113 0, buf->m_head->m_pkthdr.len,
1114 BUS_DMASYNC_POSTWRITE);
1115 ixgbe_dmamap_unload(txr->txtag,
1116 buf->map);
1117 m_freem(buf->m_head);
1118 buf->m_head = NULL;
1119 }
1120 ++txr->tx_avail;
1121 buf->eop = NULL;
1122
1123 }
1124 ++txr->packets;
1125 ++processed;
1126 ++ifp->if_opackets;
1127
1128 /* Try the next packet */
1129 ++txd;
1130 ++buf;
1131 ++work;
1132 /* reset with a wrap */
1133 if (__predict_false(!work)) {
1134 work -= txr->num_desc;
1135 buf = txr->tx_buffers;
1136 txd = txr->tx_base;
1137 }
1138 prefetch(txd);
1139 } while (__predict_true(--limit));
1140
1141 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1142 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1143
1144 work += txr->num_desc;
1145 txr->next_to_clean = work;
1146
1147 /*
1148 ** Queue Hang detection: we know there's
1149 ** work outstanding or the first return
1150 ** would have been taken, so increment busy
1151 ** if nothing managed to get cleaned; then
1152 ** in local_timer it will be checked and
1153 ** marked as HUNG if it exceeds a MAX attempt.
1154 */
1155 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1156 ++txr->busy;
1157 /*
1158 ** If anything gets cleaned we reset state to 1,
1159 ** note this will turn off HUNG if it's set.
1160 */
1161 if (processed)
1162 txr->busy = 1;
1163
1164 if (txr->tx_avail == txr->num_desc)
1165 txr->busy = 0;
1166
1167 return;
1168 }
1169
1170
1171 #ifdef IXGBE_FDIR
1172 /*
1173 ** This routine parses packet headers so that Flow
1174 ** Director can make a hashed filter table entry
1175 ** allowing traffic flows to be identified and kept
1176 ** on the same cpu. This would be a performance
1177 ** hit, but we only do it at IXGBE_FDIR_RATE of
1178 ** packets.
1179 */
1180 static void
1181 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1182 {
1183 struct adapter *adapter = txr->adapter;
1184 struct ix_queue *que;
1185 struct ip *ip;
1186 struct tcphdr *th;
1187 struct udphdr *uh;
1188 struct ether_vlan_header *eh;
1189 union ixgbe_atr_hash_dword input = {.dword = 0};
1190 union ixgbe_atr_hash_dword common = {.dword = 0};
1191 int ehdrlen, ip_hlen;
1192 u16 etype;
1193
1194 eh = mtod(mp, struct ether_vlan_header *);
1195 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1196 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1197 etype = eh->evl_proto;
1198 } else {
1199 ehdrlen = ETHER_HDR_LEN;
1200 etype = eh->evl_encap_proto;
1201 }
1202
1203 /* Only handling IPv4 */
1204 if (etype != htons(ETHERTYPE_IP))
1205 return;
1206
1207 ip = (struct ip *)(mp->m_data + ehdrlen);
1208 ip_hlen = ip->ip_hl << 2;
1209
1210 /* check if we're UDP or TCP */
1211 switch (ip->ip_p) {
1212 case IPPROTO_TCP:
1213 th = (struct tcphdr *)((char *)ip + ip_hlen);
1214 /* src and dst are inverted */
1215 common.port.dst ^= th->th_sport;
1216 common.port.src ^= th->th_dport;
1217 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1218 break;
1219 case IPPROTO_UDP:
1220 uh = (struct udphdr *)((char *)ip + ip_hlen);
1221 /* src and dst are inverted */
1222 common.port.dst ^= uh->uh_sport;
1223 common.port.src ^= uh->uh_dport;
1224 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1225 break;
1226 default:
1227 return;
1228 }
1229
1230 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1231 if (mp->m_pkthdr.ether_vtag)
1232 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1233 else
1234 common.flex_bytes ^= etype;
1235 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1236
1237 que = &adapter->queues[txr->me];
1238 /*
1239 ** This assumes the Rx queue and Tx
1240 ** queue are bound to the same CPU
1241 */
1242 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1243 input, common, que->msix);
1244 }
1245 #endif /* IXGBE_FDIR */
1246
1247 /*
1248 ** Used to detect a descriptor that has
1249 ** been merged by Hardware RSC.
1250 */
1251 static inline u32
1252 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1253 {
1254 return (le32toh(rx->wb.lower.lo_dword.data) &
1255 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1256 }
1257
1258 /*********************************************************************
1259 *
1260 * Initialize Hardware RSC (LRO) feature on 82599
1261 * for an RX ring, this is toggled by the LRO capability
1262 * even though it is transparent to the stack.
1263 *
1264 * NOTE: since this HW feature only works with IPV4 and
1265 * our testing has shown soft LRO to be as effective
1266 * I have decided to disable this by default.
1267 *
1268 **********************************************************************/
1269 static void
1270 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1271 {
1272 struct adapter *adapter = rxr->adapter;
1273 struct ixgbe_hw *hw = &adapter->hw;
1274 u32 rscctrl, rdrxctl;
1275
1276 /* If turning LRO/RSC off we need to disable it */
1277 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1278 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1279 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
/* Write back the cleared enable bit so RSC is actually disabled */
IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1280 return;
1281 }
1282
1283 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1284 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1285 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1286 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1287 #endif /* DEV_NETMAP */
1288 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1289 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1290 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1291
1292 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1293 rscctrl |= IXGBE_RSCCTL_RSCEN;
1294 /*
1295 ** Limit the total number of descriptors that
1296 ** can be combined, so it does not exceed 64K
1297 */
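/*
 * The cap on coalesced descriptors scales down as the RX buffer
 * size grows, keeping the maximum merged frame within bounds.
 */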
1298 if (rxr->mbuf_sz == MCLBYTES)
1299 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1300 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1301 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1302 else if (rxr->mbuf_sz == MJUM9BYTES)
1303 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1304 else /* Using 16K cluster */
1305 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1306
1307 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1308
1309 /* Enable TCP header recognition */
1310 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1311 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1312 IXGBE_PSRTYPE_TCPHDR));
1313
1314 /* Disable RSC for ACK packets */
1315 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1316 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1317
1318 rxr->hw_rsc = TRUE;
1319 }
1320
1321 /*********************************************************************
1322 *
1323 * Refresh mbuf buffers for RX descriptor rings
1324 * - now keeps its own state so discards due to resource
1325 * exhaustion are unnecessary; if an mbuf cannot be obtained
1326 * it just returns, keeping its placeholder, so it can simply
1327 * be recalled to try again.
1328 *
1329 **********************************************************************/
1330 static void
1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1332 {
1333 struct adapter *adapter = rxr->adapter;
1334 struct ixgbe_rx_buf *rxbuf;
1335 struct mbuf *mp;
1336 int i, j, error;
1337 bool refreshed = false;
1338
1339 i = j = rxr->next_to_refresh;
1340 /* Control the loop with one beyond */
1341 if (++j == rxr->num_desc)
1342 j = 0;
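/*
 * 'i' is the slot being refreshed and 'j' runs one slot ahead;
 * the loop stops when 'j' reaches the caller-supplied 'limit',
 * so the refresh never catches up with descriptors that have
 * not been processed yet.
 */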
1343
1344 while (j != limit) {
1345 rxbuf = &rxr->rx_buffers[i];
1346 if (rxbuf->buf == NULL) {
1347 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1348 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1349 if (mp == NULL) {
1350 rxr->no_jmbuf.ev_count++;
1351 goto update;
1352 }
1353 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1354 m_adj(mp, ETHER_ALIGN);
1355 } else
1356 mp = rxbuf->buf;
1357
1358 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1359
1360 /* If we're dealing with an mbuf that was copied rather
1361 * than replaced, there's no need to go through busdma.
1362 */
1363 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1364 /* Get the memory mapping */
1365 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1366 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1367 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1368 if (error != 0) {
1369 printf("Refresh mbufs: payload dmamap load"
1370 " failure - %d\n", error);
1371 m_free(mp);
1372 rxbuf->buf = NULL;
1373 goto update;
1374 }
1375 rxbuf->buf = mp;
1376 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1377 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1378 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1379 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1380 } else {
1381 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1382 rxbuf->flags &= ~IXGBE_RX_COPY;
1383 }
1384
1385 refreshed = true;
1386 /* Next is precalculated */
1387 i = j;
1388 rxr->next_to_refresh = i;
1389 if (++j == rxr->num_desc)
1390 j = 0;
1391 }
1392 update:
1393 if (refreshed) /* Update hardware tail index */
1394 IXGBE_WRITE_REG(&adapter->hw,
1395 rxr->tail, rxr->next_to_refresh);
1396 return;
1397 }
1398
1399 /*********************************************************************
1400 *
1401 * Allocate memory for rx_buffer structures. Since we use one
1402 * rx_buffer per received packet, the maximum number of rx_buffer's
1403 * that we'll need is equal to the number of receive descriptors
1404 * that we've allocated.
1405 *
1406 **********************************************************************/
1407 int
1408 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1409 {
1410 struct adapter *adapter = rxr->adapter;
1411 device_t dev = adapter->dev;
1412 struct ixgbe_rx_buf *rxbuf;
1413 int bsize, error;
1414
1415 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1416 if (!(rxr->rx_buffers =
1417 (struct ixgbe_rx_buf *) malloc(bsize,
1418 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1419 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1420 error = ENOMEM;
1421 goto fail;
1422 }
1423
1424 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1425 1, 0, /* alignment, bounds */
1426 MJUM16BYTES, /* maxsize */
1427 1, /* nsegments */
1428 MJUM16BYTES, /* maxsegsize */
1429 0, /* flags */
1430 &rxr->ptag))) {
1431 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1432 goto fail;
1433 }
1434
1435 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1436 rxbuf = &rxr->rx_buffers[i];
1437 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1438 if (error) {
1439 aprint_error_dev(dev, "Unable to create RX dma map\n");
1440 goto fail;
1441 }
1442 }
1443
1444 return (0);
1445
1446 fail:
1447 /* Frees all, but can handle partial completion */
1448 ixgbe_free_receive_structures(adapter);
1449 return (error);
1450 }
1451
1452 static void
1453 ixgbe_free_receive_ring(struct rx_ring *rxr)
1454 {
1455 struct ixgbe_rx_buf *rxbuf;
1456
1457 for (int i = 0; i < rxr->num_desc; i++) {
1458 rxbuf = &rxr->rx_buffers[i];
1459 if (rxbuf->buf != NULL) {
1460 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1461 0, rxbuf->buf->m_pkthdr.len,
1462 BUS_DMASYNC_POSTREAD);
1463 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1464 rxbuf->buf->m_flags |= M_PKTHDR;
1465 m_freem(rxbuf->buf);
1466 rxbuf->buf = NULL;
1467 rxbuf->flags = 0;
1468 }
1469 }
1470 }
1471
1472 /*********************************************************************
1473 *
1474 * Initialize a receive ring and its buffers.
1475 *
1476 **********************************************************************/
1477 static int
1478 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1479 {
1480 struct adapter *adapter;
1481 struct ixgbe_rx_buf *rxbuf;
1482 #ifdef LRO
1483 struct ifnet *ifp;
1484 struct lro_ctrl *lro = &rxr->lro;
1485 #endif /* LRO */
1486 int rsize, error = 0;
1487 #ifdef DEV_NETMAP
1488 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1489 struct netmap_slot *slot;
1490 #endif /* DEV_NETMAP */
1491
1492 adapter = rxr->adapter;
1493 #ifdef LRO
1494 ifp = adapter->ifp;
1495 #endif /* LRO */
1496
1497 /* Clear the ring contents */
1498 IXGBE_RX_LOCK(rxr);
1499 #ifdef DEV_NETMAP
1500 /* same as in ixgbe_setup_transmit_ring() */
1501 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1502 #endif /* DEV_NETMAP */
1503 rsize = roundup2(adapter->num_rx_desc *
1504 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1505 bzero((void *)rxr->rx_base, rsize);
1506 /* Cache the size */
1507 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1508
1509 /* Free current RX buffer structs and their mbufs */
1510 ixgbe_free_receive_ring(rxr);
1511
1512 IXGBE_RX_UNLOCK(rxr);
1513
1514 /* Now reinitialize our supply of jumbo mbufs. The number
1515 * or size of jumbo mbufs may have changed.
1516 */
1517 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1518 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1519
1520 IXGBE_RX_LOCK(rxr);
1521
1522 /* Now replenish the mbufs */
1523 for (int j = 0; j != rxr->num_desc; ++j) {
1524 struct mbuf *mp;
1525
1526 rxbuf = &rxr->rx_buffers[j];
1527 #ifdef DEV_NETMAP
1528 /*
1529 * In netmap mode, fill the map and set the buffer
1530 * address in the NIC ring, considering the offset
1531 * between the netmap and NIC rings (see comment in
1532 * ixgbe_setup_transmit_ring() ). No need to allocate
1533 * an mbuf, so end the block with a continue;
1534 */
1535 if (slot) {
1536 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1537 uint64_t paddr;
1538 void *addr;
1539
1540 addr = PNMB(na, slot + sj, &paddr);
1541 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1542 /* Update descriptor and the cached value */
1543 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1544 rxbuf->addr = htole64(paddr);
1545 continue;
1546 }
1547 #endif /* DEV_NETMAP */
1548 rxbuf->flags = 0;
1549 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1550 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1551 if (rxbuf->buf == NULL) {
1552 error = ENOBUFS;
1553 goto fail;
1554 }
1555 mp = rxbuf->buf;
1556 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1557 /* Get the memory mapping */
1558 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1559 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1560 if (error != 0)
1561 goto fail;
1562 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1563 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1564 /* Update the descriptor and the cached value */
1565 rxr->rx_base[j].read.pkt_addr =
1566 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1567 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1568 }
1569
1570
1571 /* Setup our descriptor indices */
1572 rxr->next_to_check = 0;
1573 rxr->next_to_refresh = 0;
1574 rxr->lro_enabled = FALSE;
1575 rxr->rx_copies.ev_count = 0;
1576 #if 0 /* NetBSD */
1577 rxr->rx_bytes.ev_count = 0;
1578 #if 1 /* Fix inconsistency */
1579 rxr->rx_packets.ev_count = 0;
1580 #endif
1581 #endif
1582 rxr->vtag_strip = FALSE;
1583
1584 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1585 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1586
1587 /*
1588 ** Now set up the LRO interface:
1589 */
1590 if (ixgbe_rsc_enable)
1591 ixgbe_setup_hw_rsc(rxr);
1592 #ifdef LRO
1593 else if (ifp->if_capenable & IFCAP_LRO) {
1594 device_t dev = adapter->dev;
1595 int err = tcp_lro_init(lro);
1596 if (err) {
1597 device_printf(dev, "LRO Initialization failed!\n");
1598 goto fail;
1599 }
1600 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1601 rxr->lro_enabled = TRUE;
1602 lro->ifp = adapter->ifp;
1603 }
1604 #endif /* LRO */
1605
1606 IXGBE_RX_UNLOCK(rxr);
1607 return (0);
1608
1609 fail:
1610 ixgbe_free_receive_ring(rxr);
1611 IXGBE_RX_UNLOCK(rxr);
1612 return (error);
1613 }
1614
1615 /*********************************************************************
1616 *
1617 * Initialize all receive rings.
1618 *
1619 **********************************************************************/
1620 int
1621 ixgbe_setup_receive_structures(struct adapter *adapter)
1622 {
1623 struct rx_ring *rxr = adapter->rx_rings;
1624 int j;
1625
1626 for (j = 0; j < adapter->num_queues; j++, rxr++)
1627 if (ixgbe_setup_receive_ring(rxr))
1628 goto fail;
1629
1630 return (0);
1631 fail:
1632 /*
1633 * Free RX buffers allocated so far, we will only handle
1634 * the rings that completed, the failing case will have
1635 * cleaned up for itself. 'j' failed, so it is the terminus.
1636 */
1637 for (int i = 0; i < j; ++i) {
1638 rxr = &adapter->rx_rings[i];
1639 ixgbe_free_receive_ring(rxr);
1640 }
1641
1642 return (ENOBUFS);
1643 }
1644
1645
1646 /*********************************************************************
1647 *
1648 * Free all receive rings.
1649 *
1650 **********************************************************************/
1651 void
1652 ixgbe_free_receive_structures(struct adapter *adapter)
1653 {
1654 struct rx_ring *rxr = adapter->rx_rings;
1655
1656 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1657
1658 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1659 #ifdef LRO
1660 struct lro_ctrl *lro = &rxr->lro;
1661 #endif /* LRO */
1662 ixgbe_free_receive_buffers(rxr);
1663 #ifdef LRO
1664 /* Free LRO memory */
1665 tcp_lro_free(lro);
1666 #endif /* LRO */
1667 /* Free the ring memory as well */
1668 ixgbe_dma_free(adapter, &rxr->rxdma);
1669 IXGBE_RX_LOCK_DESTROY(rxr);
1670 }
1671
1672 free(adapter->rx_rings, M_DEVBUF);
1673 }
1674
1675
1676 /*********************************************************************
1677 *
1678 * Free receive ring data structures
1679 *
1680 **********************************************************************/
1681 static void
1682 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1683 {
1684 struct adapter *adapter = rxr->adapter;
1685 struct ixgbe_rx_buf *rxbuf;
1686
1687 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1688
1689 /* Cleanup any existing buffers */
1690 if (rxr->rx_buffers != NULL) {
1691 for (int i = 0; i < adapter->num_rx_desc; i++) {
1692 rxbuf = &rxr->rx_buffers[i];
1693 if (rxbuf->buf != NULL) {
1694 bus_dmamap_sync(rxr->ptag->dt_dmat,
1695 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1696 BUS_DMASYNC_POSTREAD);
1697 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1698 rxbuf->buf->m_flags |= M_PKTHDR;
1699 m_freem(rxbuf->buf);
1700 }
1701 rxbuf->buf = NULL;
1702 if (rxbuf->pmap != NULL) {
1703 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1704 rxbuf->pmap = NULL;
1705 }
1706 }
1707 if (rxr->rx_buffers != NULL) {
1708 free(rxr->rx_buffers, M_DEVBUF);
1709 rxr->rx_buffers = NULL;
1710 }
1711 }
1712
1713 if (rxr->ptag != NULL) {
1714 ixgbe_dma_tag_destroy(rxr->ptag);
1715 rxr->ptag = NULL;
1716 }
1717
1718 return;
1719 }
1720
1721 static __inline void
1722 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1723 {
1724 int s;
1725
1726 #ifdef LRO
1727 struct adapter *adapter = ifp->if_softc;
1728 struct ethercom *ec = &adapter->osdep.ec;
1729
1730 /*
1731 * At the moment LRO is only for IP/TCP packets, and the TCP checksum of the
1732 * packet should be computed by hardware. Also it should not have a VLAN tag
1733 * in the ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1734 */
1735 if (rxr->lro_enabled &&
1736 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1737 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1738 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1739 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1740 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1741 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1742 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1743 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1744 /*
1745 * Send to the stack if:
1746 *  - LRO not enabled, or
1747 *  - no LRO resources, or
1748 *  - lro enqueue fails
1749 */
1750 if (rxr->lro.lro_cnt != 0)
1751 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1752 return;
1753 }
1754 #endif /* LRO */
1755
1756 IXGBE_RX_UNLOCK(rxr);
1757
1758 s = splnet();
1759 if_input(ifp, m);
1760 splx(s);
1761
1762 IXGBE_RX_LOCK(rxr);
1763 }
1764
1765 static __inline void
1766 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1767 {
1768 struct ixgbe_rx_buf *rbuf;
1769
1770 rbuf = &rxr->rx_buffers[i];
1771
1772
1773 /*
1774 ** With advanced descriptors the writeback
1775 ** clobbers the buffer addrs, so it's easier
1776 ** to just free the existing mbufs and take
1777 ** the normal refresh path to get new buffers
1778 ** and mapping.
1779 */
1780
1781 if (rbuf->fmp != NULL) {/* Partial chain ? */
1782 rbuf->fmp->m_flags |= M_PKTHDR;
1783 m_freem(rbuf->fmp);
1784 rbuf->fmp = NULL;
1785 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1786 } else if (rbuf->buf) {
1787 m_free(rbuf->buf);
1788 rbuf->buf = NULL;
1789 }
1790 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1791
1792 rbuf->flags = 0;
1793
1794 return;
1795 }
1796
1797
1798 /*********************************************************************
1799 *
1800 * This routine executes in interrupt context. It replenishes
1801 * the mbufs in the descriptor ring and sends data which has been
1802 * DMA'ed into host memory to the upper layer.
1803 *
1804 * Return TRUE for more work, FALSE for all clean.
1805 *********************************************************************/
1806 bool
1807 ixgbe_rxeof(struct ix_queue *que)
1808 {
1809 struct adapter *adapter = que->adapter;
1810 struct rx_ring *rxr = que->rxr;
1811 struct ifnet *ifp = adapter->ifp;
1812 #ifdef LRO
1813 struct lro_ctrl *lro = &rxr->lro;
1814 #endif /* LRO */
1815 int i, nextp, processed = 0;
1816 u32 staterr = 0;
1817 u32 count = adapter->rx_process_limit;
1818 union ixgbe_adv_rx_desc *cur;
1819 struct ixgbe_rx_buf *rbuf, *nbuf;
1820 #ifdef RSS
1821 u16 pkt_info;
1822 #endif
1823
1824 IXGBE_RX_LOCK(rxr);
1825
1826 #ifdef DEV_NETMAP
1827 /* Same as the txeof routine: wakeup clients on intr. */
1828 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1829 IXGBE_RX_UNLOCK(rxr);
1830 return (FALSE);
1831 }
1832 #endif /* DEV_NETMAP */
1833
1834 for (i = rxr->next_to_check; count != 0;) {
1835 struct mbuf *sendmp, *mp;
1836 u32 rsc, ptype;
1837 u16 len;
1838 u16 vtag = 0;
1839 bool eop;
1840
1841 /* Sync the ring. */
1842 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1843 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1844
1845 cur = &rxr->rx_base[i];
1846 staterr = le32toh(cur->wb.upper.status_error);
1847 #ifdef RSS
1848 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1849 #endif
1850
1851 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1852 break;
1853 if ((ifp->if_flags & IFF_RUNNING) == 0)
1854 break;
1855
1856 count--;
1857 sendmp = NULL;
1858 nbuf = NULL;
1859 rsc = 0;
1860 cur->wb.upper.status_error = 0;
1861 rbuf = &rxr->rx_buffers[i];
1862 mp = rbuf->buf;
1863
1864 len = le16toh(cur->wb.upper.length);
1865 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1866 IXGBE_RXDADV_PKTTYPE_MASK;
1867 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1868
1869 /* Make sure bad packets are discarded */
1870 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1871 #if __FreeBSD_version >= 1100036
1872 if (IXGBE_IS_VF(adapter))
1873 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1874 #endif
1875 rxr->rx_discarded.ev_count++;
1876 ixgbe_rx_discard(rxr, i);
1877 goto next_desc;
1878 }
1879
1880 /*
1881 ** On the 82599, which supports hardware
1882 ** LRO (called HW RSC), packets need not be
1883 ** fragmented across sequential descriptors;
1884 ** instead, the next descriptor of a frame is
1885 ** indicated in bits of the current descriptor.
1886 ** This also means we might process more than
1887 ** one packet at a time, something that was
1888 ** never true before; it required eliminating
1889 ** global chain pointers in favor of what we
1890 ** are doing here. -jfv
1891 */
1892 if (!eop) {
1893 /*
1894 ** Figure out the next descriptor
1895 ** of this frame.
1896 */
1897 if (rxr->hw_rsc == TRUE) {
1898 rsc = ixgbe_rsc_count(cur);
1899 rxr->rsc_num += (rsc - 1);
1900 }
1901 if (rsc) { /* Get hardware index */
1902 nextp = ((staterr &
1903 IXGBE_RXDADV_NEXTP_MASK) >>
1904 IXGBE_RXDADV_NEXTP_SHIFT);
1905 } else { /* Just sequential */
1906 nextp = i + 1;
1907 if (nextp == adapter->num_rx_desc)
1908 nextp = 0;
1909 }
1910 nbuf = &rxr->rx_buffers[nextp];
1911 prefetch(nbuf);
1912 }
1913 /*
1914 ** Rather than using the fmp/lmp global pointers
1915 ** we now keep the head of a packet chain in the
1916 ** buffer struct and pass this along from one
1917 ** descriptor to the next, until we get EOP.
1918 */
1919 mp->m_len = len;
1920 /*
1921 ** See if there is a stored head
1922 ** that determines what we are
1923 */
1924 sendmp = rbuf->fmp;
1925 if (sendmp != NULL) { /* secondary frag */
1926 rbuf->buf = rbuf->fmp = NULL;
1927 mp->m_flags &= ~M_PKTHDR;
1928 sendmp->m_pkthdr.len += mp->m_len;
1929 } else {
1930 /*
1931 * Optimize. This might be a small packet,
1932 * maybe just a TCP ACK. Do a fast copy that
1933 * is cache aligned into a new mbuf, and
1934 * leave the old mbuf+cluster for re-use.
1935 */
1936 if (eop && len <= IXGBE_RX_COPY_LEN) {
1937 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1938 if (sendmp != NULL) {
1939 sendmp->m_data +=
1940 IXGBE_RX_COPY_ALIGN;
1941 ixgbe_bcopy(mp->m_data,
1942 sendmp->m_data, len);
1943 sendmp->m_len = len;
1944 rxr->rx_copies.ev_count++;
1945 rbuf->flags |= IXGBE_RX_COPY;
1946 }
1947 }
1948 if (sendmp == NULL) {
1949 rbuf->buf = rbuf->fmp = NULL;
1950 sendmp = mp;
1951 }
1952
1953 /* first desc of a non-ps chain */
1954 sendmp->m_flags |= M_PKTHDR;
1955 sendmp->m_pkthdr.len = mp->m_len;
1956 }
1957 ++processed;
1958
1959 /* Pass the head pointer on */
1960 if (eop == 0) {
1961 nbuf->fmp = sendmp;
1962 sendmp = NULL;
1963 mp->m_next = nbuf->buf;
1964 } else { /* Sending this frame */
1965 m_set_rcvif(sendmp, ifp);
1966 ifp->if_ipackets++;
1967 rxr->rx_packets.ev_count++;
1968 /* capture data for AIM */
1969 rxr->bytes += sendmp->m_pkthdr.len;
1970 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1971 /* Process vlan info */
1972 if ((rxr->vtag_strip) &&
1973 (staterr & IXGBE_RXD_STAT_VP))
1974 vtag = le16toh(cur->wb.upper.vlan);
1975 if (vtag) {
1976 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1977 printf("%s: could not apply VLAN "
1978 "tag", __func__));
1979 }
1980 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1981 ixgbe_rx_checksum(staterr, sendmp, ptype,
1982 &adapter->stats.pf);
1983 }
1984
1985 #if 0 /* FreeBSD */
1986 /*
1987 * In case of multiqueue, we have RXCSUM.PCSD bit set
1988 * and never cleared. This means we have RSS hash
1989 * available to be used.
1990 */
1991 if (adapter->num_queues > 1) {
1992 sendmp->m_pkthdr.flowid =
1993 le32toh(cur->wb.lower.hi_dword.rss);
1994 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1995 case IXGBE_RXDADV_RSSTYPE_IPV4:
1996 M_HASHTYPE_SET(sendmp,
1997 M_HASHTYPE_RSS_IPV4);
1998 break;
1999 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2000 M_HASHTYPE_SET(sendmp,
2001 M_HASHTYPE_RSS_TCP_IPV4);
2002 break;
2003 case IXGBE_RXDADV_RSSTYPE_IPV6:
2004 M_HASHTYPE_SET(sendmp,
2005 M_HASHTYPE_RSS_IPV6);
2006 break;
2007 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2008 M_HASHTYPE_SET(sendmp,
2009 M_HASHTYPE_RSS_TCP_IPV6);
2010 break;
2011 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2012 M_HASHTYPE_SET(sendmp,
2013 M_HASHTYPE_RSS_IPV6_EX);
2014 break;
2015 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2016 M_HASHTYPE_SET(sendmp,
2017 M_HASHTYPE_RSS_TCP_IPV6_EX);
2018 break;
2019 #if __FreeBSD_version > 1100000
2020 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2021 M_HASHTYPE_SET(sendmp,
2022 M_HASHTYPE_RSS_UDP_IPV4);
2023 break;
2024 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2025 M_HASHTYPE_SET(sendmp,
2026 M_HASHTYPE_RSS_UDP_IPV6);
2027 break;
2028 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2029 M_HASHTYPE_SET(sendmp,
2030 M_HASHTYPE_RSS_UDP_IPV6_EX);
2031 break;
2032 #endif
2033 default:
2034 M_HASHTYPE_SET(sendmp,
2035 M_HASHTYPE_OPAQUE_HASH);
2036 }
2037 } else {
2038 sendmp->m_pkthdr.flowid = que->msix;
2039 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2040 }
2041 #endif
2042 }
2043 next_desc:
2044 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2045 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2046
2047 /* Advance our pointers to the next descriptor. */
2048 if (++i == rxr->num_desc)
2049 i = 0;
2050
2051 /* Now send to the stack or do LRO */
2052 if (sendmp != NULL) {
2053 rxr->next_to_check = i;
2054 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2055 i = rxr->next_to_check;
2056 }
2057
2058 /* Refresh the mbuf supply every eight processed descriptors */
2059 if (processed == 8) {
2060 ixgbe_refresh_mbufs(rxr, i);
2061 processed = 0;
2062 }
2063 }
2064
2065 /* Refresh any remaining buf structs */
2066 if (ixgbe_rx_unrefreshed(rxr))
2067 ixgbe_refresh_mbufs(rxr, i);
2068
2069 rxr->next_to_check = i;
2070
2071 #ifdef LRO
2072 /*
2073 * Flush any outstanding LRO work
2074 */
2075 tcp_lro_flush_all(lro);
2076 #endif /* LRO */
2077
2078 IXGBE_RX_UNLOCK(rxr);
2079
2080 /*
2081 ** Still have cleaning to do?
2082 */
2083 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2084 return true;
2085 else
2086 return false;
2087 }
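
/*
 * Illustrative sketch only, not part of the original driver: one way a
 * queue service routine could consume the boolean "more work" value
 * returned by ixgbe_rxeof().  The handler name below is a hypothetical
 * placeholder, not a real driver entry point.
 */
#if 0 /* example sketch */
static void
example_que_service(void *arg)	/* hypothetical softint handler */
{
	struct ix_queue *que = arg;

	/* true means completed descriptors may remain; keep cleaning. */
	while (ixgbe_rxeof(que))
		continue;

	/* A real handler would re-enable the queue interrupt here. */
}
#endif /* example sketch */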
2088
2089
2090 /*********************************************************************
2091 *
2092 * Verify that the hardware indicated that the checksum is valid.
2093 * Inform the stack about the checksum status so that the stack
2094 * doesn't spend time verifying the checksum.
2095 *
2096 *********************************************************************/
2097 static void
2098 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2099 struct ixgbe_hw_stats *stats)
2100 {
2101 u16 status = (u16) staterr;
2102 u8 errors = (u8) (staterr >> 24);
2103 #if 0
2104 bool sctp = false;
2105
2106 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2107 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2108 sctp = true;
2109 #endif
2110
2111 /* IPv4 checksum */
2112 if (status & IXGBE_RXD_STAT_IPCS) {
2113 stats->ipcs.ev_count++;
2114 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2115 /* IP Checksum Good */
2116 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2117 } else {
2118 stats->ipcs_bad.ev_count++;
2119 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2120 }
2121 }
2122 /* TCP/UDP/SCTP checksum */
2123 if (status & IXGBE_RXD_STAT_L4CS) {
2124 stats->l4cs.ev_count++;
2125 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2126 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2127 mp->m_pkthdr.csum_flags |= type;
2128 } else {
2129 stats->l4cs_bad.ev_count++;
2130 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2131 }
2132 }
2133 }
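
/*
 * Illustrative sketch only, not part of the original driver: how a
 * consumer typically interprets the NetBSD M_CSUM_* receive flags set
 * above ("m" stands for an arbitrary received mbuf).  A flag set
 * without its *_BAD companion means the hardware verified that
 * checksum.
 */
#if 0 /* example sketch */
	if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) {
		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4_BAD) {
			/* Hardware flagged a bad IPv4 header checksum. */
		} else {
			/* IPv4 header checksum verified in hardware. */
		}
	}
#endif /* example sketch */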
2134
2135
2136 /********************************************************************
2137 * Manage DMA'able memory.
2138 *******************************************************************/
2139
2140 int
2141 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2142 struct ixgbe_dma_alloc *dma, const int mapflags)
2143 {
2144 device_t dev = adapter->dev;
2145 int r, rsegs;
2146
2147 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2148 DBA_ALIGN, 0, /* alignment, bounds */
2149 size, /* maxsize */
2150 1, /* nsegments */
2151 size, /* maxsegsize */
2152 BUS_DMA_ALLOCNOW, /* flags */
2153 &dma->dma_tag);
2154 if (r != 0) {
2155 aprint_error_dev(dev,
2156 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2157 goto fail_0;
2158 }
2159
2160 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2161 size,
2162 dma->dma_tag->dt_alignment,
2163 dma->dma_tag->dt_boundary,
2164 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2165 if (r != 0) {
2166 aprint_error_dev(dev,
2167 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2168 goto fail_1;
2169 }
2170
2171 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2172 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2173 if (r != 0) {
2174 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2175 __func__, r);
2176 goto fail_2;
2177 }
2178
2179 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2180 if (r != 0) {
2181 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2182 __func__, r);
2183 goto fail_3;
2184 }
2185
2186 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2187 size,
2188 NULL,
2189 mapflags | BUS_DMA_NOWAIT);
2190 if (r != 0) {
2191 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2192 __func__, r);
2193 goto fail_4;
2194 }
2195 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2196 dma->dma_size = size;
2197 return 0;
2198 fail_4:
2199 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2200 fail_3:
2201 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2202 fail_2:
2203 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2204 fail_1:
2205 ixgbe_dma_tag_destroy(dma->dma_tag);
2206 fail_0:
2207 return r;
2208 }
2209
2210 void
2211 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2212 {
2213 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2214 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2215 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2216 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2217 ixgbe_dma_tag_destroy(dma->dma_tag);
2218 }
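
/*
 * Illustrative sketch only, not part of the original driver: the
 * intended pairing of ixgbe_dma_malloc() and ixgbe_dma_free() for a
 * DMA-able descriptor area.  "example_bytes" is a placeholder size,
 * not a driver constant, and an "adapter" softc is assumed in scope.
 */
#if 0 /* example sketch */
	struct ixgbe_dma_alloc dma;
	const bus_size_t example_bytes = 4096;

	if (ixgbe_dma_malloc(adapter, example_bytes, &dma,
	    BUS_DMA_NOWAIT) == 0) {
		/* dma.dma_vaddr: kernel VA, dma.dma_paddr: bus address. */
		ixgbe_dma_free(adapter, &dma);
	}
#endif /* example sketch */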
2219
2220
2221 /*********************************************************************
2222 *
2223 * Allocate memory for the transmit and receive rings, and then
2224 * the descriptors associated with each; called only once at attach.
2225 *
2226 **********************************************************************/
2227 int
2228 ixgbe_allocate_queues(struct adapter *adapter)
2229 {
2230 device_t dev = adapter->dev;
2231 struct ix_queue *que;
2232 struct tx_ring *txr;
2233 struct rx_ring *rxr;
2234 int rsize, tsize, error = IXGBE_SUCCESS;
2235 int txconf = 0, rxconf = 0;
2236 #ifdef PCI_IOV
2237 enum ixgbe_iov_mode iov_mode;
2238 #endif
2239
2240 /* First allocate the top level queue structs */
2241 if (!(adapter->queues =
2242 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2243 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2244 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2245 error = ENOMEM;
2246 goto fail;
2247 }
2248
2249 /* Next allocate the TX ring struct memory */
2250 if (!(adapter->tx_rings =
2251 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2252 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2253 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2254 error = ENOMEM;
2255 goto tx_fail;
2256 }
2257
2258 /* Next allocate the RX */
2259 if (!(adapter->rx_rings =
2260 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2261 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2262 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2263 error = ENOMEM;
2264 goto rx_fail;
2265 }
2266
2267 /* For the ring itself */
2268 tsize = roundup2(adapter->num_tx_desc *
2269 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2270
2271 #ifdef PCI_IOV
2272 iov_mode = ixgbe_get_iov_mode(adapter);
2273 adapter->pool = ixgbe_max_vfs(iov_mode);
2274 #else
2275 adapter->pool = 0;
2276 #endif
2277 /*
2278 * Now set up the TX queues, txconf is needed to handle the
2279 * possibility that things fail midcourse and we need to
2280 * undo memory gracefully
2281 */
2282 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2283 /* Set up some basics */
2284 txr = &adapter->tx_rings[i];
2285 txr->adapter = adapter;
2286 #ifdef PCI_IOV
2287 txr->me = ixgbe_pf_que_index(iov_mode, i);
2288 #else
2289 txr->me = i;
2290 #endif
2291 txr->num_desc = adapter->num_tx_desc;
2292
2293 /* Initialize the TX side lock */
2294 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2295 device_xname(dev), txr->me);
2296 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2297
2298 if (ixgbe_dma_malloc(adapter, tsize,
2299 &txr->txdma, BUS_DMA_NOWAIT)) {
2300 aprint_error_dev(dev,
2301 "Unable to allocate TX Descriptor memory\n");
2302 error = ENOMEM;
2303 goto err_tx_desc;
2304 }
2305 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2306 bzero((void *)txr->tx_base, tsize);
2307
2308 /* Now allocate transmit buffers for the ring */
2309 if (ixgbe_allocate_transmit_buffers(txr)) {
2310 aprint_error_dev(dev,
2311 "Critical Failure setting up transmit buffers\n");
2312 error = ENOMEM;
2313 goto err_tx_desc;
2314 }
2315 #ifndef IXGBE_LEGACY_TX
2316 /* Allocate a buf ring */
2317 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2318 M_WAITOK, &txr->tx_mtx);
2319 if (txr->br == NULL) {
2320 aprint_error_dev(dev,
2321 "Critical Failure setting up buf ring\n");
2322 error = ENOMEM;
2323 goto err_tx_desc;
2324 }
2325 #endif
2326 }
2327
2328 /*
2329 * Next the RX queues...
2330 */
2331 rsize = roundup2(adapter->num_rx_desc *
2332 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2333 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2334 rxr = &adapter->rx_rings[i];
2335 /* Set up some basics */
2336 rxr->adapter = adapter;
2337 #ifdef PCI_IOV
2338 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2339 #else
2340 rxr->me = i;
2341 #endif
2342 rxr->num_desc = adapter->num_rx_desc;
2343
2344 /* Initialize the RX side lock */
2345 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2346 device_xname(dev), rxr->me);
2347 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2348
2349 if (ixgbe_dma_malloc(adapter, rsize,
2350 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2351 aprint_error_dev(dev,
2352 "Unable to allocate RX Descriptor memory\n");
2353 error = ENOMEM;
2354 goto err_rx_desc;
2355 }
2356 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2357 bzero((void *)rxr->rx_base, rsize);
2358
2359 /* Allocate receive buffers for the ring */
2360 if (ixgbe_allocate_receive_buffers(rxr)) {
2361 aprint_error_dev(dev,
2362 "Critical Failure setting up receive buffers\n");
2363 error = ENOMEM;
2364 goto err_rx_desc;
2365 }
2366 }
2367
2368 /*
2369 ** Finally set up the queue holding structs
2370 */
2371 for (int i = 0; i < adapter->num_queues; i++) {
2372 que = &adapter->queues[i];
2373 que->adapter = adapter;
2374 que->me = i;
2375 que->txr = &adapter->tx_rings[i];
2376 que->rxr = &adapter->rx_rings[i];
2377 }
2378
2379 return (0);
2380
2381 err_rx_desc:
2382 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2383 ixgbe_dma_free(adapter, &rxr->rxdma);
2384 err_tx_desc:
2385 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2386 ixgbe_dma_free(adapter, &txr->txdma);
2387 free(adapter->rx_rings, M_DEVBUF);
2388 rx_fail:
2389 free(adapter->tx_rings, M_DEVBUF);
2390 tx_fail:
2391 free(adapter->queues, M_DEVBUF);
2392 fail:
2393 return (error);
2394 }
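
/*
 * Illustrative sketch only, not part of the original driver: after a
 * successful ixgbe_allocate_queues(), queue i is paired with TX ring i
 * and RX ring i, so per-queue code can rely on the wiring done in the
 * final loop above (an "adapter" softc is assumed in scope).
 */
#if 0 /* example sketch */
	for (int i = 0; i < adapter->num_queues; i++) {
		struct ix_queue *q = &adapter->queues[i];

		KASSERT(q->txr == &adapter->tx_rings[i]);
		KASSERT(q->rxr == &adapter->rx_rings[i]);
	}
#endif /* example sketch */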
2395
2396