/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 282289 2015-04-30 22:53:27Z erj $*/
62 /*$NetBSD: ix_txrx.c,v 1.4 2016/12/01 06:56:28 msaitoh Exp $*/
63
64 #include "ixgbe.h"
65
66 #ifdef DEV_NETMAP
67 #include <net/netmap.h>
68 #include <sys/selinfo.h>
69 #include <dev/netmap/netmap_kern.h>
70
71 extern int ix_crcstrip;
72 #endif
73
/*
** HW RSC control:
** this feature only works with
** IPv4, and only on 82599 and later.
** It also breaks IP forwarding, which,
** unlike with LRO, cannot be controlled
** by the stack. For all these reasons it
** is left off by default and there is no
** tunable interface; enabling it requires
** recompiling with this set to TRUE.
*/
86 static bool ixgbe_rsc_enable = FALSE;
87
88 #ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** sampling rate for TX packets fed
** to the filter pool; with the default
** of 20, every 20th packet is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
98 static int atr_sample_rate = 20;
99 #endif
100
101 /* Shared PCI config read/write */
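/*
 * pci_conf_read()/pci_conf_write() operate on naturally aligned 32-bit
 * registers, while the shared Intel code wants 16-bit accesses, so these
 * helpers read (or read-modify-write) the containing dword and extract
 * or insert the requested 16-bit half based on the register offset.
 */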
102 u16
103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
104 {
105 switch (reg % 4) {
106 case 0:
107 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
108 __BITS(15, 0);
109 case 2:
110 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
111 reg - 2), __BITS(31, 16));
112 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
114 break;
115 }
116 }
117
118 void
119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
120 {
121 pcireg_t old;
122
123 switch (reg % 4) {
124 case 0:
125 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
126 __BITS(31, 16);
127 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
128 break;
129 case 2:
130 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
131 __BITS(15, 0);
132 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
133 __SHIFTIN(value, __BITS(31, 16)) | old);
134 break;
135 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
137 break;
138 }
139
140 return;
141 }
142
143 /*********************************************************************
144 * Local Function prototypes
145 *********************************************************************/
146 static void ixgbe_setup_transmit_ring(struct tx_ring *);
147 static void ixgbe_free_transmit_buffers(struct tx_ring *);
148 static int ixgbe_setup_receive_ring(struct rx_ring *);
149 static void ixgbe_free_receive_buffers(struct rx_ring *);
150
151 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
152 struct ixgbe_hw_stats *);
153 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
154 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
155 static int ixgbe_tx_ctx_setup(struct tx_ring *,
156 struct mbuf *, u32 *, u32 *);
157 static int ixgbe_tso_setup(struct tx_ring *,
158 struct mbuf *, u32 *, u32 *);
159 #ifdef IXGBE_FDIR
160 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
161 #endif
162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
164 struct mbuf *, u32);
165
166 static void ixgbe_setup_hw_rsc(struct rx_ring *);
167
168 #ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/
178
179 void
180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
181 {
182 int rc;
183 struct mbuf *m_head;
184 struct adapter *adapter = txr->adapter;
185
186 IXGBE_TX_LOCK_ASSERT(txr);
187
188 if ((ifp->if_flags & IFF_RUNNING) == 0)
189 return;
190 if (!adapter->link_active)
191 return;
192
193 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
194 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
195 break;
196
197 IFQ_POLL(&ifp->if_snd, m_head);
198 if (m_head == NULL)
199 break;
200
201 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
202 break;
203 }
204 IFQ_DEQUEUE(&ifp->if_snd, m_head);
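		/*
		 * EFBIG means the chain needed more DMA segments than
		 * the map allows; try a single m_defrag() to coalesce
		 * the mbuf chain and retry, otherwise the packet is
		 * dropped below.
		 */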
205 if (rc == EFBIG) {
206 struct mbuf *mtmp;
207
208 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
209 m_head = mtmp;
210 rc = ixgbe_xmit(txr, m_head);
211 if (rc != 0)
212 adapter->efbig2_tx_dma_setup.ev_count++;
213 } else
214 adapter->m_defrag_failed.ev_count++;
215 }
216 if (rc != 0) {
217 m_freem(m_head);
218 continue;
219 }
220
221 /* Send a copy of the frame to the BPF listener */
222 bpf_mtap(ifp, m_head);
223 }
224 return;
225 }
226
/*
 * Legacy TX start - called by the stack. This
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
232 void
233 ixgbe_start(struct ifnet *ifp)
234 {
235 struct adapter *adapter = ifp->if_softc;
236 struct tx_ring *txr = adapter->tx_rings;
237
238 if (ifp->if_flags & IFF_RUNNING) {
239 IXGBE_TX_LOCK(txr);
240 ixgbe_start_locked(txr, ifp);
241 IXGBE_TX_UNLOCK(txr);
242 }
243 return;
244 }
245
246 #else /* ! IXGBE_LEGACY_TX */
247
248 /*
249 ** Multiqueue Transmit driver
250 **
251 */
252 int
253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
254 {
255 struct adapter *adapter = ifp->if_softc;
256 struct ix_queue *que;
257 struct tx_ring *txr;
258 int i, err = 0;
259 #ifdef RSS
260 uint32_t bucket_id;
261 #endif
262
	/*
	 * When doing RSS, map the packet to the same outbound queue
	 * that the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, this should be the
	 * same bucket that the current CPU belongs to.
	 */
270 #if __FreeBSD_version < 1100054
271 if (m->m_flags & M_FLOWID) {
272 #else
273 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
274 #endif
275 #ifdef RSS
276 if (rss_hash2bucket(m->m_pkthdr.flowid,
277 M_HASHTYPE_GET(m), &bucket_id) == 0)
278 /* TODO: spit out something if bucket_id > num_queues? */
279 i = bucket_id % adapter->num_queues;
280 else
281 #endif
282 i = m->m_pkthdr.flowid % adapter->num_queues;
283 } else
284 i = curcpu % adapter->num_queues;
285
286 /* Check for a hung queue and pick alternative */
287 if (((1 << i) & adapter->active_queues) == 0)
288 i = ffsl(adapter->active_queues);
289
290 txr = &adapter->tx_rings[i];
291 que = &adapter->queues[i];
292
293 err = drbr_enqueue(ifp, txr->br, m);
294 if (err)
295 return (err);
296 if (IXGBE_TX_TRYLOCK(txr)) {
297 ixgbe_mq_start_locked(ifp, txr);
298 IXGBE_TX_UNLOCK(txr);
299 } else
300 softint_schedule(txr->txq_si);
301
302 return (0);
303 }
304
305 int
306 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
307 {
308 struct adapter *adapter = txr->adapter;
309 struct mbuf *next;
310 int enqueued = 0, err = 0;
311
312 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
313 adapter->link_active == 0)
314 return (ENETDOWN);
315
316 /* Process the queue */
317 #if __FreeBSD_version < 901504
318 next = drbr_dequeue(ifp, txr->br);
319 while (next != NULL) {
320 if ((err = ixgbe_xmit(txr, &next)) != 0) {
321 if (next != NULL)
322 err = drbr_enqueue(ifp, txr->br, next);
323 #else
324 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
325 if ((err = ixgbe_xmit(txr, &next)) != 0) {
326 if (next == NULL) {
327 drbr_advance(ifp, txr->br);
328 } else {
329 drbr_putback(ifp, txr->br, next);
330 }
331 #endif
332 break;
333 }
334 #if __FreeBSD_version >= 901504
335 drbr_advance(ifp, txr->br);
336 #endif
337 enqueued++;
338 #if 0 // this is VF-only
339 #if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
345 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
346 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
347 #endif
348 #endif
349 /* Send a copy of the frame to the BPF listener */
350 bpf_mtap(ifp, next);
351 if ((ifp->if_flags & IFF_RUNNING) == 0)
352 break;
353 #if __FreeBSD_version < 901504
354 next = drbr_dequeue(ifp, txr->br);
355 #endif
356 }
357
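	/*
	 * If free descriptors are running low, reclaim completed ones
	 * now rather than waiting for the next TX interrupt.
	 */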
358 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
359 ixgbe_txeof(txr);
360
361 return (err);
362 }
363
364 /*
365 * Called from a taskqueue to drain queued transmit packets.
366 */
367 void
368 ixgbe_deferred_mq_start(void *arg, int pending)
369 {
370 struct tx_ring *txr = arg;
371 struct adapter *adapter = txr->adapter;
372 struct ifnet *ifp = adapter->ifp;
373
374 IXGBE_TX_LOCK(txr);
375 if (!drbr_empty(ifp, txr->br))
376 ixgbe_mq_start_locked(ifp, txr);
377 IXGBE_TX_UNLOCK(txr);
378 }
379
380 /*
381 * Flush all ring buffers
382 */
383 void
384 ixgbe_qflush(struct ifnet *ifp)
385 {
386 struct adapter *adapter = ifp->if_softc;
387 struct tx_ring *txr = adapter->tx_rings;
388 struct mbuf *m;
389
390 for (int i = 0; i < adapter->num_queues; i++, txr++) {
391 IXGBE_TX_LOCK(txr);
392 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
393 m_freem(m);
394 IXGBE_TX_UNLOCK(txr);
395 }
396 if_qflush(ifp);
397 }
398 #endif /* IXGBE_LEGACY_TX */
399
400
401 /*********************************************************************
402 *
403 * This routine maps the mbufs to tx descriptors, allowing the
404 * TX engine to transmit the packets.
405 * - return 0 on success, positive on failure
406 *
407 **********************************************************************/
408
409 static int
410 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
411 {
412 struct m_tag *mtag;
413 struct adapter *adapter = txr->adapter;
414 struct ethercom *ec = &adapter->osdep.ec;
415 u32 olinfo_status = 0, cmd_type_len;
416 int i, j, error;
417 int first;
418 bus_dmamap_t map;
419 struct ixgbe_tx_buf *txbuf;
420 union ixgbe_adv_tx_desc *txd = NULL;
421
422 /* Basic descriptor defines */
423 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
424 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
425
426 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
427 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
428
	/*
	 * It is important to capture the first descriptor used,
	 * because that is where we store the pointer to the
	 * descriptor we tell the hardware to report back on.
	 */
434 first = txr->next_avail_desc;
435 txbuf = &txr->tx_buffers[first];
436 map = txbuf->map;
437
438 /*
439 * Map the packet for DMA.
440 */
441 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
442 m_head, BUS_DMA_NOWAIT);
443
444 if (__predict_false(error)) {
445
446 switch (error) {
447 case EAGAIN:
448 adapter->eagain_tx_dma_setup.ev_count++;
449 return EAGAIN;
450 case ENOMEM:
451 adapter->enomem_tx_dma_setup.ev_count++;
452 return EAGAIN;
453 case EFBIG:
454 /*
455 * XXX Try it again?
456 * do m_defrag() and retry bus_dmamap_load_mbuf().
457 */
458 adapter->efbig_tx_dma_setup.ev_count++;
459 return error;
460 case EINVAL:
461 adapter->einval_tx_dma_setup.ev_count++;
462 return error;
463 default:
464 adapter->other_tx_dma_setup.ev_count++;
465 return error;
466 }
467 }
468
469 /* Make certain there are enough descriptors */
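	/*
	 * The two-descriptor slack below covers the context descriptor
	 * that ixgbe_tx_ctx_setup() may consume, and still leaves at
	 * least one descriptor free.
	 */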
470 if (map->dm_nsegs > txr->tx_avail - 2) {
471 txr->no_desc_avail.ev_count++;
472 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
473 return EAGAIN;
474 }
475
476 /*
477 * Set up the appropriate offload context
478 * this will consume the first descriptor
479 */
480 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
481 if (__predict_false(error)) {
482 return (error);
483 }
484
485 #ifdef IXGBE_FDIR
486 /* Do the flow director magic */
487 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
488 ++txr->atr_count;
489 if (txr->atr_count >= atr_sample_rate) {
490 ixgbe_atr(txr, m_head);
491 txr->atr_count = 0;
492 }
493 }
494 #endif
495
496 i = txr->next_avail_desc;
497 for (j = 0; j < map->dm_nsegs; j++) {
498 bus_size_t seglen;
499 bus_addr_t segaddr;
500
501 txbuf = &txr->tx_buffers[i];
502 txd = &txr->tx_base[i];
503 seglen = map->dm_segs[j].ds_len;
504 segaddr = htole64(map->dm_segs[j].ds_addr);
505
506 txd->read.buffer_addr = segaddr;
507 txd->read.cmd_type_len = htole32(txr->txd_cmd |
508 cmd_type_len |seglen);
509 txd->read.olinfo_status = htole32(olinfo_status);
510
511 if (++i == txr->num_desc)
512 i = 0;
513 }
514
515 txd->read.cmd_type_len |=
516 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
517 txr->tx_avail -= map->dm_nsegs;
518 txr->next_avail_desc = i;
519
520 txbuf->m_head = m_head;
	/*
	 * Here we swap the maps so that the last descriptor,
	 * which gets the completion interrupt, keeps the real
	 * (loaded) map, and the first descriptor gets the
	 * unused map from this buffer.
	 */
527 txr->tx_buffers[first].map = txbuf->map;
528 txbuf->map = map;
529 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
530 BUS_DMASYNC_PREWRITE);
531
532 /* Set the EOP descriptor that will be marked done */
533 txbuf = &txr->tx_buffers[first];
534 txbuf->eop = txd;
535
536 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
537 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
542 ++txr->total_packets.ev_count;
543 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
544
545 /* Mark queue as having work */
546 if (txr->busy == 0)
547 txr->busy = 1;
548
549 return 0;
550 }
551
552 /*********************************************************************
553 *
554 * Allocate memory for tx_buffer structures. The tx_buffer stores all
555 * the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
557 *
558 **********************************************************************/
559 int
560 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
561 {
562 struct adapter *adapter = txr->adapter;
563 device_t dev = adapter->dev;
564 struct ixgbe_tx_buf *txbuf;
565 int error, i;
566
567 /*
568 * Setup DMA descriptor areas.
569 */
570 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
571 1, 0, /* alignment, bounds */
572 IXGBE_TSO_SIZE, /* maxsize */
573 adapter->num_segs, /* nsegments */
574 PAGE_SIZE, /* maxsegsize */
575 0, /* flags */
576 &txr->txtag))) {
577 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
578 goto fail;
579 }
580
581 if (!(txr->tx_buffers =
582 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
583 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
584 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
585 error = ENOMEM;
586 goto fail;
587 }
588
589 /* Create the descriptor buffer dma maps */
590 txbuf = txr->tx_buffers;
591 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
592 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
593 if (error != 0) {
594 aprint_error_dev(dev,
595 "Unable to create TX DMA map (%d)\n", error);
596 goto fail;
597 }
598 }
599
600 return 0;
601 fail:
	/* Free everything; this handles the case where we failed partway through */
603 ixgbe_free_transmit_structures(adapter);
604 return (error);
605 }
606
607 /*********************************************************************
608 *
609 * Initialize a transmit ring.
610 *
611 **********************************************************************/
612 static void
613 ixgbe_setup_transmit_ring(struct tx_ring *txr)
614 {
615 struct adapter *adapter = txr->adapter;
616 struct ixgbe_tx_buf *txbuf;
617 int i;
618 #ifdef DEV_NETMAP
619 struct netmap_adapter *na = NA(adapter->ifp);
620 struct netmap_slot *slot;
621 #endif /* DEV_NETMAP */
622
623 /* Clear the old ring contents */
624 IXGBE_TX_LOCK(txr);
625 #ifdef DEV_NETMAP
626 /*
627 * (under lock): if in netmap mode, do some consistency
628 * checks and set slot to entry 0 of the netmap ring.
629 */
630 slot = netmap_reset(na, NR_TX, txr->me, 0);
631 #endif /* DEV_NETMAP */
632 bzero((void *)txr->tx_base,
633 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
634 /* Reset indices */
635 txr->next_avail_desc = 0;
636 txr->next_to_clean = 0;
637
638 /* Free any existing tx buffers. */
639 txbuf = txr->tx_buffers;
640 for (i = 0; i < txr->num_desc; i++, txbuf++) {
641 if (txbuf->m_head != NULL) {
642 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
643 0, txbuf->m_head->m_pkthdr.len,
644 BUS_DMASYNC_POSTWRITE);
645 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
646 m_freem(txbuf->m_head);
647 txbuf->m_head = NULL;
648 }
649 #ifdef DEV_NETMAP
650 /*
651 * In netmap mode, set the map for the packet buffer.
652 * NOTE: Some drivers (not this one) also need to set
653 * the physical buffer address in the NIC ring.
654 * Slots in the netmap ring (indexed by "si") are
655 * kring->nkr_hwofs positions "ahead" wrt the
656 * corresponding slot in the NIC ring. In some drivers
657 * (not here) nkr_hwofs can be negative. Function
658 * netmap_idx_n2k() handles wraparounds properly.
659 */
660 if (slot) {
661 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
662 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
663 }
664 #endif /* DEV_NETMAP */
665 /* Clear the EOP descriptor pointer */
666 txbuf->eop = NULL;
667 }
668
669 #ifdef IXGBE_FDIR
670 /* Set the rate at which we sample packets */
671 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
672 txr->atr_sample = atr_sample_rate;
673 #endif
674
675 /* Set number of descriptors available */
676 txr->tx_avail = adapter->num_tx_desc;
677
678 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
679 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
680 IXGBE_TX_UNLOCK(txr);
681 }
682
683 /*********************************************************************
684 *
685 * Initialize all transmit rings.
686 *
687 **********************************************************************/
688 int
689 ixgbe_setup_transmit_structures(struct adapter *adapter)
690 {
691 struct tx_ring *txr = adapter->tx_rings;
692
693 for (int i = 0; i < adapter->num_queues; i++, txr++)
694 ixgbe_setup_transmit_ring(txr);
695
696 return (0);
697 }
698
699 /*********************************************************************
700 *
701 * Free all transmit rings.
702 *
703 **********************************************************************/
704 void
705 ixgbe_free_transmit_structures(struct adapter *adapter)
706 {
707 struct tx_ring *txr = adapter->tx_rings;
708
709 for (int i = 0; i < adapter->num_queues; i++, txr++) {
710 ixgbe_free_transmit_buffers(txr);
711 ixgbe_dma_free(adapter, &txr->txdma);
712 IXGBE_TX_LOCK_DESTROY(txr);
713 }
714 free(adapter->tx_rings, M_DEVBUF);
715 }
716
717 /*********************************************************************
718 *
719 * Free transmit ring related data structures.
720 *
721 **********************************************************************/
722 static void
723 ixgbe_free_transmit_buffers(struct tx_ring *txr)
724 {
725 struct adapter *adapter = txr->adapter;
726 struct ixgbe_tx_buf *tx_buffer;
727 int i;
728
729 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
730
731 if (txr->tx_buffers == NULL)
732 return;
733
734 tx_buffer = txr->tx_buffers;
735 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
736 if (tx_buffer->m_head != NULL) {
737 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
738 0, tx_buffer->m_head->m_pkthdr.len,
739 BUS_DMASYNC_POSTWRITE);
740 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
741 m_freem(tx_buffer->m_head);
742 tx_buffer->m_head = NULL;
743 if (tx_buffer->map != NULL) {
744 ixgbe_dmamap_destroy(txr->txtag,
745 tx_buffer->map);
746 tx_buffer->map = NULL;
747 }
748 } else if (tx_buffer->map != NULL) {
749 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
750 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
751 tx_buffer->map = NULL;
752 }
753 }
754 #ifndef IXGBE_LEGACY_TX
755 if (txr->br != NULL)
756 buf_ring_free(txr->br, M_DEVBUF);
757 #endif
758 if (txr->tx_buffers != NULL) {
759 free(txr->tx_buffers, M_DEVBUF);
760 txr->tx_buffers = NULL;
761 }
762 if (txr->txtag != NULL) {
763 ixgbe_dma_tag_destroy(txr->txtag);
764 txr->txtag = NULL;
765 }
766 return;
767 }
768
769 /*********************************************************************
770 *
771 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
772 *
773 **********************************************************************/
774
775 static int
776 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
777 u32 *cmd_type_len, u32 *olinfo_status)
778 {
779 struct adapter *adapter = txr->adapter;
780 struct ethercom *ec = &adapter->osdep.ec;
781 struct m_tag *mtag;
782 struct ixgbe_adv_tx_context_desc *TXD;
783 struct ether_vlan_header *eh;
784 struct ip ip;
785 struct ip6_hdr ip6;
786 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
787 int ehdrlen, ip_hlen = 0;
788 u16 etype;
789 u8 ipproto __diagused = 0;
790 int offload = TRUE;
791 int ctxd = txr->next_avail_desc;
792 u16 vtag = 0;
793
794 /* First check if TSO is to be used */
795 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
796 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
797
798 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
799 offload = FALSE;
800
801 /* Indicate the whole packet as payload when not doing TSO */
802 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
803
804 /* Now ready a context descriptor */
805 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
806
807 /*
808 ** In advanced descriptors the vlan tag must
809 ** be placed into the context descriptor. Hence
810 ** we need to make one even if not doing offloads.
811 */
812 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
813 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
814 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
815 }
816 else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
817 return (0);
818
819 /*
820 * Determine where frame payload starts.
821 * Jump over vlan headers if already present,
822 * helpful for QinQ too.
823 */
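	/*
	 * The KASSERTs below assume that the Ethernet header (and the
	 * 802.1Q tag, when present) is contiguous in the first mbuf so
	 * it can be examined via mtod().
	 */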
824 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
825 eh = mtod(mp, struct ether_vlan_header *);
826 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
827 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
828 etype = ntohs(eh->evl_proto);
829 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
830 } else {
831 etype = ntohs(eh->evl_encap_proto);
832 ehdrlen = ETHER_HDR_LEN;
833 }
834
835 /* Set the ether header length */
836 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
837
838 if (offload == FALSE)
839 goto no_offloads;
840
841 switch (etype) {
842 case ETHERTYPE_IP:
843 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
844 ip_hlen = ip.ip_hl << 2;
845 ipproto = ip.ip_p;
846 #if 0
847 ip.ip_sum = 0;
848 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
849 #else
850 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
851 ip.ip_sum == 0);
852 #endif
853 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
854 break;
855 case ETHERTYPE_IPV6:
856 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
857 ip_hlen = sizeof(ip6);
858 /* XXX-BZ this will go badly in case of ext hdrs. */
859 ipproto = ip6.ip6_nxt;
860 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
861 break;
862 default:
863 break;
864 }
865
866 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
867 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
868
869 vlan_macip_lens |= ip_hlen;
870
871 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
872 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
873 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
874 KASSERT(ipproto == IPPROTO_TCP);
875 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
876 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
877 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
878 KASSERT(ipproto == IPPROTO_UDP);
879 }
880
881 no_offloads:
882 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
883
884 /* Now copy bits into descriptor */
885 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
886 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
887 TXD->seqnum_seed = htole32(0);
888 TXD->mss_l4len_idx = htole32(0);
889
890 /* We've consumed the first desc, adjust counters */
891 if (++ctxd == txr->num_desc)
892 ctxd = 0;
893 txr->next_avail_desc = ctxd;
894 --txr->tx_avail;
895
896 return 0;
897 }
898
899 /**********************************************************************
900 *
901 * Setup work for hardware segmentation offload (TSO) on
902 * adapters using advanced tx descriptors
903 *
904 **********************************************************************/
905 static int
906 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
907 u32 *cmd_type_len, u32 *olinfo_status)
908 {
909 struct m_tag *mtag;
910 struct adapter *adapter = txr->adapter;
911 struct ethercom *ec = &adapter->osdep.ec;
912 struct ixgbe_adv_tx_context_desc *TXD;
913 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
914 u32 mss_l4len_idx = 0, paylen;
915 u16 vtag = 0, eh_type;
916 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
917 struct ether_vlan_header *eh;
918 #ifdef INET6
919 struct ip6_hdr *ip6;
920 #endif
921 #ifdef INET
922 struct ip *ip;
923 #endif
924 struct tcphdr *th;
925
926
927 /*
928 * Determine where frame payload starts.
929 * Jump over vlan headers if already present
930 */
931 eh = mtod(mp, struct ether_vlan_header *);
932 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
933 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
934 eh_type = eh->evl_proto;
935 } else {
936 ehdrlen = ETHER_HDR_LEN;
937 eh_type = eh->evl_encap_proto;
938 }
939
940 switch (ntohs(eh_type)) {
941 #ifdef INET6
942 case ETHERTYPE_IPV6:
943 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
944 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
945 if (ip6->ip6_nxt != IPPROTO_TCP)
946 return (ENXIO);
947 ip_hlen = sizeof(struct ip6_hdr);
948 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
949 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
950 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
951 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
952 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
953 break;
954 #endif
955 #ifdef INET
956 case ETHERTYPE_IP:
957 ip = (struct ip *)(mp->m_data + ehdrlen);
958 if (ip->ip_p != IPPROTO_TCP)
959 return (ENXIO);
960 ip->ip_sum = 0;
961 ip_hlen = ip->ip_hl << 2;
962 th = (struct tcphdr *)((char *)ip + ip_hlen);
963 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
964 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
965 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
966 /* Tell transmit desc to also do IPv4 checksum. */
967 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
968 break;
969 #endif
970 default:
971 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
972 __func__, ntohs(eh_type));
973 break;
974 }
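	/*
	 * For TSO the hardware recomputes the TCP checksum for every
	 * segment, but it expects the checksum field to be pre-seeded
	 * with the pseudo-header sum (addresses and protocol, no length),
	 * which is what in_cksum_phdr()/in6_cksum_phdr() provide above.
	 * The IPv4 header checksum is zeroed because the hardware also
	 * regenerates it per segment (IXSM is set above).
	 */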
975
976 ctxd = txr->next_avail_desc;
977 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
978
979 tcp_hlen = th->th_off << 2;
980
981 /* This is used in the transmit desc in encap */
982 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
983
984 /* VLAN MACLEN IPLEN */
985 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
986 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
987 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
988 }
989
990 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
991 vlan_macip_lens |= ip_hlen;
992 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
993
994 /* ADV DTYPE TUCMD */
995 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
996 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
997 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
998
999 /* MSS L4LEN IDX */
1000 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1001 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1002 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1003
1004 TXD->seqnum_seed = htole32(0);
1005
1006 if (++ctxd == txr->num_desc)
1007 ctxd = 0;
1008
1009 txr->tx_avail--;
1010 txr->next_avail_desc = ctxd;
1011 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1012 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1013 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1014 ++txr->tso_tx.ev_count;
1015 return (0);
1016 }
1017
1018
1019 /**********************************************************************
1020 *
1021 * Examine each tx_buffer in the used queue. If the hardware is done
1022 * processing the packet then free associated resources. The
1023 * tx_buffer is put back on the free queue.
1024 *
1025 **********************************************************************/
1026 void
1027 ixgbe_txeof(struct tx_ring *txr)
1028 {
1029 struct adapter *adapter = txr->adapter;
1030 struct ifnet *ifp = adapter->ifp;
1031 u32 work, processed = 0;
1032 u16 limit = txr->process_limit;
1033 struct ixgbe_tx_buf *buf;
1034 union ixgbe_adv_tx_desc *txd;
1035
1036 KASSERT(mutex_owned(&txr->tx_mtx));
1037
1038 #ifdef DEV_NETMAP
1039 if (ifp->if_capenable & IFCAP_NETMAP) {
1040 struct netmap_adapter *na = NA(ifp);
1041 struct netmap_kring *kring = &na->tx_rings[txr->me];
1042 txd = txr->tx_base;
1043 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1044 BUS_DMASYNC_POSTREAD);
1045 /*
1046 * In netmap mode, all the work is done in the context
1047 * of the client thread. Interrupt handlers only wake up
1048 * clients, which may be sleeping on individual rings
1049 * or on a global resource for all rings.
1050 * To implement tx interrupt mitigation, we wake up the client
1051 * thread roughly every half ring, even if the NIC interrupts
1052 * more frequently. This is implemented as follows:
1053 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1054 * the slot that should wake up the thread (nkr_num_slots
1055 * means the user thread should not be woken up);
1056 * - the driver ignores tx interrupts unless netmap_mitigate=0
1057 * or the slot has the DD bit set.
1058 */
1059 if (!netmap_mitigate ||
1060 (kring->nr_kflags < kring->nkr_num_slots &&
1061 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1062 netmap_tx_irq(ifp, txr->me);
1063 }
1064 return;
1065 }
1066 #endif /* DEV_NETMAP */
1067
1068 if (txr->tx_avail == txr->num_desc) {
1069 txr->busy = 0;
1070 return;
1071 }
1072
1073 /* Get work starting point */
1074 work = txr->next_to_clean;
1075 buf = &txr->tx_buffers[work];
1076 txd = &txr->tx_base[work];
1077 work -= txr->num_desc; /* The distance to ring end */
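	/*
	 * 'work' holds the current slot as a (wrapping) offset below the
	 * end of the ring, so after the increments below it becomes zero
	 * exactly when the scan must wrap back to slot 0; adding num_desc
	 * back at the end turns it into a normal ring index again.
	 */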
1078 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1079 BUS_DMASYNC_POSTREAD);
1080 do {
1081 union ixgbe_adv_tx_desc *eop= buf->eop;
1082 if (eop == NULL) /* No work */
1083 break;
1084
1085 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1086 break; /* I/O not complete */
1087
1088 if (buf->m_head) {
1089 txr->bytes +=
1090 buf->m_head->m_pkthdr.len;
1091 bus_dmamap_sync(txr->txtag->dt_dmat,
1092 buf->map,
1093 0, buf->m_head->m_pkthdr.len,
1094 BUS_DMASYNC_POSTWRITE);
1095 ixgbe_dmamap_unload(txr->txtag,
1096 buf->map);
1097 m_freem(buf->m_head);
1098 buf->m_head = NULL;
1099 }
1100 buf->eop = NULL;
1101 ++txr->tx_avail;
1102
1103 /* We clean the range if multi segment */
1104 while (txd != eop) {
1105 ++txd;
1106 ++buf;
1107 ++work;
1108 /* wrap the ring? */
1109 if (__predict_false(!work)) {
1110 work -= txr->num_desc;
1111 buf = txr->tx_buffers;
1112 txd = txr->tx_base;
1113 }
1114 if (buf->m_head) {
1115 txr->bytes +=
1116 buf->m_head->m_pkthdr.len;
1117 bus_dmamap_sync(txr->txtag->dt_dmat,
1118 buf->map,
1119 0, buf->m_head->m_pkthdr.len,
1120 BUS_DMASYNC_POSTWRITE);
1121 ixgbe_dmamap_unload(txr->txtag,
1122 buf->map);
1123 m_freem(buf->m_head);
1124 buf->m_head = NULL;
1125 }
1126 ++txr->tx_avail;
1127 buf->eop = NULL;
1128
1129 }
1130 ++txr->packets;
1131 ++processed;
1132 ++ifp->if_opackets;
1133
1134 /* Try the next packet */
1135 ++txd;
1136 ++buf;
1137 ++work;
1138 /* reset with a wrap */
1139 if (__predict_false(!work)) {
1140 work -= txr->num_desc;
1141 buf = txr->tx_buffers;
1142 txd = txr->tx_base;
1143 }
1144 prefetch(txd);
1145 } while (__predict_true(--limit));
1146
1147 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1148 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1149
1150 work += txr->num_desc;
1151 txr->next_to_clean = work;
1152
	/*
	** Queue hang detection: we know there is work outstanding
	** or the first return above would have been taken, so if
	** nothing was cleaned, increment busy. The local timer will
	** check this counter and mark the queue HUNG if it exceeds
	** the maximum number of attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything was cleaned we reset the state to 1;
	** note this will turn off HUNG if it is set.
	*/
1167 if (processed)
1168 txr->busy = 1;
1169
1170 if (txr->tx_avail == txr->num_desc)
1171 txr->busy = 0;
1172
1173 return;
1174 }
1175
1176
1177 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same CPU. Doing this for every packet would
** be a performance hit, so we only sample one in
** every atr_sample_rate packets.
*/
1186 static void
1187 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1188 {
1189 struct adapter *adapter = txr->adapter;
1190 struct ix_queue *que;
1191 struct ip *ip;
1192 struct tcphdr *th;
1193 struct udphdr *uh;
1194 struct ether_vlan_header *eh;
1195 union ixgbe_atr_hash_dword input = {.dword = 0};
1196 union ixgbe_atr_hash_dword common = {.dword = 0};
1197 int ehdrlen, ip_hlen;
1198 u16 etype;
1199
1200 eh = mtod(mp, struct ether_vlan_header *);
1201 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1202 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1203 etype = eh->evl_proto;
1204 } else {
1205 ehdrlen = ETHER_HDR_LEN;
1206 etype = eh->evl_encap_proto;
1207 }
1208
1209 /* Only handling IPv4 */
1210 if (etype != htons(ETHERTYPE_IP))
1211 return;
1212
1213 ip = (struct ip *)(mp->m_data + ehdrlen);
1214 ip_hlen = ip->ip_hl << 2;
1215
1216 /* check if we're UDP or TCP */
1217 switch (ip->ip_p) {
1218 case IPPROTO_TCP:
1219 th = (struct tcphdr *)((char *)ip + ip_hlen);
1220 /* src and dst are inverted */
1221 common.port.dst ^= th->th_sport;
1222 common.port.src ^= th->th_dport;
1223 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1224 break;
1225 case IPPROTO_UDP:
1226 uh = (struct udphdr *)((char *)ip + ip_hlen);
1227 /* src and dst are inverted */
1228 common.port.dst ^= uh->uh_sport;
1229 common.port.src ^= uh->uh_dport;
1230 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1231 break;
1232 default:
1233 return;
1234 }
1235
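	/*
	 * Note that source and destination ports were deliberately
	 * swapped above, and both IP addresses are folded into the
	 * hash input below: the filter is programmed from transmitted
	 * packets but must match received packets of the same flow,
	 * where source and destination are reversed.
	 */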
1236 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1237 if (mp->m_pkthdr.ether_vtag)
1238 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1239 else
1240 common.flex_bytes ^= etype;
1241 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1242
1243 que = &adapter->queues[txr->me];
1244 /*
1245 ** This assumes the Rx queue and Tx
1246 ** queue are bound to the same CPU
1247 */
1248 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1249 input, common, que->msix);
1250 }
1251 #endif /* IXGBE_FDIR */
1252
1253 /*
1254 ** Used to detect a descriptor that has
1255 ** been merged by Hardware RSC.
1256 */
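/*
** A non-zero count means hardware RSC coalesced this descriptor with
** others; in that case the NEXTP field in the descriptor, rather than
** simple sequential order, locates the rest of the frame.
*/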
1257 static inline u32
1258 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1259 {
1260 return (le32toh(rx->wb.lower.lo_dword.data) &
1261 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1262 }
1263
/*********************************************************************
 *
 *  Initialize the Hardware RSC (LRO) feature on 82599 and later
 *  for an RX ring. It is toggled by the LRO capability even
 *  though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4 and
 *  testing has shown soft LRO to be just as effective,
 *  it is disabled by default.
 *
 **********************************************************************/
1275 static void
1276 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1277 {
1278 struct adapter *adapter = rxr->adapter;
1279 struct ixgbe_hw *hw = &adapter->hw;
1280 u32 rscctrl, rdrxctl;
1281
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back so RSC is really off */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1288
1289 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1290 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1291 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1292 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1293 #endif /* DEV_NETMAP */
1294 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1295 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1296 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1297
1298 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1299 rscctrl |= IXGBE_RSCCTL_RSCEN;
1300 /*
1301 ** Limit the total number of descriptors that
1302 ** can be combined, so it does not exceed 64K
1303 */
1304 if (rxr->mbuf_sz == MCLBYTES)
1305 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1306 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1307 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1308 else if (rxr->mbuf_sz == MJUM9BYTES)
1309 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1310 else /* Using 16K cluster */
1311 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
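	/*
	 * The maximum descriptor count is scaled down as the cluster
	 * size grows, keeping the largest possible coalesced frame
	 * bounded as described above.
	 */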
1312
1313 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1314
1315 /* Enable TCP header recognition */
1316 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1317 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1318 IXGBE_PSRTYPE_TCPHDR));
1319
1320 /* Disable RSC for ACK packets */
1321 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1322 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1323
1324 rxr->hw_rsc = TRUE;
1325 }
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings.
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again later to retry.
 *
 **********************************************************************/
1335 static void
1336 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1337 {
1338 struct adapter *adapter = rxr->adapter;
1339 struct ixgbe_rx_buf *rxbuf;
1340 struct mbuf *mp;
1341 int i, j, error;
1342 bool refreshed = false;
1343
1344 i = j = rxr->next_to_refresh;
1345 /* Control the loop with one beyond */
1346 if (++j == rxr->num_desc)
1347 j = 0;
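	/*
	 * 'j' always runs one slot ahead of 'i', so the loop below stops
	 * one descriptor short of 'limit' and next_to_refresh can never
	 * overtake the position the caller is still processing.
	 */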
1348
1349 while (j != limit) {
1350 rxbuf = &rxr->rx_buffers[i];
1351 if (rxbuf->buf == NULL) {
1352 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1353 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1354 if (mp == NULL) {
1355 rxr->no_jmbuf.ev_count++;
1356 goto update;
1357 }
1358 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1359 m_adj(mp, ETHER_ALIGN);
1360 } else
1361 mp = rxbuf->buf;
1362
1363 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1364
1365 /* If we're dealing with an mbuf that was copied rather
1366 * than replaced, there's no need to go through busdma.
1367 */
1368 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1369 /* Get the memory mapping */
1370 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1371 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1372 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1373 if (error != 0) {
1374 printf("Refresh mbufs: payload dmamap load"
1375 " failure - %d\n", error);
1376 m_free(mp);
1377 rxbuf->buf = NULL;
1378 goto update;
1379 }
1380 rxbuf->buf = mp;
1381 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1382 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1383 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1384 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1385 } else {
1386 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1387 rxbuf->flags &= ~IXGBE_RX_COPY;
1388 }
1389
1390 refreshed = true;
1391 /* Next is precalculated */
1392 i = j;
1393 rxr->next_to_refresh = i;
1394 if (++j == rxr->num_desc)
1395 j = 0;
1396 }
1397 update:
1398 if (refreshed) /* Update hardware tail index */
1399 IXGBE_WRITE_REG(&adapter->hw,
1400 rxr->tail, rxr->next_to_refresh);
1401 return;
1402 }
1403
1404 /*********************************************************************
1405 *
1406 * Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
1408 * that we'll need is equal to the number of receive descriptors
1409 * that we've allocated.
1410 *
1411 **********************************************************************/
1412 int
1413 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1414 {
1415 struct adapter *adapter = rxr->adapter;
1416 device_t dev = adapter->dev;
1417 struct ixgbe_rx_buf *rxbuf;
1418 int i, bsize, error;
1419
1420 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1421 if (!(rxr->rx_buffers =
1422 (struct ixgbe_rx_buf *) malloc(bsize,
1423 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1424 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1425 error = ENOMEM;
1426 goto fail;
1427 }
1428
1429 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1430 1, 0, /* alignment, bounds */
1431 MJUM16BYTES, /* maxsize */
1432 1, /* nsegments */
1433 MJUM16BYTES, /* maxsegsize */
1434 0, /* flags */
1435 &rxr->ptag))) {
1436 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1437 goto fail;
1438 }
1439
1440 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1441 rxbuf = &rxr->rx_buffers[i];
1442 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1443 if (error) {
1444 aprint_error_dev(dev, "Unable to create RX dma map\n");
1445 goto fail;
1446 }
1447 }
1448
1449 return (0);
1450
1451 fail:
1452 /* Frees all, but can handle partial completion */
1453 ixgbe_free_receive_structures(adapter);
1454 return (error);
1455 }
1456
1457
1458 static void
1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
1460 {
1461 struct ixgbe_rx_buf *rxbuf;
1462 int i;
1463
1464 for (i = 0; i < rxr->num_desc; i++) {
1465 rxbuf = &rxr->rx_buffers[i];
1466 if (rxbuf->buf != NULL) {
1467 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1468 0, rxbuf->buf->m_pkthdr.len,
1469 BUS_DMASYNC_POSTREAD);
1470 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1471 rxbuf->buf->m_flags |= M_PKTHDR;
1472 m_freem(rxbuf->buf);
1473 rxbuf->buf = NULL;
1474 rxbuf->flags = 0;
1475 }
1476 }
1477 }
1478
1479
1480 /*********************************************************************
1481 *
1482 * Initialize a receive ring and its buffers.
1483 *
1484 **********************************************************************/
1485 static int
1486 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1487 {
1488 struct adapter *adapter;
1489 struct ixgbe_rx_buf *rxbuf;
1490 #ifdef LRO
1491 struct ifnet *ifp;
1492 struct lro_ctrl *lro = &rxr->lro;
1493 #endif /* LRO */
1494 int rsize, error = 0;
1495 #ifdef DEV_NETMAP
1496 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1497 struct netmap_slot *slot;
1498 #endif /* DEV_NETMAP */
1499
1500 adapter = rxr->adapter;
1501 #ifdef LRO
1502 ifp = adapter->ifp;
1503 #endif /* LRO */
1504
1505 /* Clear the ring contents */
1506 IXGBE_RX_LOCK(rxr);
1507 #ifdef DEV_NETMAP
1508 /* same as in ixgbe_setup_transmit_ring() */
1509 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1510 #endif /* DEV_NETMAP */
1511 rsize = roundup2(adapter->num_rx_desc *
1512 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1513 bzero((void *)rxr->rx_base, rsize);
1514 /* Cache the size */
1515 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1516
1517 /* Free current RX buffer structs and their mbufs */
1518 ixgbe_free_receive_ring(rxr);
1519
1520 IXGBE_RX_UNLOCK(rxr);
1521
1522 /* Now reinitialize our supply of jumbo mbufs. The number
1523 * or size of jumbo mbufs may have changed.
1524 */
1525 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1526 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1527
1528 IXGBE_RX_LOCK(rxr);
1529
1530 /* Now replenish the mbufs */
1531 for (int j = 0; j != rxr->num_desc; ++j) {
1532 struct mbuf *mp;
1533
1534 rxbuf = &rxr->rx_buffers[j];
1535 #ifdef DEV_NETMAP
1536 /*
1537 * In netmap mode, fill the map and set the buffer
1538 * address in the NIC ring, considering the offset
1539 * between the netmap and NIC rings (see comment in
1540 * ixgbe_setup_transmit_ring() ). No need to allocate
1541 * an mbuf, so end the block with a continue;
1542 */
1543 if (slot) {
1544 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1545 uint64_t paddr;
1546 void *addr;
1547
1548 addr = PNMB(na, slot + sj, &paddr);
1549 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1550 /* Update descriptor and the cached value */
1551 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1552 rxbuf->addr = htole64(paddr);
1553 continue;
1554 }
1555 #endif /* DEV_NETMAP */
1556 rxbuf->flags = 0;
1557 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1558 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1559 if (rxbuf->buf == NULL) {
1560 error = ENOBUFS;
1561 goto fail;
1562 }
1563 mp = rxbuf->buf;
1564 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1565 /* Get the memory mapping */
1566 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1567 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1568 if (error != 0)
1569 goto fail;
1570 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1571 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1572 /* Update the descriptor and the cached value */
1573 rxr->rx_base[j].read.pkt_addr =
1574 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1575 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1576 }
1577
1578
1579 /* Setup our descriptor indices */
1580 rxr->next_to_check = 0;
1581 rxr->next_to_refresh = 0;
1582 rxr->lro_enabled = FALSE;
1583 rxr->rx_copies.ev_count = 0;
1584 rxr->rx_bytes.ev_count = 0;
1585 rxr->vtag_strip = FALSE;
1586
1587 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1588 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1589
1590 /*
1591 ** Now set up the LRO interface:
1592 */
1593 if (ixgbe_rsc_enable)
1594 ixgbe_setup_hw_rsc(rxr);
1595 #ifdef LRO
1596 else if (ifp->if_capenable & IFCAP_LRO) {
1597 device_t dev = adapter->dev;
1598 int err = tcp_lro_init(lro);
1599 if (err) {
1600 device_printf(dev, "LRO Initialization failed!\n");
1601 goto fail;
1602 }
1603 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1604 rxr->lro_enabled = TRUE;
1605 lro->ifp = adapter->ifp;
1606 }
1607 #endif /* LRO */
1608
1609 IXGBE_RX_UNLOCK(rxr);
1610 return (0);
1611
1612 fail:
1613 ixgbe_free_receive_ring(rxr);
1614 IXGBE_RX_UNLOCK(rxr);
1615 return (error);
1616 }
1617
1618 /*********************************************************************
1619 *
1620 * Initialize all receive rings.
1621 *
1622 **********************************************************************/
1623 int
1624 ixgbe_setup_receive_structures(struct adapter *adapter)
1625 {
1626 struct rx_ring *rxr = adapter->rx_rings;
1627 int j;
1628
1629 for (j = 0; j < adapter->num_queues; j++, rxr++)
1630 if (ixgbe_setup_receive_ring(rxr))
1631 goto fail;
1632
1633 return (0);
1634 fail:
	/*
	 * Free RX buffers allocated so far; we only need to handle
	 * the rings that completed, since the failing case will have
	 * cleaned up after itself. Ring 'j' failed, so it is the
	 * terminus.
	 */
1640 for (int i = 0; i < j; ++i) {
1641 rxr = &adapter->rx_rings[i];
1642 ixgbe_free_receive_ring(rxr);
1643 }
1644
1645 return (ENOBUFS);
1646 }
1647
1648
1649 /*********************************************************************
1650 *
1651 * Free all receive rings.
1652 *
1653 **********************************************************************/
1654 void
1655 ixgbe_free_receive_structures(struct adapter *adapter)
1656 {
1657 struct rx_ring *rxr = adapter->rx_rings;
1658
1659 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1660
1661 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1662 #ifdef LRO
1663 struct lro_ctrl *lro = &rxr->lro;
1664 #endif /* LRO */
1665 ixgbe_free_receive_buffers(rxr);
1666 #ifdef LRO
1667 /* Free LRO memory */
1668 tcp_lro_free(lro);
1669 #endif /* LRO */
1670 /* Free the ring memory as well */
1671 ixgbe_dma_free(adapter, &rxr->rxdma);
1672 IXGBE_RX_LOCK_DESTROY(rxr);
1673 }
1674
1675 free(adapter->rx_rings, M_DEVBUF);
1676 }
1677
1678
1679 /*********************************************************************
1680 *
1681 * Free receive ring data structures
1682 *
1683 **********************************************************************/
1684 static void
1685 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1686 {
1687 struct adapter *adapter = rxr->adapter;
1688 struct ixgbe_rx_buf *rxbuf;
1689
1690 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1691
1692 /* Cleanup any existing buffers */
1693 if (rxr->rx_buffers != NULL) {
1694 for (int i = 0; i < adapter->num_rx_desc; i++) {
1695 rxbuf = &rxr->rx_buffers[i];
1696 if (rxbuf->buf != NULL) {
1697 bus_dmamap_sync(rxr->ptag->dt_dmat,
1698 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1699 BUS_DMASYNC_POSTREAD);
1700 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1701 rxbuf->buf->m_flags |= M_PKTHDR;
1702 m_freem(rxbuf->buf);
1703 }
1704 rxbuf->buf = NULL;
1705 if (rxbuf->pmap != NULL) {
1706 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1707 rxbuf->pmap = NULL;
1708 }
1709 }
1710 if (rxr->rx_buffers != NULL) {
1711 free(rxr->rx_buffers, M_DEVBUF);
1712 rxr->rx_buffers = NULL;
1713 }
1714 }
1715
1716 if (rxr->ptag != NULL) {
1717 ixgbe_dma_tag_destroy(rxr->ptag);
1718 rxr->ptag = NULL;
1719 }
1720
1721 return;
1722 }
1723
1724 static __inline void
1725 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1726 {
1727 int s;
1728
1729 #ifdef LRO
1730 struct adapter *adapter = ifp->if_softc;
1731 struct ethercom *ec = &adapter->osdep.ec;
1732
	/*
	 * At the moment LRO is only for IP/TCP packets, and the TCP
	 * checksum of the packet must have been computed by hardware.
	 * Also the packet must not carry a VLAN tag in its Ethernet
	 * header. For IPv6 we do not yet support extension headers.
	 */
1738 if (rxr->lro_enabled &&
1739 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1740 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1741 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1742 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1743 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1744 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1745 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1746 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		** Send to the stack if:
		**  - LRO not enabled, or
		**  - no LRO resources, or
		**  - lro enqueue fails
		*/
1753 if (rxr->lro.lro_cnt != 0)
1754 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1755 return;
1756 }
1757 #endif /* LRO */
1758
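	/*
	 * Drop the ring lock while the packet is handed to BPF and the
	 * stack, so upper-layer processing (which may re-enter the
	 * driver, e.g. to send a reply) does not run with the RX lock
	 * held.
	 */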
1759 IXGBE_RX_UNLOCK(rxr);
1760
1761 s = splnet();
1762 /* Pass this up to any BPF listeners. */
1763 bpf_mtap(ifp, m);
1764 if_input(ifp, m);
1765 splx(s);
1766
1767 IXGBE_RX_LOCK(rxr);
1768 }
1769
1770 static __inline void
1771 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1772 {
1773 struct ixgbe_rx_buf *rbuf;
1774
1775 rbuf = &rxr->rx_buffers[i];
1776
1777
	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addresses, so it is easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mappings.
	*/

	if (rbuf->fmp != NULL) {	/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
1795 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1796
1797 rbuf->flags = 0;
1798
1799 return;
1800 }
1801
1802
1803 /*********************************************************************
1804 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and passes data that has been
 *  DMA'ed into host memory up to the upper layers.
1808 *
1809 * Return TRUE for more work, FALSE for all clean.
1810 *********************************************************************/
1811 bool
1812 ixgbe_rxeof(struct ix_queue *que)
1813 {
1814 struct adapter *adapter = que->adapter;
1815 struct rx_ring *rxr = que->rxr;
1816 struct ifnet *ifp = adapter->ifp;
1817 #ifdef LRO
1818 struct lro_ctrl *lro = &rxr->lro;
1819 struct lro_entry *queued;
1820 #endif /* LRO */
1821 int i, nextp, processed = 0;
1822 u32 staterr = 0;
1823 u16 count = rxr->process_limit;
1824 union ixgbe_adv_rx_desc *cur;
1825 struct ixgbe_rx_buf *rbuf, *nbuf;
1826 #ifdef RSS
1827 u16 pkt_info;
1828 #endif
1829
1830 IXGBE_RX_LOCK(rxr);
1831
1832 #ifdef DEV_NETMAP
1833 /* Same as the txeof routine: wakeup clients on intr. */
1834 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1835 IXGBE_RX_UNLOCK(rxr);
1836 return (FALSE);
1837 }
1838 #endif /* DEV_NETMAP */
1839
1840 for (i = rxr->next_to_check; count != 0;) {
1841 struct mbuf *sendmp, *mp;
1842 u32 rsc, ptype;
1843 u16 len;
1844 u16 vtag = 0;
1845 bool eop;
1846
1847 /* Sync the ring. */
1848 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1849 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1850
1851 cur = &rxr->rx_base[i];
1852 staterr = le32toh(cur->wb.upper.status_error);
1853 #ifdef RSS
1854 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1855 #endif
1856
1857 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1858 break;
1859 if ((ifp->if_flags & IFF_RUNNING) == 0)
1860 break;
1861
1862 count--;
1863 sendmp = NULL;
1864 nbuf = NULL;
1865 rsc = 0;
1866 cur->wb.upper.status_error = 0;
1867 rbuf = &rxr->rx_buffers[i];
1868 mp = rbuf->buf;
1869
1870 len = le16toh(cur->wb.upper.length);
1871 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1872 IXGBE_RXDADV_PKTTYPE_MASK;
1873 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1874
1875 /* Make sure bad packets are discarded */
1876 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1877 #if __FreeBSD_version >= 1100036
1878 if (IXGBE_IS_VF(adapter))
1879 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1880 #endif
1881 rxr->rx_discarded.ev_count++;
1882 ixgbe_rx_discard(rxr, i);
1883 goto next_desc;
1884 }
1885
1886 /*
1887 		** On the 82599, which supports hardware
1888 		** LRO (called HW RSC), packets need
1889 		** not be fragmented across sequential
1890 		** descriptors; rather, the next descriptor
1891 		** is indicated in bits of this descriptor.
1892 		** This also means that we might process
1893 		** more than one packet at a time, something
1894 		** that was never true before; it
1895 		** required eliminating global chain pointers
1896 		** in favor of what we are doing here. -jfv
1897 */
1898 if (!eop) {
1899 /*
1900 ** Figure out the next descriptor
1901 ** of this frame.
1902 */
1903 if (rxr->hw_rsc == TRUE) {
1904 rsc = ixgbe_rsc_count(cur);
1905 rxr->rsc_num += (rsc - 1);
1906 }
1907 if (rsc) { /* Get hardware index */
1908 nextp = ((staterr &
1909 IXGBE_RXDADV_NEXTP_MASK) >>
1910 IXGBE_RXDADV_NEXTP_SHIFT);
1911 } else { /* Just sequential */
1912 nextp = i + 1;
1913 if (nextp == adapter->num_rx_desc)
1914 nextp = 0;
1915 }
1916 nbuf = &rxr->rx_buffers[nextp];
1917 prefetch(nbuf);
1918 }
1919 /*
1920 ** Rather than using the fmp/lmp global pointers
1921 ** we now keep the head of a packet chain in the
1922 ** buffer struct and pass this along from one
1923 ** descriptor to the next, until we get EOP.
1924 */
1925 mp->m_len = len;
1926 /*
1927 		** See if there is a stored head of a
1928 		** packet chain that this buffer continues
1929 */
1930 sendmp = rbuf->fmp;
1931 if (sendmp != NULL) { /* secondary frag */
1932 rbuf->buf = rbuf->fmp = NULL;
1933 mp->m_flags &= ~M_PKTHDR;
1934 sendmp->m_pkthdr.len += mp->m_len;
1935 } else {
1936 /*
1937 * Optimize. This might be a small packet,
1938 * maybe just a TCP ACK. Do a fast copy that
1939 * is cache aligned into a new mbuf, and
1940 * leave the old mbuf+cluster for re-use.
1941 */
1942 if (eop && len <= IXGBE_RX_COPY_LEN) {
1943 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1944 if (sendmp != NULL) {
1945 sendmp->m_data +=
1946 IXGBE_RX_COPY_ALIGN;
1947 ixgbe_bcopy(mp->m_data,
1948 sendmp->m_data, len);
1949 sendmp->m_len = len;
1950 rxr->rx_copies.ev_count++;
1951 rbuf->flags |= IXGBE_RX_COPY;
1952 }
1953 }
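			/*
			** No fast copy was made (frame not complete, too
			** large, or header mbuf allocation failed), so hand
			** the receive cluster itself up and let the refresh
			** path supply a replacement buffer.
			*/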
1954 if (sendmp == NULL) {
1955 rbuf->buf = rbuf->fmp = NULL;
1956 sendmp = mp;
1957 }
1958
1959 			/* first desc of a non-packet-split chain */
1960 sendmp->m_flags |= M_PKTHDR;
1961 sendmp->m_pkthdr.len = mp->m_len;
1962 }
1963 ++processed;
1964
1965 /* Pass the head pointer on */
1966 if (eop == 0) {
1967 nbuf->fmp = sendmp;
1968 sendmp = NULL;
1969 mp->m_next = nbuf->buf;
1970 } else { /* Sending this frame */
1971 m_set_rcvif(sendmp, ifp);
1972 ifp->if_ipackets++;
1973 rxr->rx_packets.ev_count++;
1974 /* capture data for AIM */
1975 rxr->bytes += sendmp->m_pkthdr.len;
1976 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1977 /* Process vlan info */
1978 if ((rxr->vtag_strip) &&
1979 (staterr & IXGBE_RXD_STAT_VP))
1980 vtag = le16toh(cur->wb.upper.vlan);
1981 if (vtag) {
1982 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1983 printf("%s: could not apply VLAN "
1984 "tag", __func__));
1985 }
1986 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1987 ixgbe_rx_checksum(staterr, sendmp, ptype,
1988 &adapter->stats.pf);
1989 }
1990 #if __FreeBSD_version >= 800000
1991 #ifdef RSS
1992 sendmp->m_pkthdr.flowid =
1993 le32toh(cur->wb.lower.hi_dword.rss);
1994 #if __FreeBSD_version < 1100054
1995 sendmp->m_flags |= M_FLOWID;
1996 #endif
1997 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1998 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1999 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
2000 break;
2001 case IXGBE_RXDADV_RSSTYPE_IPV4:
2002 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
2003 break;
2004 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2005 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
2006 break;
2007 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2008 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
2009 break;
2010 case IXGBE_RXDADV_RSSTYPE_IPV6:
2011 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
2012 break;
2013 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2014 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
2015 break;
2016 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2017 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
2018 break;
2019 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2020 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
2021 break;
2022 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2023 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
2024 break;
2025 default:
2026 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2027 }
2028 #else /* RSS */
2029 sendmp->m_pkthdr.flowid = que->msix;
2030 #if __FreeBSD_version >= 1100054
2031 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2032 #else
2033 sendmp->m_flags |= M_FLOWID;
2034 #endif
2035 #endif /* RSS */
2036 #endif /* FreeBSD_version */
2037 }
2038 next_desc:
2039 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2040 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2041
2042 /* Advance our pointers to the next descriptor. */
2043 if (++i == rxr->num_desc)
2044 i = 0;
2045
2046 /* Now send to the stack or do LRO */
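		/*
		** ixgbe_rx_input() drops and re-takes the RX lock around
		** if_input(), so publish next_to_check before the call and
		** reload the ring index afterwards.
		*/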
2047 if (sendmp != NULL) {
2048 rxr->next_to_check = i;
2049 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2050 i = rxr->next_to_check;
2051 }
2052
2053 /* Every 8 descriptors we go to refresh mbufs */
2054 if (processed == 8) {
2055 ixgbe_refresh_mbufs(rxr, i);
2056 processed = 0;
2057 }
2058 }
2059
2060 /* Refresh any remaining buf structs */
2061 if (ixgbe_rx_unrefreshed(rxr))
2062 ixgbe_refresh_mbufs(rxr, i);
2063
2064 rxr->next_to_check = i;
2065
2066 #ifdef LRO
2067 /*
2068 * Flush any outstanding LRO work
2069 */
2070 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2071 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2072 tcp_lro_flush(lro, queued);
2073 }
2074 #endif /* LRO */
2075
2076 IXGBE_RX_UNLOCK(rxr);
2077
2078 /*
2079 ** Still have cleaning to do?
2080 */
2081 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2082 return true;
2083 else
2084 return false;
2085 }
2086
2087
2088 /*********************************************************************
2089 *
2090 * Verify that the hardware indicated that the checksum is valid.
2091  *  Inform the stack about the status of the checksum so that the
2092  *  stack doesn't spend time verifying it again.
2093 *
2094 *********************************************************************/
2095 static void
2096 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2097 struct ixgbe_hw_stats *stats)
2098 {
2099 u16 status = (u16) staterr;
2100 u8 errors = (u8) (staterr >> 24);
2101 #if 0
2102 bool sctp = FALSE;
2103
2104 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2105 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2106 sctp = TRUE;
2107 #endif
2108
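	/*
	** IPCS/L4CS indicate the hardware computed the IPv4 and L4
	** checksums; IPE/TCPE flag a mismatch.  Translate the result
	** into the generic M_CSUM_* flags used by the stack, adding
	** the *_BAD variants when the hardware reported an error.
	*/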
2109 if (status & IXGBE_RXD_STAT_IPCS) {
2110 stats->ipcs.ev_count++;
2111 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2112 /* IP Checksum Good */
2113 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2114
2115 } else {
2116 stats->ipcs_bad.ev_count++;
2117 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2118 }
2119 }
2120 if (status & IXGBE_RXD_STAT_L4CS) {
2121 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2122 		stats->l4cs.ev_count++;
2123 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2124 mp->m_pkthdr.csum_flags |= type;
2125 } else {
2126 stats->l4cs_bad.ev_count++;
2127 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2128 }
2129 }
2130 return;
2131 }
2132
2133
2134 /********************************************************************
2135 * Manage DMA'able memory.
2136 *******************************************************************/
2137
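/*
 * Allocate one DMA-safe region: create a tag, allocate and map the
 * memory, create a map, and load it.  The fail_* labels unwind these
 * steps in reverse order so a failure at any stage releases only what
 * was already set up.
 */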
2138 int
2139 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2140 struct ixgbe_dma_alloc *dma, const int mapflags)
2141 {
2142 device_t dev = adapter->dev;
2143 int r, rsegs;
2144
2145 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2146 DBA_ALIGN, 0, /* alignment, bounds */
2147 size, /* maxsize */
2148 1, /* nsegments */
2149 size, /* maxsegsize */
2150 BUS_DMA_ALLOCNOW, /* flags */
2151 &dma->dma_tag);
2152 if (r != 0) {
2153 aprint_error_dev(dev,
2154 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2155 goto fail_0;
2156 }
2157
2158 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2159 size,
2160 dma->dma_tag->dt_alignment,
2161 dma->dma_tag->dt_boundary,
2162 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2163 if (r != 0) {
2164 aprint_error_dev(dev,
2165 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2166 goto fail_1;
2167 }
2168
2169 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2170 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2171 if (r != 0) {
2172 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2173 __func__, r);
2174 goto fail_2;
2175 }
2176
2177 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2178 if (r != 0) {
2179 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2180 __func__, r);
2181 goto fail_3;
2182 }
2183
2184 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2185 size,
2186 NULL,
2187 mapflags | BUS_DMA_NOWAIT);
2188 if (r != 0) {
2189 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2190 __func__, r);
2191 goto fail_4;
2192 }
2193 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2194 dma->dma_size = size;
2195 return 0;
2196 fail_4:
2197 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2198 fail_3:
2199 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2200 fail_2:
2201 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2202 fail_1:
2203 ixgbe_dma_tag_destroy(dma->dma_tag);
2204 fail_0:
2205 return r;
2206 }
2207
2208 void
2209 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2210 {
2211 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2212 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2213 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2214 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2215 ixgbe_dma_tag_destroy(dma->dma_tag);
2216 }
2217
2218
2219 /*********************************************************************
2220 *
2221 * Allocate memory for the transmit and receive rings, and then
2222  *  the descriptors associated with each; called only once at attach.
2223 *
2224 **********************************************************************/
2225 int
2226 ixgbe_allocate_queues(struct adapter *adapter)
2227 {
2228 device_t dev = adapter->dev;
2229 struct ix_queue *que;
2230 struct tx_ring *txr;
2231 struct rx_ring *rxr;
2232 int rsize, tsize, error = IXGBE_SUCCESS;
2233 int txconf = 0, rxconf = 0;
2234
2235 /* First allocate the top level queue structs */
2236 if (!(adapter->queues =
2237 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2238 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2239 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2240 error = ENOMEM;
2241 goto fail;
2242 }
2243
2244 /* First allocate the TX ring struct memory */
2245 if (!(adapter->tx_rings =
2246 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2247 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2248 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2249 error = ENOMEM;
2250 goto tx_fail;
2251 }
2252
2253 /* Next allocate the RX */
2254 if (!(adapter->rx_rings =
2255 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2256 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2257 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2258 error = ENOMEM;
2259 goto rx_fail;
2260 }
2261
2262 /* For the ring itself */
2263 tsize = roundup2(adapter->num_tx_desc *
2264 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2265
2266 /*
2267 	 * Now set up the TX queues; txconf is needed to handle the
2268 	 * possibility that things fail midcourse so we can undo the
2269 	 * allocated memory gracefully.
2270 */
2271 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2272 /* Set up some basics */
2273 txr = &adapter->tx_rings[i];
2274 txr->adapter = adapter;
2275 txr->me = i;
2276 txr->num_desc = adapter->num_tx_desc;
2277
2278 /* Initialize the TX side lock */
2279 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2280 device_xname(dev), txr->me);
2281 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2282
2283 if (ixgbe_dma_malloc(adapter, tsize,
2284 &txr->txdma, BUS_DMA_NOWAIT)) {
2285 aprint_error_dev(dev,
2286 "Unable to allocate TX Descriptor memory\n");
2287 error = ENOMEM;
2288 goto err_tx_desc;
2289 }
2290 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2291 bzero((void *)txr->tx_base, tsize);
2292
2293 /* Now allocate transmit buffers for the ring */
2294 if (ixgbe_allocate_transmit_buffers(txr)) {
2295 aprint_error_dev(dev,
2296 "Critical Failure setting up transmit buffers\n");
2297 error = ENOMEM;
2298 goto err_tx_desc;
2299 }
2300 #ifndef IXGBE_LEGACY_TX
2301 /* Allocate a buf ring */
2302 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2303 M_WAITOK, &txr->tx_mtx);
2304 if (txr->br == NULL) {
2305 aprint_error_dev(dev,
2306 "Critical Failure setting up buf ring\n");
2307 error = ENOMEM;
2308 goto err_tx_desc;
2309 }
2310 #endif
2311 }
2312
2313 /*
2314 * Next the RX queues...
2315 */
2316 rsize = roundup2(adapter->num_rx_desc *
2317 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2318 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2319 rxr = &adapter->rx_rings[i];
2320 /* Set up some basics */
2321 rxr->adapter = adapter;
2322 rxr->me = i;
2323 rxr->num_desc = adapter->num_rx_desc;
2324
2325 /* Initialize the RX side lock */
2326 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2327 device_xname(dev), rxr->me);
2328 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2329
2330 if (ixgbe_dma_malloc(adapter, rsize,
2331 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2332 aprint_error_dev(dev,
2333 			    "Unable to allocate RX Descriptor memory\n");
2334 error = ENOMEM;
2335 goto err_rx_desc;
2336 }
2337 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2338 bzero((void *)rxr->rx_base, rsize);
2339
2340 		/* Allocate receive buffers for the ring */
2341 if (ixgbe_allocate_receive_buffers(rxr)) {
2342 aprint_error_dev(dev,
2343 "Critical Failure setting up receive buffers\n");
2344 error = ENOMEM;
2345 goto err_rx_desc;
2346 }
2347 }
2348
2349 /*
2350 ** Finally set up the queue holding structs
2351 */
2352 for (int i = 0; i < adapter->num_queues; i++) {
2353 que = &adapter->queues[i];
2354 que->adapter = adapter;
2355 que->me = i;
2356 que->txr = &adapter->tx_rings[i];
2357 que->rxr = &adapter->rx_rings[i];
2358 }
2359
2360 return (0);
2361
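/*
 * Error unwind: rxconf and txconf count the rings that were fully set
 * up, so only those have their descriptor DMA memory freed before the
 * ring arrays and queue structs themselves are released.
 */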
2362 err_rx_desc:
2363 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2364 ixgbe_dma_free(adapter, &rxr->rxdma);
2365 err_tx_desc:
2366 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2367 ixgbe_dma_free(adapter, &txr->txdma);
2368 free(adapter->rx_rings, M_DEVBUF);
2369 rx_fail:
2370 free(adapter->tx_rings, M_DEVBUF);
2371 tx_fail:
2372 free(adapter->queues, M_DEVBUF);
2373 fail:
2374 return (error);
2375 }
2376
2377