/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 289238 2015-10-13 17:34:18Z sbruno $*/
62 /*$NetBSD: ix_txrx.c,v 1.7 2016/12/02 10:34:23 msaitoh Exp $*/
63
64 #include "ixgbe.h"
65
66 #ifdef DEV_NETMAP
67 #include <net/netmap.h>
68 #include <sys/selinfo.h>
69 #include <dev/netmap/netmap_kern.h>
70
71 extern int ix_crcstrip;
72 #endif
73
/*
** HW RSC control:
**  This feature only works with IPv4,
**  and only on the 82599 and later.
**  It also breaks IP forwarding, which,
**  unlike software LRO, cannot be
**  controlled by the stack.  For these
**  reasons it is left off by default,
**  with no tunable interface; enabling
**  it requires recompiling with this
**  set to TRUE.
*/
86 static bool ixgbe_rsc_enable = FALSE;
87
88 #ifdef IXGBE_FDIR
/*
** For Flow Director: this is the rate at
** which we sample TX packets for the
** filter pool; with the default of 20,
** every 20th packet is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
98 static int atr_sample_rate = 20;
99 #endif
100
101 /* Shared PCI config read/write */
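/*
 * NetBSD's pci_conf_read()/pci_conf_write() operate on naturally
 * aligned 32-bit registers, so the 16-bit accesses the shared Intel
 * code expects are emulated here by reading the containing dword
 * and extracting or merging the appropriate half-word.
 */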
102 u16
103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
104 {
105 switch (reg % 4) {
106 case 0:
107 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
108 __BITS(15, 0);
109 case 2:
110 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
111 reg - 2), __BITS(31, 16));
112 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
114 break;
115 }
116 }
117
118 void
119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
120 {
121 pcireg_t old;
122
123 switch (reg % 4) {
124 case 0:
125 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
126 __BITS(31, 16);
127 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
128 break;
129 case 2:
130 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
131 __BITS(15, 0);
132 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
133 __SHIFTIN(value, __BITS(31, 16)) | old);
134 break;
135 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
137 break;
138 }
139
140 return;
141 }
142
143 /*********************************************************************
144 * Local Function prototypes
145 *********************************************************************/
146 static void ixgbe_setup_transmit_ring(struct tx_ring *);
147 static void ixgbe_free_transmit_buffers(struct tx_ring *);
148 static int ixgbe_setup_receive_ring(struct rx_ring *);
149 static void ixgbe_free_receive_buffers(struct rx_ring *);
150
151 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
152 struct ixgbe_hw_stats *);
153 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
154 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
155 static int ixgbe_tx_ctx_setup(struct tx_ring *,
156 struct mbuf *, u32 *, u32 *);
157 static int ixgbe_tso_setup(struct tx_ring *,
158 struct mbuf *, u32 *, u32 *);
159 #ifdef IXGBE_FDIR
160 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
161 #endif
162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
164 struct mbuf *, u32);
165
166 static void ixgbe_setup_hw_rsc(struct rx_ring *);
167
168 #ifdef IXGBE_LEGACY_TX
169 /*********************************************************************
170 * Transmit entry point
171 *
172 * ixgbe_start is called by the stack to initiate a transmit.
173 * The driver will remain in this routine as long as there are
174 * packets to transmit and transmit resources are available.
175 * In case resources are not available stack is notified and
176 * the packet is requeued.
177 **********************************************************************/
178
179 void
180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
181 {
182 int rc;
183 struct mbuf *m_head;
184 struct adapter *adapter = txr->adapter;
185
186 IXGBE_TX_LOCK_ASSERT(txr);
187
188 if ((ifp->if_flags & IFF_RUNNING) == 0)
189 return;
190 if (!adapter->link_active)
191 return;
192
193 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
194 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
195 break;
196
197 IFQ_POLL(&ifp->if_snd, m_head);
198 if (m_head == NULL)
199 break;
200
201 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
202 break;
203 }
204 IFQ_DEQUEUE(&ifp->if_snd, m_head);
205 if (rc == EFBIG) {
206 struct mbuf *mtmp;
207
208 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
209 m_head = mtmp;
210 rc = ixgbe_xmit(txr, m_head);
211 if (rc != 0)
212 adapter->efbig2_tx_dma_setup.ev_count++;
213 } else
214 adapter->m_defrag_failed.ev_count++;
215 }
216 if (rc != 0) {
217 m_freem(m_head);
218 continue;
219 }
220
221 /* Send a copy of the frame to the BPF listener */
222 bpf_mtap(ifp, m_head);
223 }
224 return;
225 }
226
227 /*
228 * Legacy TX start - called by the stack, this
229 * always uses the first tx ring, and should
230 * not be used with multiqueue tx enabled.
231 */
232 void
233 ixgbe_start(struct ifnet *ifp)
234 {
235 struct adapter *adapter = ifp->if_softc;
236 struct tx_ring *txr = adapter->tx_rings;
237
238 if (ifp->if_flags & IFF_RUNNING) {
239 IXGBE_TX_LOCK(txr);
240 ixgbe_start_locked(txr, ifp);
241 IXGBE_TX_UNLOCK(txr);
242 }
243 return;
244 }
245
246 #else /* ! IXGBE_LEGACY_TX */
247
248 /*
249 ** Multiqueue Transmit driver
250 **
251 */
252 int
253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
254 {
255 struct adapter *adapter = ifp->if_softc;
256 struct ix_queue *que;
257 struct tx_ring *txr;
258 int i, err = 0;
259 #ifdef RSS
260 uint32_t bucket_id;
261 #endif
262
	/*
	 * When doing RSS, map the packet to the same outbound
	 * queue that its incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, that should be the
	 * same bucket as the one for the CPU we are currently on.
	 */
270 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
271 #ifdef RSS
272 if (rss_hash2bucket(m->m_pkthdr.flowid,
273 M_HASHTYPE_GET(m), &bucket_id) == 0)
274 /* TODO: spit out something if bucket_id > num_queues? */
275 i = bucket_id % adapter->num_queues;
276 else
277 #endif
278 i = m->m_pkthdr.flowid % adapter->num_queues;
279 } else
280 i = curcpu % adapter->num_queues;
281
282 /* Check for a hung queue and pick alternative */
283 if (((1 << i) & adapter->active_queues) == 0)
284 i = ffsl(adapter->active_queues);
285
286 txr = &adapter->tx_rings[i];
287 que = &adapter->queues[i];
288
289 err = drbr_enqueue(ifp, txr->br, m);
290 if (err)
291 return (err);
292 if (IXGBE_TX_TRYLOCK(txr)) {
293 ixgbe_mq_start_locked(ifp, txr);
294 IXGBE_TX_UNLOCK(txr);
295 } else
296 softint_schedule(txr->txq_si);
297
298 return (0);
299 }
300
301 int
302 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
303 {
304 struct adapter *adapter = txr->adapter;
305 struct mbuf *next;
306 int enqueued = 0, err = 0;
307
308 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
309 adapter->link_active == 0)
310 return (ENETDOWN);
311
312 /* Process the queue */
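	/*
	 * On FreeBSD >= 901504 the mbuf stays at the head of the
	 * buf_ring via drbr_peek() until ixgbe_xmit() succeeds, at
	 * which point drbr_advance() removes it; if ixgbe_xmit()
	 * fails but leaves a (possibly modified) mbuf behind,
	 * drbr_putback() restores it as the head.  The pre-901504
	 * path simply dequeues up front and re-enqueues on failure.
	 */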
313 #if __FreeBSD_version < 901504
314 next = drbr_dequeue(ifp, txr->br);
315 while (next != NULL) {
316 if ((err = ixgbe_xmit(txr, &next)) != 0) {
317 if (next != NULL)
318 err = drbr_enqueue(ifp, txr->br, next);
319 #else
320 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
321 if ((err = ixgbe_xmit(txr, &next)) != 0) {
322 if (next == NULL) {
323 drbr_advance(ifp, txr->br);
324 } else {
325 drbr_putback(ifp, txr->br, next);
326 }
327 #endif
328 break;
329 }
330 #if __FreeBSD_version >= 901504
331 drbr_advance(ifp, txr->br);
332 #endif
333 enqueued++;
334 #if 0 // this is VF-only
335 #if __FreeBSD_version >= 1100036
336 /*
337 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
339 * address.
340 */
341 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
342 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
343 #endif
344 #endif
345 /* Send a copy of the frame to the BPF listener */
346 bpf_mtap(ifp, next);
347 if ((ifp->if_flags & IFF_RUNNING) == 0)
348 break;
349 #if __FreeBSD_version < 901504
350 next = drbr_dequeue(ifp, txr->br);
351 #endif
352 }
353
354 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
355 ixgbe_txeof(txr);
356
357 return (err);
358 }
359
360 /*
361 * Called from a taskqueue to drain queued transmit packets.
362 */
363 void
364 ixgbe_deferred_mq_start(void *arg, int pending)
365 {
366 struct tx_ring *txr = arg;
367 struct adapter *adapter = txr->adapter;
368 struct ifnet *ifp = adapter->ifp;
369
370 IXGBE_TX_LOCK(txr);
371 if (!drbr_empty(ifp, txr->br))
372 ixgbe_mq_start_locked(ifp, txr);
373 IXGBE_TX_UNLOCK(txr);
374 }
375
376 /*
377 * Flush all ring buffers
378 */
379 void
380 ixgbe_qflush(struct ifnet *ifp)
381 {
382 struct adapter *adapter = ifp->if_softc;
383 struct tx_ring *txr = adapter->tx_rings;
384 struct mbuf *m;
385
386 for (int i = 0; i < adapter->num_queues; i++, txr++) {
387 IXGBE_TX_LOCK(txr);
388 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
389 m_freem(m);
390 IXGBE_TX_UNLOCK(txr);
391 }
392 if_qflush(ifp);
393 }
394 #endif /* IXGBE_LEGACY_TX */
395
396
397 /*********************************************************************
398 *
399 * This routine maps the mbufs to tx descriptors, allowing the
400 * TX engine to transmit the packets.
401 * - return 0 on success, positive on failure
402 *
403 **********************************************************************/
404
405 static int
406 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
407 {
408 struct m_tag *mtag;
409 struct adapter *adapter = txr->adapter;
410 struct ethercom *ec = &adapter->osdep.ec;
411 u32 olinfo_status = 0, cmd_type_len;
412 int i, j, error;
413 int first;
414 bus_dmamap_t map;
415 struct ixgbe_tx_buf *txbuf;
416 union ixgbe_adv_tx_desc *txd = NULL;
417
418 /* Basic descriptor defines */
419 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
420 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
421
422 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
423 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
424
425 /*
426 * Important to capture the first descriptor
427 * used because it will contain the index of
428 * the one we tell the hardware to report back
429 */
430 first = txr->next_avail_desc;
431 txbuf = &txr->tx_buffers[first];
432 map = txbuf->map;
433
434 /*
435 * Map the packet for DMA.
436 */
437 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
438 m_head, BUS_DMA_NOWAIT);
439
440 if (__predict_false(error)) {
441
442 switch (error) {
443 case EAGAIN:
444 adapter->eagain_tx_dma_setup.ev_count++;
445 return EAGAIN;
446 case ENOMEM:
447 adapter->enomem_tx_dma_setup.ev_count++;
448 return EAGAIN;
449 case EFBIG:
450 /*
451 * XXX Try it again?
452 * do m_defrag() and retry bus_dmamap_load_mbuf().
453 */
454 adapter->efbig_tx_dma_setup.ev_count++;
455 return error;
456 case EINVAL:
457 adapter->einval_tx_dma_setup.ev_count++;
458 return error;
459 default:
460 adapter->other_tx_dma_setup.ev_count++;
461 return error;
462 }
463 }
464
465 /* Make certain there are enough descriptors */
466 if (map->dm_nsegs > txr->tx_avail - 2) {
467 txr->no_desc_avail.ev_count++;
468 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
469 return EAGAIN;
470 }
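	/*
	 * The "- 2" above leaves room for the context descriptor
	 * set up below in addition to the data descriptors, and
	 * keeps the ring from being driven completely full.
	 */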
471
	/*
	 * Set up the appropriate offload context;
	 * this will consume the first descriptor.
	 */
476 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
477 if (__predict_false(error)) {
478 return (error);
479 }
480
481 #ifdef IXGBE_FDIR
482 /* Do the flow director magic */
483 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
484 ++txr->atr_count;
485 if (txr->atr_count >= atr_sample_rate) {
486 ixgbe_atr(txr, m_head);
487 txr->atr_count = 0;
488 }
489 }
490 #endif
491
492 i = txr->next_avail_desc;
493 for (j = 0; j < map->dm_nsegs; j++) {
494 bus_size_t seglen;
495 bus_addr_t segaddr;
496
497 txbuf = &txr->tx_buffers[i];
498 txd = &txr->tx_base[i];
499 seglen = map->dm_segs[j].ds_len;
500 segaddr = htole64(map->dm_segs[j].ds_addr);
501
502 txd->read.buffer_addr = segaddr;
503 txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
505 txd->read.olinfo_status = htole32(olinfo_status);
506
507 if (++i == txr->num_desc)
508 i = 0;
509 }
510
511 txd->read.cmd_type_len |=
512 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
513 txr->tx_avail -= map->dm_nsegs;
514 txr->next_avail_desc = i;
515
516 txbuf->m_head = m_head;
	/*
	 * Here we swap the maps so that the last descriptor,
	 * which gets the completion interrupt, has the real
	 * map, and the first descriptor gets the unused map
	 * from this descriptor.
	 */
523 txr->tx_buffers[first].map = txbuf->map;
524 txbuf->map = map;
525 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
526 BUS_DMASYNC_PREWRITE);
527
528 /* Set the EOP descriptor that will be marked done */
529 txbuf = &txr->tx_buffers[first];
530 txbuf->eop = txd;
531
532 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
533 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
534 /*
535 * Advance the Transmit Descriptor Tail (Tdt), this tells the
536 * hardware that this frame is available to transmit.
537 */
538 ++txr->total_packets.ev_count;
539 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
540
541 /* Mark queue as having work */
542 if (txr->busy == 0)
543 txr->busy = 1;
544
545 return 0;
546 }
547
548 /*********************************************************************
549 *
550 * Allocate memory for tx_buffer structures. The tx_buffer stores all
551 * the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
553 *
554 **********************************************************************/
555 int
556 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
557 {
558 struct adapter *adapter = txr->adapter;
559 device_t dev = adapter->dev;
560 struct ixgbe_tx_buf *txbuf;
561 int error, i;
562
563 /*
564 * Setup DMA descriptor areas.
565 */
566 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
567 1, 0, /* alignment, bounds */
568 IXGBE_TSO_SIZE, /* maxsize */
569 adapter->num_segs, /* nsegments */
570 PAGE_SIZE, /* maxsegsize */
571 0, /* flags */
572 &txr->txtag))) {
573 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
574 goto fail;
575 }
576
577 if (!(txr->tx_buffers =
578 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
579 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
580 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
581 error = ENOMEM;
582 goto fail;
583 }
584
585 /* Create the descriptor buffer dma maps */
586 txbuf = txr->tx_buffers;
587 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
588 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
589 if (error != 0) {
590 aprint_error_dev(dev,
591 "Unable to create TX DMA map (%d)\n", error);
592 goto fail;
593 }
594 }
595
596 return 0;
597 fail:
	/* We free everything; this handles the case where we failed partway through */
599 ixgbe_free_transmit_structures(adapter);
600 return (error);
601 }
602
603 /*********************************************************************
604 *
605 * Initialize a transmit ring.
606 *
607 **********************************************************************/
608 static void
609 ixgbe_setup_transmit_ring(struct tx_ring *txr)
610 {
611 struct adapter *adapter = txr->adapter;
612 struct ixgbe_tx_buf *txbuf;
613 #ifdef DEV_NETMAP
614 struct netmap_adapter *na = NA(adapter->ifp);
615 struct netmap_slot *slot;
616 #endif /* DEV_NETMAP */
617
618 /* Clear the old ring contents */
619 IXGBE_TX_LOCK(txr);
620 #ifdef DEV_NETMAP
621 /*
622 * (under lock): if in netmap mode, do some consistency
623 * checks and set slot to entry 0 of the netmap ring.
624 */
625 slot = netmap_reset(na, NR_TX, txr->me, 0);
626 #endif /* DEV_NETMAP */
627 bzero((void *)txr->tx_base,
628 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
629 /* Reset indices */
630 txr->next_avail_desc = 0;
631 txr->next_to_clean = 0;
632
633 /* Free any existing tx buffers. */
634 txbuf = txr->tx_buffers;
635 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
636 if (txbuf->m_head != NULL) {
637 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
638 0, txbuf->m_head->m_pkthdr.len,
639 BUS_DMASYNC_POSTWRITE);
640 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
641 m_freem(txbuf->m_head);
642 txbuf->m_head = NULL;
643 }
644 #ifdef DEV_NETMAP
645 /*
646 * In netmap mode, set the map for the packet buffer.
647 * NOTE: Some drivers (not this one) also need to set
648 * the physical buffer address in the NIC ring.
649 * Slots in the netmap ring (indexed by "si") are
650 * kring->nkr_hwofs positions "ahead" wrt the
651 * corresponding slot in the NIC ring. In some drivers
652 * (not here) nkr_hwofs can be negative. Function
653 * netmap_idx_n2k() handles wraparounds properly.
654 */
655 if (slot) {
656 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
657 netmap_load_map(na, txr->txtag,
658 txbuf->map, NMB(na, slot + si));
659 }
660 #endif /* DEV_NETMAP */
661 /* Clear the EOP descriptor pointer */
662 txbuf->eop = NULL;
663 }
664
665 #ifdef IXGBE_FDIR
666 /* Set the rate at which we sample packets */
667 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
668 txr->atr_sample = atr_sample_rate;
669 #endif
670
671 /* Set number of descriptors available */
672 txr->tx_avail = adapter->num_tx_desc;
673
674 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
675 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
676 IXGBE_TX_UNLOCK(txr);
677 }
678
679 /*********************************************************************
680 *
681 * Initialize all transmit rings.
682 *
683 **********************************************************************/
684 int
685 ixgbe_setup_transmit_structures(struct adapter *adapter)
686 {
687 struct tx_ring *txr = adapter->tx_rings;
688
689 for (int i = 0; i < adapter->num_queues; i++, txr++)
690 ixgbe_setup_transmit_ring(txr);
691
692 return (0);
693 }
694
695 /*********************************************************************
696 *
697 * Free all transmit rings.
698 *
699 **********************************************************************/
700 void
701 ixgbe_free_transmit_structures(struct adapter *adapter)
702 {
703 struct tx_ring *txr = adapter->tx_rings;
704
705 for (int i = 0; i < adapter->num_queues; i++, txr++) {
706 ixgbe_free_transmit_buffers(txr);
707 ixgbe_dma_free(adapter, &txr->txdma);
708 IXGBE_TX_LOCK_DESTROY(txr);
709 }
710 free(adapter->tx_rings, M_DEVBUF);
711 }
712
713 /*********************************************************************
714 *
715 * Free transmit ring related data structures.
716 *
717 **********************************************************************/
718 static void
719 ixgbe_free_transmit_buffers(struct tx_ring *txr)
720 {
721 struct adapter *adapter = txr->adapter;
722 struct ixgbe_tx_buf *tx_buffer;
723 int i;
724
725 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
726
727 if (txr->tx_buffers == NULL)
728 return;
729
730 tx_buffer = txr->tx_buffers;
731 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
732 if (tx_buffer->m_head != NULL) {
733 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
734 0, tx_buffer->m_head->m_pkthdr.len,
735 BUS_DMASYNC_POSTWRITE);
736 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
737 m_freem(tx_buffer->m_head);
738 tx_buffer->m_head = NULL;
739 if (tx_buffer->map != NULL) {
740 ixgbe_dmamap_destroy(txr->txtag,
741 tx_buffer->map);
742 tx_buffer->map = NULL;
743 }
744 } else if (tx_buffer->map != NULL) {
745 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
746 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
747 tx_buffer->map = NULL;
748 }
749 }
750 #ifndef IXGBE_LEGACY_TX
751 if (txr->br != NULL)
752 buf_ring_free(txr->br, M_DEVBUF);
753 #endif
754 if (txr->tx_buffers != NULL) {
755 free(txr->tx_buffers, M_DEVBUF);
756 txr->tx_buffers = NULL;
757 }
758 if (txr->txtag != NULL) {
759 ixgbe_dma_tag_destroy(txr->txtag);
760 txr->txtag = NULL;
761 }
762 return;
763 }
764
765 /*********************************************************************
766 *
767 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
768 *
769 **********************************************************************/
770
771 static int
772 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
773 u32 *cmd_type_len, u32 *olinfo_status)
774 {
775 struct adapter *adapter = txr->adapter;
776 struct ethercom *ec = &adapter->osdep.ec;
777 struct m_tag *mtag;
778 struct ixgbe_adv_tx_context_desc *TXD;
779 struct ether_vlan_header *eh;
780 struct ip ip;
781 struct ip6_hdr ip6;
782 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
783 int ehdrlen, ip_hlen = 0;
784 u16 etype;
785 u8 ipproto __diagused = 0;
786 int offload = TRUE;
787 int ctxd = txr->next_avail_desc;
788 u16 vtag = 0;
789
790 /* First check if TSO is to be used */
791 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
792 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
793
794 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
795 offload = FALSE;
796
797 /* Indicate the whole packet as payload when not doing TSO */
798 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
799
800 /* Now ready a context descriptor */
801 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
802
803 /*
804 ** In advanced descriptors the vlan tag must
805 ** be placed into the context descriptor. Hence
806 ** we need to make one even if not doing offloads.
807 */
808 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
809 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
810 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
811 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
812 return (0);
813
814 /*
815 * Determine where frame payload starts.
816 * Jump over vlan headers if already present,
817 * helpful for QinQ too.
818 */
819 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
820 eh = mtod(mp, struct ether_vlan_header *);
821 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
822 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
823 etype = ntohs(eh->evl_proto);
824 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
825 } else {
826 etype = ntohs(eh->evl_encap_proto);
827 ehdrlen = ETHER_HDR_LEN;
828 }
829
830 /* Set the ether header length */
831 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
832
833 if (offload == FALSE)
834 goto no_offloads;
835
836 switch (etype) {
837 case ETHERTYPE_IP:
838 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
839 ip_hlen = ip.ip_hl << 2;
840 ipproto = ip.ip_p;
841 #if 0
842 ip.ip_sum = 0;
843 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
844 #else
845 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
846 ip.ip_sum == 0);
847 #endif
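		/*
		 * The assertion above encodes the expectation that the
		 * stack zeroes ip_sum when it requests M_CSUM_IPv4; the
		 * hardware then fills in the header checksum when the
		 * IXSM option bit is set below.
		 */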
848 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
849 break;
850 case ETHERTYPE_IPV6:
851 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
852 ip_hlen = sizeof(ip6);
853 /* XXX-BZ this will go badly in case of ext hdrs. */
854 ipproto = ip6.ip6_nxt;
855 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
856 break;
857 default:
858 break;
859 }
860
861 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
862 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
863
864 vlan_macip_lens |= ip_hlen;
865
866 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
867 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
868 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
869 KASSERT(ipproto == IPPROTO_TCP);
870 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
871 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
872 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
873 KASSERT(ipproto == IPPROTO_UDP);
874 }
875
876 no_offloads:
877 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
878
879 /* Now copy bits into descriptor */
880 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
881 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
882 TXD->seqnum_seed = htole32(0);
883 TXD->mss_l4len_idx = htole32(0);
884
885 /* We've consumed the first desc, adjust counters */
886 if (++ctxd == txr->num_desc)
887 ctxd = 0;
888 txr->next_avail_desc = ctxd;
889 --txr->tx_avail;
890
891 return 0;
892 }
893
894 /**********************************************************************
895 *
896 * Setup work for hardware segmentation offload (TSO) on
897 * adapters using advanced tx descriptors
898 *
899 **********************************************************************/
900 static int
901 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
902 u32 *cmd_type_len, u32 *olinfo_status)
903 {
904 struct m_tag *mtag;
905 struct adapter *adapter = txr->adapter;
906 struct ethercom *ec = &adapter->osdep.ec;
907 struct ixgbe_adv_tx_context_desc *TXD;
908 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
909 u32 mss_l4len_idx = 0, paylen;
910 u16 vtag = 0, eh_type;
911 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
912 struct ether_vlan_header *eh;
913 #ifdef INET6
914 struct ip6_hdr *ip6;
915 #endif
916 #ifdef INET
917 struct ip *ip;
918 #endif
919 struct tcphdr *th;
920
921
922 /*
923 * Determine where frame payload starts.
924 * Jump over vlan headers if already present
925 */
926 eh = mtod(mp, struct ether_vlan_header *);
927 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
928 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
929 eh_type = eh->evl_proto;
930 } else {
931 ehdrlen = ETHER_HDR_LEN;
932 eh_type = eh->evl_encap_proto;
933 }
934
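	/*
	 * Locate the TCP header and seed th_sum with the pseudo-header
	 * checksum (addresses and protocol, no length), which is what
	 * the hardware builds on when it inserts the final checksums
	 * during TSO.
	 */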
935 switch (ntohs(eh_type)) {
936 #ifdef INET6
937 case ETHERTYPE_IPV6:
938 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
939 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
940 if (ip6->ip6_nxt != IPPROTO_TCP)
941 return (ENXIO);
942 ip_hlen = sizeof(struct ip6_hdr);
943 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
944 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
945 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
946 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
947 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
948 break;
949 #endif
950 #ifdef INET
951 case ETHERTYPE_IP:
952 ip = (struct ip *)(mp->m_data + ehdrlen);
953 if (ip->ip_p != IPPROTO_TCP)
954 return (ENXIO);
955 ip->ip_sum = 0;
956 ip_hlen = ip->ip_hl << 2;
957 th = (struct tcphdr *)((char *)ip + ip_hlen);
958 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
959 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
960 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
961 /* Tell transmit desc to also do IPv4 checksum. */
962 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
963 break;
964 #endif
965 default:
966 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
967 __func__, ntohs(eh_type));
968 break;
969 }
970
971 ctxd = txr->next_avail_desc;
972 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
973
974 tcp_hlen = th->th_off << 2;
975
976 /* This is used in the transmit desc in encap */
977 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
978
979 /* VLAN MACLEN IPLEN */
980 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
981 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
982 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
983 }
984
985 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
986 vlan_macip_lens |= ip_hlen;
987 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
988
989 /* ADV DTYPE TUCMD */
990 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
991 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
992 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
993
994 /* MSS L4LEN IDX */
995 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
996 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
997 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
998
999 TXD->seqnum_seed = htole32(0);
1000
1001 if (++ctxd == txr->num_desc)
1002 ctxd = 0;
1003
1004 txr->tx_avail--;
1005 txr->next_avail_desc = ctxd;
1006 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1007 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1008 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1009 ++txr->tso_tx.ev_count;
1010 return (0);
1011 }
1012
1013
1014 /**********************************************************************
1015 *
1016 * Examine each tx_buffer in the used queue. If the hardware is done
1017 * processing the packet then free associated resources. The
1018 * tx_buffer is put back on the free queue.
1019 *
1020 **********************************************************************/
1021 void
1022 ixgbe_txeof(struct tx_ring *txr)
1023 {
1024 struct adapter *adapter = txr->adapter;
1025 struct ifnet *ifp = adapter->ifp;
1026 u32 work, processed = 0;
1027 u32 limit = adapter->tx_process_limit;
1028 struct ixgbe_tx_buf *buf;
1029 union ixgbe_adv_tx_desc *txd;
1030
1031 KASSERT(mutex_owned(&txr->tx_mtx));
1032
1033 #ifdef DEV_NETMAP
1034 if (ifp->if_capenable & IFCAP_NETMAP) {
1035 struct netmap_adapter *na = NA(ifp);
1036 struct netmap_kring *kring = &na->tx_rings[txr->me];
1037 txd = txr->tx_base;
1038 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1039 BUS_DMASYNC_POSTREAD);
1040 /*
1041 * In netmap mode, all the work is done in the context
1042 * of the client thread. Interrupt handlers only wake up
1043 * clients, which may be sleeping on individual rings
1044 * or on a global resource for all rings.
1045 * To implement tx interrupt mitigation, we wake up the client
1046 * thread roughly every half ring, even if the NIC interrupts
1047 * more frequently. This is implemented as follows:
1048 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1049 * the slot that should wake up the thread (nkr_num_slots
1050 * means the user thread should not be woken up);
1051 * - the driver ignores tx interrupts unless netmap_mitigate=0
1052 * or the slot has the DD bit set.
1053 */
1054 if (!netmap_mitigate ||
1055 (kring->nr_kflags < kring->nkr_num_slots &&
1056 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1057 netmap_tx_irq(ifp, txr->me);
1058 }
1059 return;
1060 }
1061 #endif /* DEV_NETMAP */
1062
1063 if (txr->tx_avail == txr->num_desc) {
1064 txr->busy = 0;
1065 return;
1066 }
1067
1068 /* Get work starting point */
1069 work = txr->next_to_clean;
1070 buf = &txr->tx_buffers[work];
1071 txd = &txr->tx_base[work];
1072 work -= txr->num_desc; /* The distance to ring end */
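	/*
	 * 'work' is biased by -num_desc (wrapping as an unsigned value),
	 * so it reaches zero exactly when the index passes the end of
	 * the ring; the "!work" tests below rely on this to detect the
	 * wrap and reset to the ring start.
	 */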
1073 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1074 BUS_DMASYNC_POSTREAD);
1075 do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
1077 if (eop == NULL) /* No work */
1078 break;
1079
1080 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1081 break; /* I/O not complete */
1082
1083 if (buf->m_head) {
1084 txr->bytes +=
1085 buf->m_head->m_pkthdr.len;
1086 bus_dmamap_sync(txr->txtag->dt_dmat,
1087 buf->map,
1088 0, buf->m_head->m_pkthdr.len,
1089 BUS_DMASYNC_POSTWRITE);
1090 ixgbe_dmamap_unload(txr->txtag,
1091 buf->map);
1092 m_freem(buf->m_head);
1093 buf->m_head = NULL;
1094 }
1095 buf->eop = NULL;
1096 ++txr->tx_avail;
1097
1098 /* We clean the range if multi segment */
1099 while (txd != eop) {
1100 ++txd;
1101 ++buf;
1102 ++work;
1103 /* wrap the ring? */
1104 if (__predict_false(!work)) {
1105 work -= txr->num_desc;
1106 buf = txr->tx_buffers;
1107 txd = txr->tx_base;
1108 }
1109 if (buf->m_head) {
1110 txr->bytes +=
1111 buf->m_head->m_pkthdr.len;
1112 bus_dmamap_sync(txr->txtag->dt_dmat,
1113 buf->map,
1114 0, buf->m_head->m_pkthdr.len,
1115 BUS_DMASYNC_POSTWRITE);
1116 ixgbe_dmamap_unload(txr->txtag,
1117 buf->map);
1118 m_freem(buf->m_head);
1119 buf->m_head = NULL;
1120 }
1121 ++txr->tx_avail;
1122 buf->eop = NULL;
1123
1124 }
1125 ++txr->packets;
1126 ++processed;
1127 ++ifp->if_opackets;
1128
1129 /* Try the next packet */
1130 ++txd;
1131 ++buf;
1132 ++work;
1133 /* reset with a wrap */
1134 if (__predict_false(!work)) {
1135 work -= txr->num_desc;
1136 buf = txr->tx_buffers;
1137 txd = txr->tx_base;
1138 }
1139 prefetch(txd);
1140 } while (__predict_true(--limit));
1141
1142 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1143 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1144
1145 work += txr->num_desc;
1146 txr->next_to_clean = work;
1147
	/*
	** Queue hang detection: we know there is work
	** outstanding or the first return above would
	** have been taken, so increment busy if nothing
	** managed to get cleaned; local_timer checks this
	** count and marks the queue HUNG once it exceeds
	** the maximum number of attempts.
	*/
1156 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1157 ++txr->busy;
	/*
	** If anything was cleaned, reset the state to 1;
	** note this also clears HUNG if it was set.
	*/
1162 if (processed)
1163 txr->busy = 1;
1164
1165 if (txr->tx_avail == txr->num_desc)
1166 txr->busy = 0;
1167
1168 return;
1169 }
1170
1171
1172 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry,
** allowing traffic flows to be identified and kept
** on the same CPU.  Doing this for every packet
** would be a performance hit, so we only do it at
** the IXGBE_FDIR_RATE sampling rate.
*/
1181 static void
1182 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1183 {
1184 struct adapter *adapter = txr->adapter;
1185 struct ix_queue *que;
1186 struct ip *ip;
1187 struct tcphdr *th;
1188 struct udphdr *uh;
1189 struct ether_vlan_header *eh;
1190 union ixgbe_atr_hash_dword input = {.dword = 0};
1191 union ixgbe_atr_hash_dword common = {.dword = 0};
1192 int ehdrlen, ip_hlen;
1193 u16 etype;
1194
1195 eh = mtod(mp, struct ether_vlan_header *);
1196 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1197 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1198 etype = eh->evl_proto;
1199 } else {
1200 ehdrlen = ETHER_HDR_LEN;
1201 etype = eh->evl_encap_proto;
1202 }
1203
1204 /* Only handling IPv4 */
1205 if (etype != htons(ETHERTYPE_IP))
1206 return;
1207
1208 ip = (struct ip *)(mp->m_data + ehdrlen);
1209 ip_hlen = ip->ip_hl << 2;
1210
1211 /* check if we're UDP or TCP */
1212 switch (ip->ip_p) {
1213 case IPPROTO_TCP:
1214 th = (struct tcphdr *)((char *)ip + ip_hlen);
1215 /* src and dst are inverted */
1216 common.port.dst ^= th->th_sport;
1217 common.port.src ^= th->th_dport;
1218 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1219 break;
1220 case IPPROTO_UDP:
1221 uh = (struct udphdr *)((char *)ip + ip_hlen);
1222 /* src and dst are inverted */
1223 common.port.dst ^= uh->uh_sport;
1224 common.port.src ^= uh->uh_dport;
1225 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1226 break;
1227 default:
1228 return;
1229 }
1230
1231 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1232 if (mp->m_pkthdr.ether_vtag)
1233 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1234 else
1235 common.flex_bytes ^= etype;
1236 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1237
1238 que = &adapter->queues[txr->me];
1239 /*
1240 ** This assumes the Rx queue and Tx
1241 ** queue are bound to the same CPU
1242 */
1243 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1244 input, common, que->msix);
1245 }
1246 #endif /* IXGBE_FDIR */
1247
1248 /*
1249 ** Used to detect a descriptor that has
1250 ** been merged by Hardware RSC.
1251 */
1252 static inline u32
1253 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1254 {
1255 return (le32toh(rx->wb.lower.lo_dword.data) &
1256 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1257 }
1258
1259 /*********************************************************************
1260 *
 *  Initialize the Hardware RSC (LRO) feature on the 82599
 *  for an RX ring; this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4, and
 *  our testing has shown software LRO to be just as
 *  effective, it is disabled by default.
1268 *
1269 **********************************************************************/
1270 static void
1271 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1272 {
1273 struct adapter *adapter = rxr->adapter;
1274 struct ixgbe_hw *hw = &adapter->hw;
1275 u32 rscctrl, rdrxctl;
1276
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write back the cleared enable bit so RSC is really off */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1283
1284 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1285 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1286 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1287 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1288 #endif /* DEV_NETMAP */
1289 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1290 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1291 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1292
1293 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1294 rscctrl |= IXGBE_RSCCTL_RSCEN;
1295 /*
1296 ** Limit the total number of descriptors that
1297 ** can be combined, so it does not exceed 64K
1298 */
1299 if (rxr->mbuf_sz == MCLBYTES)
1300 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1301 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1302 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1303 else if (rxr->mbuf_sz == MJUM9BYTES)
1304 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1305 else /* Using 16K cluster */
1306 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1307
1308 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1309
1310 /* Enable TCP header recognition */
1311 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1312 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1313 IXGBE_PSRTYPE_TCPHDR));
1314
1315 /* Disable RSC for ACK packets */
1316 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1317 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1318
1319 rxr->hw_rsc = TRUE;
1320 }
1321 /*********************************************************************
1322 *
 *  Refresh mbuf buffers for RX descriptor rings.
 *   - Keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be
 *     obtained it simply returns, keeping its placeholder,
 *     and can be called again later to retry.
1328 *
1329 **********************************************************************/
1330 static void
1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1332 {
1333 struct adapter *adapter = rxr->adapter;
1334 struct ixgbe_rx_buf *rxbuf;
1335 struct mbuf *mp;
1336 int i, j, error;
1337 bool refreshed = false;
1338
1339 i = j = rxr->next_to_refresh;
1340 /* Control the loop with one beyond */
1341 if (++j == rxr->num_desc)
1342 j = 0;
1343
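	/*
	 * 'j' always runs one slot ahead of 'i', the slot being
	 * refreshed; the loop exits when the look-ahead slot reaches
	 * 'limit', so next_to_refresh never advances onto the
	 * caller-supplied limit.
	 */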
1344 while (j != limit) {
1345 rxbuf = &rxr->rx_buffers[i];
1346 if (rxbuf->buf == NULL) {
1347 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1348 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1349 if (mp == NULL) {
1350 rxr->no_jmbuf.ev_count++;
1351 goto update;
1352 }
1353 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1354 m_adj(mp, ETHER_ALIGN);
1355 } else
1356 mp = rxbuf->buf;
1357
1358 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1359
1360 /* If we're dealing with an mbuf that was copied rather
1361 * than replaced, there's no need to go through busdma.
1362 */
1363 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1364 /* Get the memory mapping */
1365 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1366 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1367 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1368 if (error != 0) {
1369 printf("Refresh mbufs: payload dmamap load"
1370 " failure - %d\n", error);
1371 m_free(mp);
1372 rxbuf->buf = NULL;
1373 goto update;
1374 }
1375 rxbuf->buf = mp;
1376 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1377 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1378 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1379 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1380 } else {
1381 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1382 rxbuf->flags &= ~IXGBE_RX_COPY;
1383 }
1384
1385 refreshed = true;
1386 /* Next is precalculated */
1387 i = j;
1388 rxr->next_to_refresh = i;
1389 if (++j == rxr->num_desc)
1390 j = 0;
1391 }
1392 update:
1393 if (refreshed) /* Update hardware tail index */
1394 IXGBE_WRITE_REG(&adapter->hw,
1395 rxr->tail, rxr->next_to_refresh);
1396 return;
1397 }
1398
1399 /*********************************************************************
1400 *
1401 * Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
1403 * that we'll need is equal to the number of receive descriptors
1404 * that we've allocated.
1405 *
1406 **********************************************************************/
1407 int
1408 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1409 {
1410 struct adapter *adapter = rxr->adapter;
1411 device_t dev = adapter->dev;
1412 struct ixgbe_rx_buf *rxbuf;
1413 int bsize, error;
1414
1415 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1416 if (!(rxr->rx_buffers =
1417 (struct ixgbe_rx_buf *) malloc(bsize,
1418 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1419 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1420 error = ENOMEM;
1421 goto fail;
1422 }
1423
1424 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1425 1, 0, /* alignment, bounds */
1426 MJUM16BYTES, /* maxsize */
1427 1, /* nsegments */
1428 MJUM16BYTES, /* maxsegsize */
1429 0, /* flags */
1430 &rxr->ptag))) {
1431 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1432 goto fail;
1433 }
1434
1435 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1436 rxbuf = &rxr->rx_buffers[i];
1437 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1438 if (error) {
1439 aprint_error_dev(dev, "Unable to create RX dma map\n");
1440 goto fail;
1441 }
1442 }
1443
1444 return (0);
1445
1446 fail:
1447 /* Frees all, but can handle partial completion */
1448 ixgbe_free_receive_structures(adapter);
1449 return (error);
1450 }
1451
1452
1453 static void
1454 ixgbe_free_receive_ring(struct rx_ring *rxr)
1455 {
1456 struct ixgbe_rx_buf *rxbuf;
1457
1458 for (int i = 0; i < rxr->num_desc; i++) {
1459 rxbuf = &rxr->rx_buffers[i];
1460 if (rxbuf->buf != NULL) {
1461 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1462 0, rxbuf->buf->m_pkthdr.len,
1463 BUS_DMASYNC_POSTREAD);
1464 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1465 rxbuf->buf->m_flags |= M_PKTHDR;
1466 m_freem(rxbuf->buf);
1467 rxbuf->buf = NULL;
1468 rxbuf->flags = 0;
1469 }
1470 }
1471 }
1472
1473
1474 /*********************************************************************
1475 *
1476 * Initialize a receive ring and its buffers.
1477 *
1478 **********************************************************************/
1479 static int
1480 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1481 {
1482 struct adapter *adapter;
1483 struct ixgbe_rx_buf *rxbuf;
1484 #ifdef LRO
1485 struct ifnet *ifp;
1486 struct lro_ctrl *lro = &rxr->lro;
1487 #endif /* LRO */
1488 int rsize, error = 0;
1489 #ifdef DEV_NETMAP
1490 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1491 struct netmap_slot *slot;
1492 #endif /* DEV_NETMAP */
1493
1494 adapter = rxr->adapter;
1495 #ifdef LRO
1496 ifp = adapter->ifp;
1497 #endif /* LRO */
1498
1499 /* Clear the ring contents */
1500 IXGBE_RX_LOCK(rxr);
1501 #ifdef DEV_NETMAP
1502 /* same as in ixgbe_setup_transmit_ring() */
1503 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1504 #endif /* DEV_NETMAP */
1505 rsize = roundup2(adapter->num_rx_desc *
1506 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1507 bzero((void *)rxr->rx_base, rsize);
1508 /* Cache the size */
1509 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1510
1511 /* Free current RX buffer structs and their mbufs */
1512 ixgbe_free_receive_ring(rxr);
1513
1514 IXGBE_RX_UNLOCK(rxr);
1515
1516 /* Now reinitialize our supply of jumbo mbufs. The number
1517 * or size of jumbo mbufs may have changed.
1518 */
1519 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1520 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1521
1522 IXGBE_RX_LOCK(rxr);
1523
1524 /* Now replenish the mbufs */
1525 for (int j = 0; j != rxr->num_desc; ++j) {
1526 struct mbuf *mp;
1527
1528 rxbuf = &rxr->rx_buffers[j];
1529 #ifdef DEV_NETMAP
1530 /*
1531 * In netmap mode, fill the map and set the buffer
1532 * address in the NIC ring, considering the offset
1533 * between the netmap and NIC rings (see comment in
1534 * ixgbe_setup_transmit_ring() ). No need to allocate
1535 * an mbuf, so end the block with a continue;
1536 */
1537 if (slot) {
1538 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1539 uint64_t paddr;
1540 void *addr;
1541
1542 addr = PNMB(na, slot + sj, &paddr);
1543 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1544 /* Update descriptor and the cached value */
1545 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1546 rxbuf->addr = htole64(paddr);
1547 continue;
1548 }
1549 #endif /* DEV_NETMAP */
1550 rxbuf->flags = 0;
1551 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1552 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1553 if (rxbuf->buf == NULL) {
1554 error = ENOBUFS;
1555 goto fail;
1556 }
1557 mp = rxbuf->buf;
1558 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1559 /* Get the memory mapping */
1560 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1561 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1562 if (error != 0)
1563 goto fail;
1564 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1565 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1566 /* Update the descriptor and the cached value */
1567 rxr->rx_base[j].read.pkt_addr =
1568 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1569 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1570 }
1571
1572
1573 /* Setup our descriptor indices */
1574 rxr->next_to_check = 0;
1575 rxr->next_to_refresh = 0;
1576 rxr->lro_enabled = FALSE;
1577 rxr->rx_copies.ev_count = 0;
1578 rxr->rx_bytes.ev_count = 0;
1579 rxr->vtag_strip = FALSE;
1580
1581 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1582 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1583
1584 /*
1585 ** Now set up the LRO interface:
1586 */
1587 if (ixgbe_rsc_enable)
1588 ixgbe_setup_hw_rsc(rxr);
1589 #ifdef LRO
1590 else if (ifp->if_capenable & IFCAP_LRO) {
1591 device_t dev = adapter->dev;
1592 int err = tcp_lro_init(lro);
1593 if (err) {
1594 device_printf(dev, "LRO Initialization failed!\n");
1595 goto fail;
1596 }
1597 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1598 rxr->lro_enabled = TRUE;
1599 lro->ifp = adapter->ifp;
1600 }
1601 #endif /* LRO */
1602
1603 IXGBE_RX_UNLOCK(rxr);
1604 return (0);
1605
1606 fail:
1607 ixgbe_free_receive_ring(rxr);
1608 IXGBE_RX_UNLOCK(rxr);
1609 return (error);
1610 }
1611
1612 /*********************************************************************
1613 *
1614 * Initialize all receive rings.
1615 *
1616 **********************************************************************/
1617 int
1618 ixgbe_setup_receive_structures(struct adapter *adapter)
1619 {
1620 struct rx_ring *rxr = adapter->rx_rings;
1621 int j;
1622
1623 for (j = 0; j < adapter->num_queues; j++, rxr++)
1624 if (ixgbe_setup_receive_ring(rxr))
1625 goto fail;
1626
1627 return (0);
1628 fail:
	/*
	 * Free the RX buffers allocated so far; we only handle
	 * the rings that completed, since the failing case will
	 * have cleaned up for itself.  Ring 'j' failed, so it is
	 * the terminus.
	 */
1634 for (int i = 0; i < j; ++i) {
1635 rxr = &adapter->rx_rings[i];
1636 ixgbe_free_receive_ring(rxr);
1637 }
1638
1639 return (ENOBUFS);
1640 }
1641
1642
1643 /*********************************************************************
1644 *
1645 * Free all receive rings.
1646 *
1647 **********************************************************************/
1648 void
1649 ixgbe_free_receive_structures(struct adapter *adapter)
1650 {
1651 struct rx_ring *rxr = adapter->rx_rings;
1652
1653 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1654
1655 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1656 #ifdef LRO
1657 struct lro_ctrl *lro = &rxr->lro;
1658 #endif /* LRO */
1659 ixgbe_free_receive_buffers(rxr);
1660 #ifdef LRO
1661 /* Free LRO memory */
1662 tcp_lro_free(lro);
1663 #endif /* LRO */
1664 /* Free the ring memory as well */
1665 ixgbe_dma_free(adapter, &rxr->rxdma);
1666 IXGBE_RX_LOCK_DESTROY(rxr);
1667 }
1668
1669 free(adapter->rx_rings, M_DEVBUF);
1670 }
1671
1672
1673 /*********************************************************************
1674 *
1675 * Free receive ring data structures
1676 *
1677 **********************************************************************/
1678 static void
1679 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1680 {
1681 struct adapter *adapter = rxr->adapter;
1682 struct ixgbe_rx_buf *rxbuf;
1683
1684 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1685
1686 /* Cleanup any existing buffers */
1687 if (rxr->rx_buffers != NULL) {
1688 for (int i = 0; i < adapter->num_rx_desc; i++) {
1689 rxbuf = &rxr->rx_buffers[i];
1690 if (rxbuf->buf != NULL) {
1691 bus_dmamap_sync(rxr->ptag->dt_dmat,
1692 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1693 BUS_DMASYNC_POSTREAD);
1694 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1695 rxbuf->buf->m_flags |= M_PKTHDR;
1696 m_freem(rxbuf->buf);
1697 }
1698 rxbuf->buf = NULL;
1699 if (rxbuf->pmap != NULL) {
1700 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1701 rxbuf->pmap = NULL;
1702 }
1703 }
1704 if (rxr->rx_buffers != NULL) {
1705 free(rxr->rx_buffers, M_DEVBUF);
1706 rxr->rx_buffers = NULL;
1707 }
1708 }
1709
1710 if (rxr->ptag != NULL) {
1711 ixgbe_dma_tag_destroy(rxr->ptag);
1712 rxr->ptag = NULL;
1713 }
1714
1715 return;
1716 }
1717
1718 static __inline void
1719 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1720 {
1721 int s;
1722
1723 #ifdef LRO
1724 struct adapter *adapter = ifp->if_softc;
1725 struct ethercom *ec = &adapter->osdep.ec;
1726
	/*
	 * At the moment LRO is only for IP/TCP packets whose TCP
	 * checksum has been verified by hardware, and the Ethernet
	 * header must not carry a VLAN tag.  For IPv6 we do not yet
	 * support extension headers.
	 */
1732 if (rxr->lro_enabled &&
1733 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1734 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1735 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1736 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1737 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1738 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1739 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1740 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
1747 if (rxr->lro.lro_cnt != 0)
1748 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1749 return;
1750 }
1751 #endif /* LRO */
1752
1753 IXGBE_RX_UNLOCK(rxr);
1754
1755 s = splnet();
1756 /* Pass this up to any BPF listeners. */
1757 bpf_mtap(ifp, m);
1758 if_input(ifp, m);
1759 splx(s);
1760
1761 IXGBE_RX_LOCK(rxr);
1762 }
1763
1764 static __inline void
1765 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1766 {
1767 struct ixgbe_rx_buf *rbuf;
1768
1769 rbuf = &rxr->rx_buffers[i];
1770
1771
	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it is easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {	/* Partial chain? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
1789 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1790
1791 rbuf->flags = 0;
1792
1793 return;
1794 }
1795
1796
1797 /*********************************************************************
1798 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has
 *  been DMA'd into host memory up to the stack.
1802 *
1803 * Return TRUE for more work, FALSE for all clean.
1804 *********************************************************************/
1805 bool
1806 ixgbe_rxeof(struct ix_queue *que)
1807 {
1808 struct adapter *adapter = que->adapter;
1809 struct rx_ring *rxr = que->rxr;
1810 struct ifnet *ifp = adapter->ifp;
1811 #ifdef LRO
1812 struct lro_ctrl *lro = &rxr->lro;
1813 struct lro_entry *queued;
1814 #endif /* LRO */
1815 int i, nextp, processed = 0;
1816 u32 staterr = 0;
1817 u32 count = adapter->rx_process_limit;
1818 union ixgbe_adv_rx_desc *cur;
1819 struct ixgbe_rx_buf *rbuf, *nbuf;
1820 #ifdef RSS
1821 u16 pkt_info;
1822 #endif
1823
1824 IXGBE_RX_LOCK(rxr);
1825
1826 #ifdef DEV_NETMAP
1827 /* Same as the txeof routine: wakeup clients on intr. */
1828 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1829 IXGBE_RX_UNLOCK(rxr);
1830 return (FALSE);
1831 }
1832 #endif /* DEV_NETMAP */
1833
1834 for (i = rxr->next_to_check; count != 0;) {
1835 struct mbuf *sendmp, *mp;
1836 u32 rsc, ptype;
1837 u16 len;
1838 u16 vtag = 0;
1839 bool eop;
1840
1841 /* Sync the ring. */
1842 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1843 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1844
1845 cur = &rxr->rx_base[i];
1846 staterr = le32toh(cur->wb.upper.status_error);
1847 #ifdef RSS
1848 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1849 #endif
1850
1851 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1852 break;
1853 if ((ifp->if_flags & IFF_RUNNING) == 0)
1854 break;
1855
1856 count--;
1857 sendmp = NULL;
1858 nbuf = NULL;
1859 rsc = 0;
1860 cur->wb.upper.status_error = 0;
1861 rbuf = &rxr->rx_buffers[i];
1862 mp = rbuf->buf;
1863
1864 len = le16toh(cur->wb.upper.length);
1865 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1866 IXGBE_RXDADV_PKTTYPE_MASK;
1867 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1868
1869 /* Make sure bad packets are discarded */
1870 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1871 #if __FreeBSD_version >= 1100036
1872 if (IXGBE_IS_VF(adapter))
1873 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1874 #endif
1875 rxr->rx_discarded.ev_count++;
1876 ixgbe_rx_discard(rxr, i);
1877 goto next_desc;
1878 }
1879
1880 /*
1881 		** On the 82599, which supports a hardware
1882 		** LRO (called HW RSC), packets need not be
1883 		** fragmented across sequential descriptors;
1884 		** instead, the next descriptor of the frame
1885 		** is indicated in bits of the descriptor.
1886 		** This also means that we might process
1887 		** more than one packet at a time, something
1888 		** that has never been true before; it
1889 		** required eliminating global chain pointers
1890 		** in favor of what we are doing here. -jfv
1891 */
1892 if (!eop) {
1893 /*
1894 ** Figure out the next descriptor
1895 ** of this frame.
1896 */
1897 if (rxr->hw_rsc == TRUE) {
1898 rsc = ixgbe_rsc_count(cur);
1899 rxr->rsc_num += (rsc - 1);
1900 }
1901 if (rsc) { /* Get hardware index */
1902 nextp = ((staterr &
1903 IXGBE_RXDADV_NEXTP_MASK) >>
1904 IXGBE_RXDADV_NEXTP_SHIFT);
1905 } else { /* Just sequential */
1906 nextp = i + 1;
1907 if (nextp == adapter->num_rx_desc)
1908 nextp = 0;
1909 }
1910 nbuf = &rxr->rx_buffers[nextp];
1911 prefetch(nbuf);
1912 }
1913 /*
1914 ** Rather than using the fmp/lmp global pointers
1915 ** we now keep the head of a packet chain in the
1916 ** buffer struct and pass this along from one
1917 ** descriptor to the next, until we get EOP.
1918 */
1919 mp->m_len = len;
1920 /*
1921 		** See if a previous descriptor stored a
1922 		** chain head that this buffer continues
1923 */
1924 sendmp = rbuf->fmp;
1925 if (sendmp != NULL) { /* secondary frag */
1926 rbuf->buf = rbuf->fmp = NULL;
1927 mp->m_flags &= ~M_PKTHDR;
1928 sendmp->m_pkthdr.len += mp->m_len;
1929 } else {
1930 /*
1931 * Optimize. This might be a small packet,
1932 * maybe just a TCP ACK. Do a fast copy that
1933 * is cache aligned into a new mbuf, and
1934 * leave the old mbuf+cluster for re-use.
1935 */
1936 if (eop && len <= IXGBE_RX_COPY_LEN) {
1937 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1938 if (sendmp != NULL) {
1939 sendmp->m_data +=
1940 IXGBE_RX_COPY_ALIGN;
1941 ixgbe_bcopy(mp->m_data,
1942 sendmp->m_data, len);
1943 sendmp->m_len = len;
1944 rxr->rx_copies.ev_count++;
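					/*
					** IXGBE_RX_COPY tells the refresh
					** path that this mbuf was copied
					** rather than consumed, so its DMA
					** mapping can be reused as-is.
					*/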
1945 rbuf->flags |= IXGBE_RX_COPY;
1946 }
1947 }
1948 if (sendmp == NULL) {
1949 rbuf->buf = rbuf->fmp = NULL;
1950 sendmp = mp;
1951 }
1952
1953 /* first desc of a non-ps chain */
1954 sendmp->m_flags |= M_PKTHDR;
1955 sendmp->m_pkthdr.len = mp->m_len;
1956 }
1957 ++processed;
1958
1959 /* Pass the head pointer on */
1960 if (eop == 0) {
1961 nbuf->fmp = sendmp;
1962 sendmp = NULL;
1963 mp->m_next = nbuf->buf;
1964 } else { /* Sending this frame */
1965 m_set_rcvif(sendmp, ifp);
1966 ifp->if_ipackets++;
1967 rxr->rx_packets.ev_count++;
1968 /* capture data for AIM */
1969 rxr->bytes += sendmp->m_pkthdr.len;
1970 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1971 /* Process vlan info */
1972 if ((rxr->vtag_strip) &&
1973 (staterr & IXGBE_RXD_STAT_VP))
1974 vtag = le16toh(cur->wb.upper.vlan);
1975 if (vtag) {
1976 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1977 printf("%s: could not apply VLAN "
1978 "tag", __func__));
1979 }
1980 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1981 ixgbe_rx_checksum(staterr, sendmp, ptype,
1982 &adapter->stats.pf);
1983 }
1984 #if 0 /* FreeBSD */
1985 /*
1986 * In case of multiqueue, we have RXCSUM.PCSD bit set
1987 * and never cleared. This means we have RSS hash
1988 * available to be used.
1989 */
1990 if (adapter->num_queues > 1) {
1991 sendmp->m_pkthdr.flowid =
1992 le32toh(cur->wb.lower.hi_dword.rss);
1993 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1994 case IXGBE_RXDADV_RSSTYPE_IPV4:
1995 M_HASHTYPE_SET(sendmp,
1996 M_HASHTYPE_RSS_IPV4);
1997 break;
1998 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1999 M_HASHTYPE_SET(sendmp,
2000 M_HASHTYPE_RSS_TCP_IPV4);
2001 break;
2002 case IXGBE_RXDADV_RSSTYPE_IPV6:
2003 M_HASHTYPE_SET(sendmp,
2004 M_HASHTYPE_RSS_IPV6);
2005 break;
2006 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2007 M_HASHTYPE_SET(sendmp,
2008 M_HASHTYPE_RSS_TCP_IPV6);
2009 break;
2010 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2011 M_HASHTYPE_SET(sendmp,
2012 M_HASHTYPE_RSS_IPV6_EX);
2013 break;
2014 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_RSS_TCP_IPV6_EX);
2017 break;
2018 #if __FreeBSD_version > 1100000
2019 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2020 M_HASHTYPE_SET(sendmp,
2021 M_HASHTYPE_RSS_UDP_IPV4);
2022 break;
2023 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2024 M_HASHTYPE_SET(sendmp,
2025 M_HASHTYPE_RSS_UDP_IPV6);
2026 break;
2027 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2028 M_HASHTYPE_SET(sendmp,
2029 M_HASHTYPE_RSS_UDP_IPV6_EX);
2030 break;
2031 #endif
2032 default:
2033 M_HASHTYPE_SET(sendmp,
2034 M_HASHTYPE_OPAQUE);
2035 }
2036 } else {
2037 sendmp->m_pkthdr.flowid = que->msix;
2038 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2039 }
2040 #endif /* FreeBSD_version */
2041 }
2042 next_desc:
2043 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2044 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2045
2046 /* Advance our pointers to the next descriptor. */
2047 if (++i == rxr->num_desc)
2048 i = 0;
2049
2050 /* Now send to the stack or do LRO */
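		/*
		** ixgbe_rx_input() releases and re-acquires the RX lock
		** around if_input(), so record next_to_check before the
		** call and reload our position afterwards.
		*/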
2051 if (sendmp != NULL) {
2052 rxr->next_to_check = i;
2053 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2054 i = rxr->next_to_check;
2055 }
2056
2057 		/* Refresh the mbufs every 8 processed descriptors */
2058 if (processed == 8) {
2059 ixgbe_refresh_mbufs(rxr, i);
2060 processed = 0;
2061 }
2062 }
2063
2064 /* Refresh any remaining buf structs */
2065 if (ixgbe_rx_unrefreshed(rxr))
2066 ixgbe_refresh_mbufs(rxr, i);
2067
2068 rxr->next_to_check = i;
2069
2070 #ifdef LRO
2071 /*
2072 * Flush any outstanding LRO work
2073 */
2074 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2075 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2076 tcp_lro_flush(lro, queued);
2077 }
2078 #endif /* LRO */
2079
2080 IXGBE_RX_UNLOCK(rxr);
2081
2082 /*
2083 ** Still have cleaning to do?
2084 */
2085 	return ((staterr & IXGBE_RXD_STAT_DD) != 0);
2089 }
2090
2091
2092 /*********************************************************************
2093 *
2094 * Verify that the hardware indicated that the checksum is valid.
2095  *  Inform the stack of the checksum status so that the stack
2096  *  doesn't spend time verifying it again.
2097 *
2098 *********************************************************************/
2099 static void
2100 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2101 struct ixgbe_hw_stats *stats)
2102 {
2103 u16 status = (u16) staterr;
2104 u8 errors = (u8) (staterr >> 24);
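	/*
	** The low 16 bits of the descriptor's status_error word carry the
	** status bits (STAT_IPCS/STAT_L4CS: the hardware computed the
	** checksum); the top byte carries the error bits (ERR_IPE/ERR_TCPE:
	** the checksum was bad).
	*/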
2105 #if 0
2106 bool sctp = FALSE;
2107
2108 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2109 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2110 sctp = TRUE;
2111 #endif
2112
2113 if (status & IXGBE_RXD_STAT_IPCS) {
2114 stats->ipcs.ev_count++;
2115 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2116 /* IP Checksum Good */
2117 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2118
2119 } else {
2120 stats->ipcs_bad.ev_count++;
2121 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2122 }
2123 }
2124 if (status & IXGBE_RXD_STAT_L4CS) {
2125 stats->l4cs.ev_count++;
2126 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
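		/*
		** This code does not consult the packet type to narrow the
		** L4 protocol down, so it sets all of the TCP/UDP v4/v6
		** flags; each protocol's input path only examines its own
		** bits.
		*/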
2127 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2128 mp->m_pkthdr.csum_flags |= type;
2129 } else {
2130 stats->l4cs_bad.ev_count++;
2131 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2132 }
2133 }
2134 return;
2135 }
2136
2137
2138 /********************************************************************
2139 * Manage DMA'able memory.
2140 *******************************************************************/
2141
2142 int
2143 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2144 struct ixgbe_dma_alloc *dma, const int mapflags)
2145 {
2146 device_t dev = adapter->dev;
2147 int r, rsegs;
2148
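	/*
	** Standard bus_dma allocation sequence, unwound in reverse order by
	** the fail labels below: create a tag, allocate and map the memory,
	** create a map, then load it to obtain the physical (bus) address.
	** Used for the per-ring txdma/rxdma areas allocated in
	** ixgbe_allocate_queues().
	*/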
2149 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2150 DBA_ALIGN, 0, /* alignment, bounds */
2151 size, /* maxsize */
2152 1, /* nsegments */
2153 size, /* maxsegsize */
2154 BUS_DMA_ALLOCNOW, /* flags */
2155 &dma->dma_tag);
2156 if (r != 0) {
2157 aprint_error_dev(dev,
2158 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2159 goto fail_0;
2160 }
2161
2162 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2163 size,
2164 dma->dma_tag->dt_alignment,
2165 dma->dma_tag->dt_boundary,
2166 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2167 if (r != 0) {
2168 aprint_error_dev(dev,
2169 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2170 goto fail_1;
2171 }
2172
2173 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2174 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2175 if (r != 0) {
2176 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2177 __func__, r);
2178 goto fail_2;
2179 }
2180
2181 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2182 if (r != 0) {
2183 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2184 __func__, r);
2185 goto fail_3;
2186 }
2187
2188 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2189 size,
2190 NULL,
2191 mapflags | BUS_DMA_NOWAIT);
2192 if (r != 0) {
2193 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2194 __func__, r);
2195 goto fail_4;
2196 }
2197 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2198 dma->dma_size = size;
2199 return 0;
2200 fail_4:
2201 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2202 fail_3:
2203 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2204 fail_2:
2205 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2206 fail_1:
2207 ixgbe_dma_tag_destroy(dma->dma_tag);
2208 fail_0:
2209 return r;
2210 }
2211
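/*
** Release a DMA area set up by ixgbe_dma_malloc(): sync and unload the
** map, free the memory segment, and destroy the tag.
*/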
2212 void
2213 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2214 {
2215 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2216 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2217 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2218 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2219 ixgbe_dma_tag_destroy(dma->dma_tag);
2220 }
2221
2222
2223 /*********************************************************************
2224 *
2225 * Allocate memory for the transmit and receive rings, and then
2226  *  the descriptors associated with each; called only once at attach.
2227 *
2228 **********************************************************************/
2229 int
2230 ixgbe_allocate_queues(struct adapter *adapter)
2231 {
2232 device_t dev = adapter->dev;
2233 struct ix_queue *que;
2234 struct tx_ring *txr;
2235 struct rx_ring *rxr;
2236 int rsize, tsize, error = IXGBE_SUCCESS;
2237 int txconf = 0, rxconf = 0;
2238 #ifdef PCI_IOV
2239 enum ixgbe_iov_mode iov_mode;
2240 #endif
2241
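	/*
	** Allocation order: the ix_queue array, then the tx_ring and rx_ring
	** arrays, then per-ring descriptor DMA memory and buffer structures.
	** On failure, the labels at the bottom unwind the allocations in
	** reverse order.
	*/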
2242 /* First allocate the top level queue structs */
2243 if (!(adapter->queues =
2244 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2245 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2246 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2247 error = ENOMEM;
2248 goto fail;
2249 }
2250
2251 	/* Next allocate the TX ring struct memory */
2252 if (!(adapter->tx_rings =
2253 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2254 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2255 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2256 error = ENOMEM;
2257 goto tx_fail;
2258 }
2259
2260 /* Next allocate the RX */
2261 if (!(adapter->rx_rings =
2262 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2263 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2264 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2265 error = ENOMEM;
2266 goto rx_fail;
2267 }
2268
2269 /* For the ring itself */
2270 tsize = roundup2(adapter->num_tx_desc *
2271 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2272
2273 #ifdef PCI_IOV
2274 iov_mode = ixgbe_get_iov_mode(adapter);
2275 adapter->pool = ixgbe_max_vfs(iov_mode);
2276 #else
2277 adapter->pool = 0;
2278 #endif
2279 /*
2280 	 * Now set up the TX queues; txconf is needed to handle the
2281 	 * possibility that things fail midcourse and we need to
2282 	 * undo the allocations gracefully.
2283 */
2284 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2285 /* Set up some basics */
2286 txr = &adapter->tx_rings[i];
2287 txr->adapter = adapter;
2288 #ifdef PCI_IOV
2289 txr->me = ixgbe_pf_que_index(iov_mode, i);
2290 #else
2291 txr->me = i;
2292 #endif
2293 txr->num_desc = adapter->num_tx_desc;
2294
2295 /* Initialize the TX side lock */
2296 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2297 device_xname(dev), txr->me);
2298 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2299
2300 if (ixgbe_dma_malloc(adapter, tsize,
2301 &txr->txdma, BUS_DMA_NOWAIT)) {
2302 aprint_error_dev(dev,
2303 "Unable to allocate TX Descriptor memory\n");
2304 error = ENOMEM;
2305 goto err_tx_desc;
2306 }
2307 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2308 bzero((void *)txr->tx_base, tsize);
2309
2310 /* Now allocate transmit buffers for the ring */
2311 if (ixgbe_allocate_transmit_buffers(txr)) {
2312 aprint_error_dev(dev,
2313 "Critical Failure setting up transmit buffers\n");
2314 error = ENOMEM;
2315 goto err_tx_desc;
2316 }
2317 #ifndef IXGBE_LEGACY_TX
2318 /* Allocate a buf ring */
2319 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2320 M_WAITOK, &txr->tx_mtx);
2321 if (txr->br == NULL) {
2322 aprint_error_dev(dev,
2323 "Critical Failure setting up buf ring\n");
2324 error = ENOMEM;
2325 goto err_tx_desc;
2326 }
2327 #endif
2328 }
2329
2330 /*
2331 * Next the RX queues...
2332 */
2333 rsize = roundup2(adapter->num_rx_desc *
2334 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2335 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2336 rxr = &adapter->rx_rings[i];
2337 /* Set up some basics */
2338 rxr->adapter = adapter;
2339 #ifdef PCI_IOV
2340 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2341 #else
2342 rxr->me = i;
2343 #endif
2344 rxr->num_desc = adapter->num_rx_desc;
2345
2346 /* Initialize the RX side lock */
2347 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2348 device_xname(dev), rxr->me);
2349 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2350
2351 if (ixgbe_dma_malloc(adapter, rsize,
2352 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2353 aprint_error_dev(dev,
2354 		    "Unable to allocate RX Descriptor memory\n");
2355 error = ENOMEM;
2356 goto err_rx_desc;
2357 }
2358 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2359 bzero((void *)rxr->rx_base, rsize);
2360
2361 		/* Allocate receive buffers for the ring */
2362 if (ixgbe_allocate_receive_buffers(rxr)) {
2363 aprint_error_dev(dev,
2364 "Critical Failure setting up receive buffers\n");
2365 error = ENOMEM;
2366 goto err_rx_desc;
2367 }
2368 }
2369
2370 /*
2371 ** Finally set up the queue holding structs
2372 */
2373 for (int i = 0; i < adapter->num_queues; i++) {
2374 que = &adapter->queues[i];
2375 que->adapter = adapter;
2376 que->me = i;
2377 que->txr = &adapter->tx_rings[i];
2378 que->rxr = &adapter->rx_rings[i];
2379 }
2380
2381 return (0);
2382
2383 err_rx_desc:
2384 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2385 ixgbe_dma_free(adapter, &rxr->rxdma);
2386 err_tx_desc:
2387 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2388 ixgbe_dma_free(adapter, &txr->txdma);
2389 free(adapter->rx_rings, M_DEVBUF);
2390 rx_fail:
2391 free(adapter->tx_rings, M_DEVBUF);
2392 tx_fail:
2393 free(adapter->queues, M_DEVBUF);
2394 fail:
2395 return (error);
2396 }
2397
2398