/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

  3. Neither the name of the Intel Corporation nor the names of its
     contributors may be used to endorse or promote products derived from
     this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Coyote Point Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 285528 2015-07-14 09:13:18Z hiren $*/
/*$NetBSD: ix_txrx.c,v 1.6 2016/12/02 10:24:31 msaitoh Exp $*/

64 #include "ixgbe.h"
65
66 #ifdef DEV_NETMAP
67 #include <net/netmap.h>
68 #include <sys/selinfo.h>
69 #include <dev/netmap/netmap_kern.h>
70
71 extern int ix_crcstrip;
72 #endif
73
/*
** HW RSC control:
** This feature only works with IPv4, and only on 82599 and later
** hardware.  It also breaks IP forwarding, which cannot be worked
** around by the stack the way software LRO can.  For those reasons
** it is left disabled here, with no tunable interface; enabling it
** requires recompiling the driver.
*/
static bool ixgbe_rsc_enable = FALSE;
87
#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; with the
** default of 20, every 20th packet
** is probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
#endif
100
101 /* Shared PCI config read/write */
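/*
 * NetBSD's pci_conf_read()/pci_conf_write() operate on naturally aligned
 * 32-bit words, while the shared Intel code asks for 16-bit accesses.
 * The two helpers below therefore work on the containing 32-bit word and
 * extract, or read-modify-write, the requested half based on reg % 4.
 */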
102 u16
103 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
104 {
105 switch (reg % 4) {
106 case 0:
107 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
108 __BITS(15, 0);
109 case 2:
110 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
111 reg - 2), __BITS(31, 16));
112 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
114 break;
115 }
116 }
117
118 void
119 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
120 {
121 pcireg_t old;
122
123 switch (reg % 4) {
124 case 0:
125 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
126 __BITS(31, 16);
127 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
128 break;
129 case 2:
130 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
131 __BITS(15, 0);
132 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
133 __SHIFTIN(value, __BITS(31, 16)) | old);
134 break;
135 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
137 break;
138 }
139
140 return;
141 }
142
143 /*********************************************************************
144 * Local Function prototypes
145 *********************************************************************/
146 static void ixgbe_setup_transmit_ring(struct tx_ring *);
147 static void ixgbe_free_transmit_buffers(struct tx_ring *);
148 static int ixgbe_setup_receive_ring(struct rx_ring *);
149 static void ixgbe_free_receive_buffers(struct rx_ring *);
150
151 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
152 struct ixgbe_hw_stats *);
153 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
154 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
155 static int ixgbe_tx_ctx_setup(struct tx_ring *,
156 struct mbuf *, u32 *, u32 *);
157 static int ixgbe_tso_setup(struct tx_ring *,
158 struct mbuf *, u32 *, u32 *);
159 #ifdef IXGBE_FDIR
160 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
161 #endif
162 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
163 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
164 struct mbuf *, u32);
165
166 static void ixgbe_setup_hw_rsc(struct rx_ring *);
167
168 #ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is left on the interface queue.
 **********************************************************************/
178
179 void
180 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
181 {
182 int rc;
183 struct mbuf *m_head;
184 struct adapter *adapter = txr->adapter;
185
186 IXGBE_TX_LOCK_ASSERT(txr);
187
188 if ((ifp->if_flags & IFF_RUNNING) == 0)
189 return;
190 if (!adapter->link_active)
191 return;
192
193 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
194 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
195 break;
196
197 IFQ_POLL(&ifp->if_snd, m_head);
198 if (m_head == NULL)
199 break;
200
201 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
202 break;
203 }
204 IFQ_DEQUEUE(&ifp->if_snd, m_head);
205 if (rc == EFBIG) {
206 struct mbuf *mtmp;
207
208 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
209 m_head = mtmp;
210 rc = ixgbe_xmit(txr, m_head);
211 if (rc != 0)
212 adapter->efbig2_tx_dma_setup.ev_count++;
213 } else
214 adapter->m_defrag_failed.ev_count++;
215 }
216 if (rc != 0) {
217 m_freem(m_head);
218 continue;
219 }
220
221 /* Send a copy of the frame to the BPF listener */
222 bpf_mtap(ifp, m_head);
223 }
224 return;
225 }
226
227 /*
228 * Legacy TX start - called by the stack, this
229 * always uses the first tx ring, and should
230 * not be used with multiqueue tx enabled.
231 */
232 void
233 ixgbe_start(struct ifnet *ifp)
234 {
235 struct adapter *adapter = ifp->if_softc;
236 struct tx_ring *txr = adapter->tx_rings;
237
238 if (ifp->if_flags & IFF_RUNNING) {
239 IXGBE_TX_LOCK(txr);
240 ixgbe_start_locked(txr, ifp);
241 IXGBE_TX_UNLOCK(txr);
242 }
243 return;
244 }
245
246 #else /* ! IXGBE_LEGACY_TX */
247
248 /*
249 ** Multiqueue Transmit driver
250 **
251 */
252 int
253 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
254 {
255 struct adapter *adapter = ifp->if_softc;
256 struct ix_queue *que;
257 struct tx_ring *txr;
258 int i, err = 0;
259 #ifdef RSS
260 uint32_t bucket_id;
261 #endif
262
	/*
	 * When doing RSS, map the frame to the same outbound queue as
	 * the one the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the same
	 * bucket as the CPU we are currently running on.
	 */
270 #if __FreeBSD_version < 1100054
271 if (m->m_flags & M_FLOWID) {
272 #else
273 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
274 #endif
275 #ifdef RSS
276 if (rss_hash2bucket(m->m_pkthdr.flowid,
277 M_HASHTYPE_GET(m), &bucket_id) == 0)
278 /* TODO: spit out something if bucket_id > num_queues? */
279 i = bucket_id % adapter->num_queues;
280 else
281 #endif
282 i = m->m_pkthdr.flowid % adapter->num_queues;
283 } else
284 i = curcpu % adapter->num_queues;
285
286 /* Check for a hung queue and pick alternative */
287 if (((1 << i) & adapter->active_queues) == 0)
288 i = ffsl(adapter->active_queues);
289
290 txr = &adapter->tx_rings[i];
291 que = &adapter->queues[i];
292
293 err = drbr_enqueue(ifp, txr->br, m);
294 if (err)
295 return (err);
296 if (IXGBE_TX_TRYLOCK(txr)) {
297 ixgbe_mq_start_locked(ifp, txr);
298 IXGBE_TX_UNLOCK(txr);
299 } else
300 softint_schedule(txr->txq_si);
301
302 return (0);
303 }
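/*
 * Illustrative example of the mapping above (not from the original code):
 * with num_queues == 8 and an inbound-hashed mbuf whose flowid is 43, the
 * frame is enqueued on tx_rings[43 % 8], i.e. ring 3, which is intended to
 * match the queue the incoming flow was steered to.
 */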
304
305 int
306 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
307 {
308 struct adapter *adapter = txr->adapter;
309 struct mbuf *next;
310 int enqueued = 0, err = 0;
311
312 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
313 adapter->link_active == 0)
314 return (ENETDOWN);
315
316 /* Process the queue */
317 #if __FreeBSD_version < 901504
318 next = drbr_dequeue(ifp, txr->br);
319 while (next != NULL) {
320 if ((err = ixgbe_xmit(txr, &next)) != 0) {
321 if (next != NULL)
322 err = drbr_enqueue(ifp, txr->br, next);
323 #else
324 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
325 if ((err = ixgbe_xmit(txr, &next)) != 0) {
326 if (next == NULL) {
327 drbr_advance(ifp, txr->br);
328 } else {
329 drbr_putback(ifp, txr->br, next);
330 }
331 #endif
332 break;
333 }
334 #if __FreeBSD_version >= 901504
335 drbr_advance(ifp, txr->br);
336 #endif
337 enqueued++;
338 #if 0 // this is VF-only
339 #if __FreeBSD_version >= 1100036
340 /*
341 * Since we're looking at the tx ring, we can check
342 * to see if we're a VF by examing our tail register
343 * address.
344 */
345 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
346 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
347 #endif
348 #endif
349 /* Send a copy of the frame to the BPF listener */
350 bpf_mtap(ifp, next);
351 if ((ifp->if_flags & IFF_RUNNING) == 0)
352 break;
353 #if __FreeBSD_version < 901504
354 next = drbr_dequeue(ifp, txr->br);
355 #endif
356 }
357
358 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
359 ixgbe_txeof(txr);
360
361 return (err);
362 }
363
364 /*
365 * Called from a taskqueue to drain queued transmit packets.
366 */
367 void
368 ixgbe_deferred_mq_start(void *arg, int pending)
369 {
370 struct tx_ring *txr = arg;
371 struct adapter *adapter = txr->adapter;
372 struct ifnet *ifp = adapter->ifp;
373
374 IXGBE_TX_LOCK(txr);
375 if (!drbr_empty(ifp, txr->br))
376 ixgbe_mq_start_locked(ifp, txr);
377 IXGBE_TX_UNLOCK(txr);
378 }
379
380 /*
381 * Flush all ring buffers
382 */
383 void
384 ixgbe_qflush(struct ifnet *ifp)
385 {
386 struct adapter *adapter = ifp->if_softc;
387 struct tx_ring *txr = adapter->tx_rings;
388 struct mbuf *m;
389
390 for (int i = 0; i < adapter->num_queues; i++, txr++) {
391 IXGBE_TX_LOCK(txr);
392 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
393 m_freem(m);
394 IXGBE_TX_UNLOCK(txr);
395 }
396 if_qflush(ifp);
397 }
398 #endif /* IXGBE_LEGACY_TX */
399
400
401 /*********************************************************************
402 *
403 * This routine maps the mbufs to tx descriptors, allowing the
404 * TX engine to transmit the packets.
405 * - return 0 on success, positive on failure
406 *
407 **********************************************************************/
408
409 static int
410 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
411 {
412 struct m_tag *mtag;
413 struct adapter *adapter = txr->adapter;
414 struct ethercom *ec = &adapter->osdep.ec;
415 u32 olinfo_status = 0, cmd_type_len;
416 int i, j, error;
417 int first;
418 bus_dmamap_t map;
419 struct ixgbe_tx_buf *txbuf;
420 union ixgbe_adv_tx_desc *txd = NULL;
421
422 /* Basic descriptor defines */
423 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
424 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
425
426 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
427 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
428
429 /*
430 * Important to capture the first descriptor
431 * used because it will contain the index of
432 * the one we tell the hardware to report back
433 */
434 first = txr->next_avail_desc;
435 txbuf = &txr->tx_buffers[first];
436 map = txbuf->map;
437
438 /*
439 * Map the packet for DMA.
440 */
441 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
442 m_head, BUS_DMA_NOWAIT);
443
444 if (__predict_false(error)) {
445
446 switch (error) {
447 case EAGAIN:
448 adapter->eagain_tx_dma_setup.ev_count++;
449 return EAGAIN;
450 case ENOMEM:
451 adapter->enomem_tx_dma_setup.ev_count++;
452 return EAGAIN;
453 case EFBIG:
454 /*
455 * XXX Try it again?
456 * do m_defrag() and retry bus_dmamap_load_mbuf().
457 */
458 adapter->efbig_tx_dma_setup.ev_count++;
459 return error;
460 case EINVAL:
461 adapter->einval_tx_dma_setup.ev_count++;
462 return error;
463 default:
464 adapter->other_tx_dma_setup.ev_count++;
465 return error;
466 }
467 }
468
469 /* Make certain there are enough descriptors */
470 if (map->dm_nsegs > txr->tx_avail - 2) {
471 txr->no_desc_avail.ev_count++;
472 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
473 return EAGAIN;
474 }
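	/*
	 * Note: the "- 2" above appears to keep headroom for the context
	 * descriptor consumed by ixgbe_tx_ctx_setup() below, plus one spare
	 * slot, so the data descriptors written afterwards always fit.
	 */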
475
476 /*
477 * Set up the appropriate offload context
478 * this will consume the first descriptor
479 */
480 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
481 if (__predict_false(error)) {
482 return (error);
483 }
484
485 #ifdef IXGBE_FDIR
486 /* Do the flow director magic */
487 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
488 ++txr->atr_count;
489 if (txr->atr_count >= atr_sample_rate) {
490 ixgbe_atr(txr, m_head);
491 txr->atr_count = 0;
492 }
493 }
494 #endif
495
496 i = txr->next_avail_desc;
497 for (j = 0; j < map->dm_nsegs; j++) {
498 bus_size_t seglen;
499 bus_addr_t segaddr;
500
501 txbuf = &txr->tx_buffers[i];
502 txd = &txr->tx_base[i];
503 seglen = map->dm_segs[j].ds_len;
504 segaddr = htole64(map->dm_segs[j].ds_addr);
505
506 txd->read.buffer_addr = segaddr;
507 txd->read.cmd_type_len = htole32(txr->txd_cmd |
508 cmd_type_len |seglen);
509 txd->read.olinfo_status = htole32(olinfo_status);
510
511 if (++i == txr->num_desc)
512 i = 0;
513 }
514
515 txd->read.cmd_type_len |=
516 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
517 txr->tx_avail -= map->dm_nsegs;
518 txr->next_avail_desc = i;
519
520 txbuf->m_head = m_head;
	/*
	 * Swap the maps so that the last descriptor, which gets the
	 * completion interrupt, holds the real map, while the first
	 * descriptor keeps the unused map from this buffer.
	 */
527 txr->tx_buffers[first].map = txbuf->map;
528 txbuf->map = map;
529 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
530 BUS_DMASYNC_PREWRITE);
531
532 /* Set the EOP descriptor that will be marked done */
533 txbuf = &txr->tx_buffers[first];
534 txbuf->eop = txd;
535
536 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
537 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
538 /*
539 * Advance the Transmit Descriptor Tail (Tdt), this tells the
540 * hardware that this frame is available to transmit.
541 */
542 ++txr->total_packets.ev_count;
543 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
544
545 /* Mark queue as having work */
546 if (txr->busy == 0)
547 txr->busy = 1;
548
549 return 0;
550 }
551
552 /*********************************************************************
553 *
554 * Allocate memory for tx_buffer structures. The tx_buffer stores all
555 * the information needed to transmit a packet on the wire. This is
556 * called only once at attach, setup is done every reset.
557 *
558 **********************************************************************/
559 int
560 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
561 {
562 struct adapter *adapter = txr->adapter;
563 device_t dev = adapter->dev;
564 struct ixgbe_tx_buf *txbuf;
565 int error, i;
566
567 /*
568 * Setup DMA descriptor areas.
569 */
570 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
571 1, 0, /* alignment, bounds */
572 IXGBE_TSO_SIZE, /* maxsize */
573 adapter->num_segs, /* nsegments */
574 PAGE_SIZE, /* maxsegsize */
575 0, /* flags */
576 &txr->txtag))) {
577 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
578 goto fail;
579 }
580
581 if (!(txr->tx_buffers =
582 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
583 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
584 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
585 error = ENOMEM;
586 goto fail;
587 }
588
589 /* Create the descriptor buffer dma maps */
590 txbuf = txr->tx_buffers;
591 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
592 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
593 if (error != 0) {
594 aprint_error_dev(dev,
595 "Unable to create TX DMA map (%d)\n", error);
596 goto fail;
597 }
598 }
599
600 return 0;
601 fail:
	/* Free everything; this handles the case where we failed partway through */
603 ixgbe_free_transmit_structures(adapter);
604 return (error);
605 }
606
607 /*********************************************************************
608 *
609 * Initialize a transmit ring.
610 *
611 **********************************************************************/
612 static void
613 ixgbe_setup_transmit_ring(struct tx_ring *txr)
614 {
615 struct adapter *adapter = txr->adapter;
616 struct ixgbe_tx_buf *txbuf;
617 #ifdef DEV_NETMAP
618 struct netmap_adapter *na = NA(adapter->ifp);
619 struct netmap_slot *slot;
620 #endif /* DEV_NETMAP */
621
622 /* Clear the old ring contents */
623 IXGBE_TX_LOCK(txr);
624 #ifdef DEV_NETMAP
625 /*
626 * (under lock): if in netmap mode, do some consistency
627 * checks and set slot to entry 0 of the netmap ring.
628 */
629 slot = netmap_reset(na, NR_TX, txr->me, 0);
630 #endif /* DEV_NETMAP */
631 bzero((void *)txr->tx_base,
632 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
633 /* Reset indices */
634 txr->next_avail_desc = 0;
635 txr->next_to_clean = 0;
636
637 /* Free any existing tx buffers. */
638 txbuf = txr->tx_buffers;
639 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
640 if (txbuf->m_head != NULL) {
641 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
642 0, txbuf->m_head->m_pkthdr.len,
643 BUS_DMASYNC_POSTWRITE);
644 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
645 m_freem(txbuf->m_head);
646 txbuf->m_head = NULL;
647 }
648 #ifdef DEV_NETMAP
649 /*
650 * In netmap mode, set the map for the packet buffer.
651 * NOTE: Some drivers (not this one) also need to set
652 * the physical buffer address in the NIC ring.
653 * Slots in the netmap ring (indexed by "si") are
654 * kring->nkr_hwofs positions "ahead" wrt the
655 * corresponding slot in the NIC ring. In some drivers
656 * (not here) nkr_hwofs can be negative. Function
657 * netmap_idx_n2k() handles wraparounds properly.
658 */
659 if (slot) {
660 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
661 netmap_load_map(na, txr->txtag,
662 txbuf->map, NMB(na, slot + si));
663 }
664 #endif /* DEV_NETMAP */
665 /* Clear the EOP descriptor pointer */
666 txbuf->eop = NULL;
667 }
668
669 #ifdef IXGBE_FDIR
670 /* Set the rate at which we sample packets */
671 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
672 txr->atr_sample = atr_sample_rate;
673 #endif
674
675 /* Set number of descriptors available */
676 txr->tx_avail = adapter->num_tx_desc;
677
678 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
679 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
680 IXGBE_TX_UNLOCK(txr);
681 }
682
683 /*********************************************************************
684 *
685 * Initialize all transmit rings.
686 *
687 **********************************************************************/
688 int
689 ixgbe_setup_transmit_structures(struct adapter *adapter)
690 {
691 struct tx_ring *txr = adapter->tx_rings;
692
693 for (int i = 0; i < adapter->num_queues; i++, txr++)
694 ixgbe_setup_transmit_ring(txr);
695
696 return (0);
697 }
698
699 /*********************************************************************
700 *
701 * Free all transmit rings.
702 *
703 **********************************************************************/
704 void
705 ixgbe_free_transmit_structures(struct adapter *adapter)
706 {
707 struct tx_ring *txr = adapter->tx_rings;
708
709 for (int i = 0; i < adapter->num_queues; i++, txr++) {
710 ixgbe_free_transmit_buffers(txr);
711 ixgbe_dma_free(adapter, &txr->txdma);
712 IXGBE_TX_LOCK_DESTROY(txr);
713 }
714 free(adapter->tx_rings, M_DEVBUF);
715 }
716
717 /*********************************************************************
718 *
719 * Free transmit ring related data structures.
720 *
721 **********************************************************************/
722 static void
723 ixgbe_free_transmit_buffers(struct tx_ring *txr)
724 {
725 struct adapter *adapter = txr->adapter;
726 struct ixgbe_tx_buf *tx_buffer;
727 int i;
728
729 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
730
731 if (txr->tx_buffers == NULL)
732 return;
733
734 tx_buffer = txr->tx_buffers;
735 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
736 if (tx_buffer->m_head != NULL) {
737 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
738 0, tx_buffer->m_head->m_pkthdr.len,
739 BUS_DMASYNC_POSTWRITE);
740 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
741 m_freem(tx_buffer->m_head);
742 tx_buffer->m_head = NULL;
743 if (tx_buffer->map != NULL) {
744 ixgbe_dmamap_destroy(txr->txtag,
745 tx_buffer->map);
746 tx_buffer->map = NULL;
747 }
748 } else if (tx_buffer->map != NULL) {
749 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
750 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
751 tx_buffer->map = NULL;
752 }
753 }
754 #ifndef IXGBE_LEGACY_TX
755 if (txr->br != NULL)
756 buf_ring_free(txr->br, M_DEVBUF);
757 #endif
758 if (txr->tx_buffers != NULL) {
759 free(txr->tx_buffers, M_DEVBUF);
760 txr->tx_buffers = NULL;
761 }
762 if (txr->txtag != NULL) {
763 ixgbe_dma_tag_destroy(txr->txtag);
764 txr->txtag = NULL;
765 }
766 return;
767 }
768
769 /*********************************************************************
770 *
771 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
772 *
773 **********************************************************************/
774
775 static int
776 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
777 u32 *cmd_type_len, u32 *olinfo_status)
778 {
779 struct adapter *adapter = txr->adapter;
780 struct ethercom *ec = &adapter->osdep.ec;
781 struct m_tag *mtag;
782 struct ixgbe_adv_tx_context_desc *TXD;
783 struct ether_vlan_header *eh;
784 struct ip ip;
785 struct ip6_hdr ip6;
786 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
787 int ehdrlen, ip_hlen = 0;
788 u16 etype;
789 u8 ipproto __diagused = 0;
790 int offload = TRUE;
791 int ctxd = txr->next_avail_desc;
792 u16 vtag = 0;
793
794 /* First check if TSO is to be used */
795 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
796 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
797
798 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
799 offload = FALSE;
800
801 /* Indicate the whole packet as payload when not doing TSO */
802 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
803
804 /* Now ready a context descriptor */
805 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
806
807 /*
808 ** In advanced descriptors the vlan tag must
809 ** be placed into the context descriptor. Hence
810 ** we need to make one even if not doing offloads.
811 */
812 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
813 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
814 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
815 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
816 return (0);
817
818 /*
819 * Determine where frame payload starts.
820 * Jump over vlan headers if already present,
821 * helpful for QinQ too.
822 */
823 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
824 eh = mtod(mp, struct ether_vlan_header *);
825 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
826 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
827 etype = ntohs(eh->evl_proto);
828 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
829 } else {
830 etype = ntohs(eh->evl_encap_proto);
831 ehdrlen = ETHER_HDR_LEN;
832 }
833
834 /* Set the ether header length */
835 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
836
837 if (offload == FALSE)
838 goto no_offloads;
839
840 switch (etype) {
841 case ETHERTYPE_IP:
842 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
843 ip_hlen = ip.ip_hl << 2;
844 ipproto = ip.ip_p;
845 #if 0
846 ip.ip_sum = 0;
847 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
848 #else
849 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
850 ip.ip_sum == 0);
851 #endif
852 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
853 break;
854 case ETHERTYPE_IPV6:
855 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
856 ip_hlen = sizeof(ip6);
857 /* XXX-BZ this will go badly in case of ext hdrs. */
858 ipproto = ip6.ip6_nxt;
859 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
860 break;
861 default:
862 break;
863 }
864
865 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
866 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
867
868 vlan_macip_lens |= ip_hlen;
869
870 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
871 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
872 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
873 KASSERT(ipproto == IPPROTO_TCP);
874 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
875 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
876 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
877 KASSERT(ipproto == IPPROTO_UDP);
878 }
879
880 no_offloads:
881 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
882
883 /* Now copy bits into descriptor */
884 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
885 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
886 TXD->seqnum_seed = htole32(0);
887 TXD->mss_l4len_idx = htole32(0);
888
889 /* We've consumed the first desc, adjust counters */
890 if (++ctxd == txr->num_desc)
891 ctxd = 0;
892 txr->next_avail_desc = ctxd;
893 --txr->tx_avail;
894
895 return 0;
896 }
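/*
 * Illustrative example (not in the original sources): for an untagged
 * IPv4/TCP frame with a 14-byte Ethernet header and a 20-byte IP header
 * that requests TCP checksum offload, the code above builds a context
 * descriptor with MACLEN = 14, IPLEN = 20 and TUCMD = IPV4 | L4T_TCP, and
 * sets the TXSM bit in olinfo_status so the data descriptors written for
 * this frame inherit the offload.
 */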
897
898 /**********************************************************************
899 *
900 * Setup work for hardware segmentation offload (TSO) on
901 * adapters using advanced tx descriptors
902 *
903 **********************************************************************/
904 static int
905 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
906 u32 *cmd_type_len, u32 *olinfo_status)
907 {
908 struct m_tag *mtag;
909 struct adapter *adapter = txr->adapter;
910 struct ethercom *ec = &adapter->osdep.ec;
911 struct ixgbe_adv_tx_context_desc *TXD;
912 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
913 u32 mss_l4len_idx = 0, paylen;
914 u16 vtag = 0, eh_type;
915 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
916 struct ether_vlan_header *eh;
917 #ifdef INET6
918 struct ip6_hdr *ip6;
919 #endif
920 #ifdef INET
921 struct ip *ip;
922 #endif
923 struct tcphdr *th;
924
925
926 /*
927 * Determine where frame payload starts.
928 * Jump over vlan headers if already present
929 */
930 eh = mtod(mp, struct ether_vlan_header *);
931 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
932 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
933 eh_type = eh->evl_proto;
934 } else {
935 ehdrlen = ETHER_HDR_LEN;
936 eh_type = eh->evl_encap_proto;
937 }
938
939 switch (ntohs(eh_type)) {
940 #ifdef INET6
941 case ETHERTYPE_IPV6:
942 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
943 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
944 if (ip6->ip6_nxt != IPPROTO_TCP)
945 return (ENXIO);
946 ip_hlen = sizeof(struct ip6_hdr);
947 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
948 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
949 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
950 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
951 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
952 break;
953 #endif
954 #ifdef INET
955 case ETHERTYPE_IP:
956 ip = (struct ip *)(mp->m_data + ehdrlen);
957 if (ip->ip_p != IPPROTO_TCP)
958 return (ENXIO);
959 ip->ip_sum = 0;
960 ip_hlen = ip->ip_hl << 2;
961 th = (struct tcphdr *)((char *)ip + ip_hlen);
962 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
963 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
964 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
965 /* Tell transmit desc to also do IPv4 checksum. */
966 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
967 break;
968 #endif
969 default:
970 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
971 __func__, ntohs(eh_type));
972 break;
973 }
974
975 ctxd = txr->next_avail_desc;
976 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
977
978 tcp_hlen = th->th_off << 2;
979
980 /* This is used in the transmit desc in encap */
981 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
982
983 /* VLAN MACLEN IPLEN */
984 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
985 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
986 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
987 }
988
989 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
990 vlan_macip_lens |= ip_hlen;
991 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
992
993 /* ADV DTYPE TUCMD */
994 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
995 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
996 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
997
998 /* MSS L4LEN IDX */
999 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1000 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1001 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1002
1003 TXD->seqnum_seed = htole32(0);
1004
1005 if (++ctxd == txr->num_desc)
1006 ctxd = 0;
1007
1008 txr->tx_avail--;
1009 txr->next_avail_desc = ctxd;
1010 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1011 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1012 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1013 ++txr->tso_tx.ev_count;
1014 return (0);
1015 }
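/*
 * Illustrative example (not in the original sources): for a TSO send of a
 * large IPv4/TCP payload with an MSS of 1460, the context descriptor above
 * carries MSS = 1460 and the TCP header length, the TCP pseudo-header
 * checksum is seeded without the length field as TSO requires, and the
 * hardware then segments the single large frame into MSS-sized packets.
 */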
1016
1017
1018 /**********************************************************************
1019 *
1020 * Examine each tx_buffer in the used queue. If the hardware is done
1021 * processing the packet then free associated resources. The
1022 * tx_buffer is put back on the free queue.
1023 *
1024 **********************************************************************/
1025 void
1026 ixgbe_txeof(struct tx_ring *txr)
1027 {
1028 struct adapter *adapter = txr->adapter;
1029 struct ifnet *ifp = adapter->ifp;
1030 u32 work, processed = 0;
1031 u16 limit = txr->process_limit;
1032 struct ixgbe_tx_buf *buf;
1033 union ixgbe_adv_tx_desc *txd;
1034
1035 KASSERT(mutex_owned(&txr->tx_mtx));
1036
1037 #ifdef DEV_NETMAP
1038 if (ifp->if_capenable & IFCAP_NETMAP) {
1039 struct netmap_adapter *na = NA(ifp);
1040 struct netmap_kring *kring = &na->tx_rings[txr->me];
1041 txd = txr->tx_base;
1042 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1043 BUS_DMASYNC_POSTREAD);
1044 /*
1045 * In netmap mode, all the work is done in the context
1046 * of the client thread. Interrupt handlers only wake up
1047 * clients, which may be sleeping on individual rings
1048 * or on a global resource for all rings.
1049 * To implement tx interrupt mitigation, we wake up the client
1050 * thread roughly every half ring, even if the NIC interrupts
1051 * more frequently. This is implemented as follows:
1052 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1053 * the slot that should wake up the thread (nkr_num_slots
1054 * means the user thread should not be woken up);
1055 * - the driver ignores tx interrupts unless netmap_mitigate=0
1056 * or the slot has the DD bit set.
1057 */
1058 if (!netmap_mitigate ||
1059 (kring->nr_kflags < kring->nkr_num_slots &&
1060 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1061 netmap_tx_irq(ifp, txr->me);
1062 }
1063 return;
1064 }
1065 #endif /* DEV_NETMAP */
1066
1067 if (txr->tx_avail == txr->num_desc) {
1068 txr->busy = 0;
1069 return;
1070 }
1071
1072 /* Get work starting point */
1073 work = txr->next_to_clean;
1074 buf = &txr->tx_buffers[work];
1075 txd = &txr->tx_base[work];
1076 work -= txr->num_desc; /* The distance to ring end */
1077 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1078 BUS_DMASYNC_POSTREAD);
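	/*
	 * 'work' was biased above by subtracting num_desc, so it counts up
	 * from a negative value and reaches zero exactly when the index
	 * wraps; the (!work) tests below use that to reset to the ring
	 * start before re-biasing it.
	 */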
1079 do {
1080 union ixgbe_adv_tx_desc *eop= buf->eop;
1081 if (eop == NULL) /* No work */
1082 break;
1083
1084 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1085 break; /* I/O not complete */
1086
1087 if (buf->m_head) {
1088 txr->bytes +=
1089 buf->m_head->m_pkthdr.len;
1090 bus_dmamap_sync(txr->txtag->dt_dmat,
1091 buf->map,
1092 0, buf->m_head->m_pkthdr.len,
1093 BUS_DMASYNC_POSTWRITE);
1094 ixgbe_dmamap_unload(txr->txtag,
1095 buf->map);
1096 m_freem(buf->m_head);
1097 buf->m_head = NULL;
1098 }
1099 buf->eop = NULL;
1100 ++txr->tx_avail;
1101
1102 /* We clean the range if multi segment */
1103 while (txd != eop) {
1104 ++txd;
1105 ++buf;
1106 ++work;
1107 /* wrap the ring? */
1108 if (__predict_false(!work)) {
1109 work -= txr->num_desc;
1110 buf = txr->tx_buffers;
1111 txd = txr->tx_base;
1112 }
1113 if (buf->m_head) {
1114 txr->bytes +=
1115 buf->m_head->m_pkthdr.len;
1116 bus_dmamap_sync(txr->txtag->dt_dmat,
1117 buf->map,
1118 0, buf->m_head->m_pkthdr.len,
1119 BUS_DMASYNC_POSTWRITE);
1120 ixgbe_dmamap_unload(txr->txtag,
1121 buf->map);
1122 m_freem(buf->m_head);
1123 buf->m_head = NULL;
1124 }
1125 ++txr->tx_avail;
1126 buf->eop = NULL;
1127
1128 }
1129 ++txr->packets;
1130 ++processed;
1131 ++ifp->if_opackets;
1132
1133 /* Try the next packet */
1134 ++txd;
1135 ++buf;
1136 ++work;
1137 /* reset with a wrap */
1138 if (__predict_false(!work)) {
1139 work -= txr->num_desc;
1140 buf = txr->tx_buffers;
1141 txd = txr->tx_base;
1142 }
1143 prefetch(txd);
1144 } while (__predict_true(--limit));
1145
1146 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1147 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1148
1149 work += txr->num_desc;
1150 txr->next_to_clean = work;
1151
	/*
	** Queue hang detection: we know there is work outstanding,
	** otherwise the early return above would have been taken, so
	** increment busy if nothing was cleaned; the local timer checks
	** this count and marks the queue HUNG once it exceeds the
	** maximum number of attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything was cleaned, reset the state to 1; note that
	** this also clears HUNG if it was set.
	*/
	if (processed)
		txr->busy = 1;
1168
1169 if (txr->tx_avail == txr->num_desc)
1170 txr->busy = 0;
1171
1172 return;
1173 }
1174
1175
1176 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow Director can build a
** hashed filter table entry, allowing traffic flows to be identified and
** kept on the same CPU.  Doing this for every frame would be a
** performance hit, so only one packet in every atr_sample_rate is
** sampled.
*/
1185 static void
1186 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1187 {
1188 struct adapter *adapter = txr->adapter;
1189 struct ix_queue *que;
1190 struct ip *ip;
1191 struct tcphdr *th;
1192 struct udphdr *uh;
1193 struct ether_vlan_header *eh;
1194 union ixgbe_atr_hash_dword input = {.dword = 0};
1195 union ixgbe_atr_hash_dword common = {.dword = 0};
1196 int ehdrlen, ip_hlen;
1197 u16 etype;
1198
1199 eh = mtod(mp, struct ether_vlan_header *);
1200 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1201 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1202 etype = eh->evl_proto;
1203 } else {
1204 ehdrlen = ETHER_HDR_LEN;
1205 etype = eh->evl_encap_proto;
1206 }
1207
1208 /* Only handling IPv4 */
1209 if (etype != htons(ETHERTYPE_IP))
1210 return;
1211
1212 ip = (struct ip *)(mp->m_data + ehdrlen);
1213 ip_hlen = ip->ip_hl << 2;
1214
1215 /* check if we're UDP or TCP */
1216 switch (ip->ip_p) {
1217 case IPPROTO_TCP:
1218 th = (struct tcphdr *)((char *)ip + ip_hlen);
1219 /* src and dst are inverted */
1220 common.port.dst ^= th->th_sport;
1221 common.port.src ^= th->th_dport;
1222 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1223 break;
1224 case IPPROTO_UDP:
1225 uh = (struct udphdr *)((char *)ip + ip_hlen);
1226 /* src and dst are inverted */
1227 common.port.dst ^= uh->uh_sport;
1228 common.port.src ^= uh->uh_dport;
1229 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1230 break;
1231 default:
1232 return;
1233 }
1234
1235 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1236 if (mp->m_pkthdr.ether_vtag)
1237 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1238 else
1239 common.flex_bytes ^= etype;
1240 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1241
1242 que = &adapter->queues[txr->me];
1243 /*
1244 ** This assumes the Rx queue and Tx
1245 ** queue are bound to the same CPU
1246 */
1247 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1248 input, common, que->msix);
1249 }
1250 #endif /* IXGBE_FDIR */
1251
1252 /*
1253 ** Used to detect a descriptor that has
1254 ** been merged by Hardware RSC.
1255 */
1256 static inline u32
1257 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1258 {
1259 return (le32toh(rx->wb.lower.lo_dword.data) &
1260 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1261 }
1262
/*********************************************************************
 *
 *  Initialize the Hardware RSC (LRO) feature on 82599 for an RX ring.
 *  It is toggled by the LRO capability even though it is transparent
 *  to the stack.
 *
 *  NOTE: since this hardware feature only works with IPv4 and our
 *  testing has shown software LRO to be just as effective, it is
 *  disabled by default.
 *
 **********************************************************************/
1274 static void
1275 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1276 {
1277 struct adapter *adapter = rxr->adapter;
1278 struct ixgbe_hw *hw = &adapter->hw;
1279 u32 rscctrl, rdrxctl;
1280
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back so RSC really stops */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1287
1288 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1289 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1290 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1291 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1292 #endif /* DEV_NETMAP */
1293 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1294 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1295 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1296
1297 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1298 rscctrl |= IXGBE_RSCCTL_RSCEN;
1299 /*
1300 ** Limit the total number of descriptors that
1301 ** can be combined, so it does not exceed 64K
1302 */
1303 if (rxr->mbuf_sz == MCLBYTES)
1304 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1305 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1306 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1307 else if (rxr->mbuf_sz == MJUM9BYTES)
1308 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1309 else /* Using 16K cluster */
1310 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1311
1312 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1313
1314 /* Enable TCP header recognition */
1315 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1316 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1317 IXGBE_PSRTYPE_TCPHDR));
1318
1319 /* Disable RSC for ACK packets */
1320 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1321 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1322
1323 rxr->hw_rsc = TRUE;
1324 }
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again later to retry.
 *
 **********************************************************************/
1334 static void
1335 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1336 {
1337 struct adapter *adapter = rxr->adapter;
1338 struct ixgbe_rx_buf *rxbuf;
1339 struct mbuf *mp;
1340 int i, j, error;
1341 bool refreshed = false;
1342
1343 i = j = rxr->next_to_refresh;
1344 /* Control the loop with one beyond */
1345 if (++j == rxr->num_desc)
1346 j = 0;
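	/*
	 * 'i' is the slot being refreshed while 'j' looks one slot ahead;
	 * stopping when j reaches 'limit' keeps the refresh index from
	 * overtaking the descriptors the caller is still processing.
	 */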
1347
1348 while (j != limit) {
1349 rxbuf = &rxr->rx_buffers[i];
1350 if (rxbuf->buf == NULL) {
1351 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1352 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1353 if (mp == NULL) {
1354 rxr->no_jmbuf.ev_count++;
1355 goto update;
1356 }
1357 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1358 m_adj(mp, ETHER_ALIGN);
1359 } else
1360 mp = rxbuf->buf;
1361
1362 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1363
1364 /* If we're dealing with an mbuf that was copied rather
1365 * than replaced, there's no need to go through busdma.
1366 */
1367 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1368 /* Get the memory mapping */
1369 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1370 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1371 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1372 if (error != 0) {
1373 printf("Refresh mbufs: payload dmamap load"
1374 " failure - %d\n", error);
1375 m_free(mp);
1376 rxbuf->buf = NULL;
1377 goto update;
1378 }
1379 rxbuf->buf = mp;
1380 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1381 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1382 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1383 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1384 } else {
1385 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1386 rxbuf->flags &= ~IXGBE_RX_COPY;
1387 }
1388
1389 refreshed = true;
1390 /* Next is precalculated */
1391 i = j;
1392 rxr->next_to_refresh = i;
1393 if (++j == rxr->num_desc)
1394 j = 0;
1395 }
1396 update:
1397 if (refreshed) /* Update hardware tail index */
1398 IXGBE_WRITE_REG(&adapter->hw,
1399 rxr->tail, rxr->next_to_refresh);
1400 return;
1401 }
1402
1403 /*********************************************************************
1404 *
1405 * Allocate memory for rx_buffer structures. Since we use one
1406 * rx_buffer per received packet, the maximum number of rx_buffer's
1407 * that we'll need is equal to the number of receive descriptors
1408 * that we've allocated.
1409 *
1410 **********************************************************************/
1411 int
1412 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1413 {
1414 struct adapter *adapter = rxr->adapter;
1415 device_t dev = adapter->dev;
1416 struct ixgbe_rx_buf *rxbuf;
1417 int bsize, error;
1418
1419 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1420 if (!(rxr->rx_buffers =
1421 (struct ixgbe_rx_buf *) malloc(bsize,
1422 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1423 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1424 error = ENOMEM;
1425 goto fail;
1426 }
1427
1428 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1429 1, 0, /* alignment, bounds */
1430 MJUM16BYTES, /* maxsize */
1431 1, /* nsegments */
1432 MJUM16BYTES, /* maxsegsize */
1433 0, /* flags */
1434 &rxr->ptag))) {
1435 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1436 goto fail;
1437 }
1438
1439 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1440 rxbuf = &rxr->rx_buffers[i];
1441 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1442 if (error) {
1443 aprint_error_dev(dev, "Unable to create RX dma map\n");
1444 goto fail;
1445 }
1446 }
1447
1448 return (0);
1449
1450 fail:
1451 /* Frees all, but can handle partial completion */
1452 ixgbe_free_receive_structures(adapter);
1453 return (error);
1454 }
1455
1456
1457 static void
1458 ixgbe_free_receive_ring(struct rx_ring *rxr)
1459 {
1460 struct ixgbe_rx_buf *rxbuf;
1461
1462 for (int i = 0; i < rxr->num_desc; i++) {
1463 rxbuf = &rxr->rx_buffers[i];
1464 if (rxbuf->buf != NULL) {
1465 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1466 0, rxbuf->buf->m_pkthdr.len,
1467 BUS_DMASYNC_POSTREAD);
1468 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1469 rxbuf->buf->m_flags |= M_PKTHDR;
1470 m_freem(rxbuf->buf);
1471 rxbuf->buf = NULL;
1472 rxbuf->flags = 0;
1473 }
1474 }
1475 }
1476
1477
1478 /*********************************************************************
1479 *
1480 * Initialize a receive ring and its buffers.
1481 *
1482 **********************************************************************/
1483 static int
1484 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1485 {
1486 struct adapter *adapter;
1487 struct ixgbe_rx_buf *rxbuf;
1488 #ifdef LRO
1489 struct ifnet *ifp;
1490 struct lro_ctrl *lro = &rxr->lro;
1491 #endif /* LRO */
1492 int rsize, error = 0;
1493 #ifdef DEV_NETMAP
1494 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1495 struct netmap_slot *slot;
1496 #endif /* DEV_NETMAP */
1497
1498 adapter = rxr->adapter;
1499 #ifdef LRO
1500 ifp = adapter->ifp;
1501 #endif /* LRO */
1502
1503 /* Clear the ring contents */
1504 IXGBE_RX_LOCK(rxr);
1505 #ifdef DEV_NETMAP
1506 /* same as in ixgbe_setup_transmit_ring() */
1507 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1508 #endif /* DEV_NETMAP */
1509 rsize = roundup2(adapter->num_rx_desc *
1510 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1511 bzero((void *)rxr->rx_base, rsize);
1512 /* Cache the size */
1513 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1514
1515 /* Free current RX buffer structs and their mbufs */
1516 ixgbe_free_receive_ring(rxr);
1517
1518 IXGBE_RX_UNLOCK(rxr);
1519
1520 /* Now reinitialize our supply of jumbo mbufs. The number
1521 * or size of jumbo mbufs may have changed.
1522 */
1523 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1524 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
1525
1526 IXGBE_RX_LOCK(rxr);
1527
1528 /* Now replenish the mbufs */
1529 for (int j = 0; j != rxr->num_desc; ++j) {
1530 struct mbuf *mp;
1531
1532 rxbuf = &rxr->rx_buffers[j];
1533 #ifdef DEV_NETMAP
1534 /*
1535 * In netmap mode, fill the map and set the buffer
1536 * address in the NIC ring, considering the offset
1537 * between the netmap and NIC rings (see comment in
1538 * ixgbe_setup_transmit_ring() ). No need to allocate
1539 * an mbuf, so end the block with a continue;
1540 */
1541 if (slot) {
1542 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1543 uint64_t paddr;
1544 void *addr;
1545
1546 addr = PNMB(na, slot + sj, &paddr);
1547 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1548 /* Update descriptor and the cached value */
1549 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1550 rxbuf->addr = htole64(paddr);
1551 continue;
1552 }
1553 #endif /* DEV_NETMAP */
1554 rxbuf->flags = 0;
1555 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1556 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1557 if (rxbuf->buf == NULL) {
1558 error = ENOBUFS;
1559 goto fail;
1560 }
1561 mp = rxbuf->buf;
1562 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1563 /* Get the memory mapping */
1564 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1565 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1566 if (error != 0)
1567 goto fail;
1568 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1569 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1570 /* Update the descriptor and the cached value */
1571 rxr->rx_base[j].read.pkt_addr =
1572 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1573 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1574 }
1575
1576
1577 /* Setup our descriptor indices */
1578 rxr->next_to_check = 0;
1579 rxr->next_to_refresh = 0;
1580 rxr->lro_enabled = FALSE;
1581 rxr->rx_copies.ev_count = 0;
1582 rxr->rx_bytes.ev_count = 0;
1583 rxr->vtag_strip = FALSE;
1584
1585 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1586 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1587
1588 /*
1589 ** Now set up the LRO interface:
1590 */
1591 if (ixgbe_rsc_enable)
1592 ixgbe_setup_hw_rsc(rxr);
1593 #ifdef LRO
1594 else if (ifp->if_capenable & IFCAP_LRO) {
1595 device_t dev = adapter->dev;
1596 int err = tcp_lro_init(lro);
1597 if (err) {
1598 device_printf(dev, "LRO Initialization failed!\n");
1599 goto fail;
1600 }
1601 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1602 rxr->lro_enabled = TRUE;
1603 lro->ifp = adapter->ifp;
1604 }
1605 #endif /* LRO */
1606
1607 IXGBE_RX_UNLOCK(rxr);
1608 return (0);
1609
1610 fail:
1611 ixgbe_free_receive_ring(rxr);
1612 IXGBE_RX_UNLOCK(rxr);
1613 return (error);
1614 }
1615
1616 /*********************************************************************
1617 *
1618 * Initialize all receive rings.
1619 *
1620 **********************************************************************/
1621 int
1622 ixgbe_setup_receive_structures(struct adapter *adapter)
1623 {
1624 struct rx_ring *rxr = adapter->rx_rings;
1625 int j;
1626
1627 for (j = 0; j < adapter->num_queues; j++, rxr++)
1628 if (ixgbe_setup_receive_ring(rxr))
1629 goto fail;
1630
1631 return (0);
1632 fail:
	/*
	 * Free the RX buffers allocated so far.  We only handle the
	 * rings that completed; the failing ring has already cleaned
	 * up after itself.  Ring 'j' failed, so it is the terminus.
	 */
1638 for (int i = 0; i < j; ++i) {
1639 rxr = &adapter->rx_rings[i];
1640 ixgbe_free_receive_ring(rxr);
1641 }
1642
1643 return (ENOBUFS);
1644 }
1645
1646
1647 /*********************************************************************
1648 *
1649 * Free all receive rings.
1650 *
1651 **********************************************************************/
1652 void
1653 ixgbe_free_receive_structures(struct adapter *adapter)
1654 {
1655 struct rx_ring *rxr = adapter->rx_rings;
1656
1657 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1658
1659 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1660 #ifdef LRO
1661 struct lro_ctrl *lro = &rxr->lro;
1662 #endif /* LRO */
1663 ixgbe_free_receive_buffers(rxr);
1664 #ifdef LRO
1665 /* Free LRO memory */
1666 tcp_lro_free(lro);
1667 #endif /* LRO */
1668 /* Free the ring memory as well */
1669 ixgbe_dma_free(adapter, &rxr->rxdma);
1670 IXGBE_RX_LOCK_DESTROY(rxr);
1671 }
1672
1673 free(adapter->rx_rings, M_DEVBUF);
1674 }
1675
1676
1677 /*********************************************************************
1678 *
1679 * Free receive ring data structures
1680 *
1681 **********************************************************************/
1682 static void
1683 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1684 {
1685 struct adapter *adapter = rxr->adapter;
1686 struct ixgbe_rx_buf *rxbuf;
1687
1688 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1689
1690 /* Cleanup any existing buffers */
1691 if (rxr->rx_buffers != NULL) {
1692 for (int i = 0; i < adapter->num_rx_desc; i++) {
1693 rxbuf = &rxr->rx_buffers[i];
1694 if (rxbuf->buf != NULL) {
1695 bus_dmamap_sync(rxr->ptag->dt_dmat,
1696 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1697 BUS_DMASYNC_POSTREAD);
1698 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1699 rxbuf->buf->m_flags |= M_PKTHDR;
1700 m_freem(rxbuf->buf);
1701 }
1702 rxbuf->buf = NULL;
1703 if (rxbuf->pmap != NULL) {
1704 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1705 rxbuf->pmap = NULL;
1706 }
1707 }
1708 if (rxr->rx_buffers != NULL) {
1709 free(rxr->rx_buffers, M_DEVBUF);
1710 rxr->rx_buffers = NULL;
1711 }
1712 }
1713
1714 if (rxr->ptag != NULL) {
1715 ixgbe_dma_tag_destroy(rxr->ptag);
1716 rxr->ptag = NULL;
1717 }
1718
1719 return;
1720 }
1721
1722 static __inline void
1723 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1724 {
1725 int s;
1726
1727 #ifdef LRO
1728 struct adapter *adapter = ifp->if_softc;
1729 struct ethercom *ec = &adapter->osdep.ec;
1730
	/*
	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
	 * has been verified by hardware, and the packet must not carry a
	 * VLAN tag in the Ethernet header.  For IPv6 we do not yet support
	 * extension headers.
	 */
1736 if (rxr->lro_enabled &&
1737 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1738 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1739 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1740 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1741 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1742 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1743 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1744 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1745 /*
1746 * Send to the stack if:
1747 ** - LRO not enabled, or
1748 ** - no LRO resources, or
1749 ** - lro enqueue fails
1750 */
1751 if (rxr->lro.lro_cnt != 0)
1752 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1753 return;
1754 }
1755 #endif /* LRO */
1756
1757 IXGBE_RX_UNLOCK(rxr);
1758
1759 s = splnet();
1760 /* Pass this up to any BPF listeners. */
1761 bpf_mtap(ifp, m);
1762 if_input(ifp, m);
1763 splx(s);
1764
1765 IXGBE_RX_LOCK(rxr);
1766 }
1767
1768 static __inline void
1769 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1770 {
1771 struct ixgbe_rx_buf *rbuf;
1772
1773 rbuf = &rxr->rx_buffers[i];
1774
1775
	/*
	** With advanced descriptors the writeback clobbers the buffer
	** addresses, so it is easier to just free the existing mbufs and
	** take the normal refresh path to get new buffers and mappings.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
1793 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1794
1795 rbuf->flags = 0;
1796
1797 return;
1798 }
1799
1800
1801 /*********************************************************************
1802 *
1803 * This routine executes in interrupt context. It replenishes
1804 * the mbufs in the descriptor and sends data which has been
1805 * dma'ed into host memory to upper layer.
1806 *
1807 * Return TRUE for more work, FALSE for all clean.
1808 *********************************************************************/
1809 bool
1810 ixgbe_rxeof(struct ix_queue *que)
1811 {
1812 struct adapter *adapter = que->adapter;
1813 struct rx_ring *rxr = que->rxr;
1814 struct ifnet *ifp = adapter->ifp;
1815 #ifdef LRO
1816 struct lro_ctrl *lro = &rxr->lro;
1817 struct lro_entry *queued;
1818 #endif /* LRO */
1819 int i, nextp, processed = 0;
1820 u32 staterr = 0;
1821 u16 count = rxr->process_limit;
1822 union ixgbe_adv_rx_desc *cur;
1823 struct ixgbe_rx_buf *rbuf, *nbuf;
1824 #ifdef RSS
1825 u16 pkt_info;
1826 #endif
1827
1828 IXGBE_RX_LOCK(rxr);
1829
1830 #ifdef DEV_NETMAP
1831 /* Same as the txeof routine: wakeup clients on intr. */
1832 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1833 IXGBE_RX_UNLOCK(rxr);
1834 return (FALSE);
1835 }
1836 #endif /* DEV_NETMAP */
1837
1838 for (i = rxr->next_to_check; count != 0;) {
1839 struct mbuf *sendmp, *mp;
1840 u32 rsc, ptype;
1841 u16 len;
1842 u16 vtag = 0;
1843 bool eop;
1844
1845 /* Sync the ring. */
1846 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1847 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1848
1849 cur = &rxr->rx_base[i];
1850 staterr = le32toh(cur->wb.upper.status_error);
1851 #ifdef RSS
1852 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1853 #endif
1854
1855 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1856 break;
1857 if ((ifp->if_flags & IFF_RUNNING) == 0)
1858 break;
1859
1860 count--;
1861 sendmp = NULL;
1862 nbuf = NULL;
1863 rsc = 0;
1864 cur->wb.upper.status_error = 0;
1865 rbuf = &rxr->rx_buffers[i];
1866 mp = rbuf->buf;
1867
1868 len = le16toh(cur->wb.upper.length);
1869 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1870 IXGBE_RXDADV_PKTTYPE_MASK;
1871 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1872
1873 /* Make sure bad packets are discarded */
1874 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1875 #if __FreeBSD_version >= 1100036
1876 if (IXGBE_IS_VF(adapter))
1877 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1878 #endif
1879 rxr->rx_discarded.ev_count++;
1880 ixgbe_rx_discard(rxr, i);
1881 goto next_desc;
1882 }
1883
1884 		/*
1885 		** On the 82599, which supports a hardware
1886 		** LRO (called HW RSC), packets need not be
1887 		** fragmented across sequential descriptors;
1888 		** instead the next descriptor is indicated
1889 		** in bits of the current descriptor.
1890 		** This also means that we might process
1891 		** more than one packet at a time, something
1892 		** that has never been true before; it
1893 		** required eliminating global chain pointers
1894 		** in favor of what we are doing here. -jfv
1895 		*/
1896 if (!eop) {
1897 /*
1898 ** Figure out the next descriptor
1899 ** of this frame.
1900 */
1901 if (rxr->hw_rsc == TRUE) {
1902 rsc = ixgbe_rsc_count(cur);
1903 rxr->rsc_num += (rsc - 1);
1904 }
1905 if (rsc) { /* Get hardware index */
1906 nextp = ((staterr &
1907 IXGBE_RXDADV_NEXTP_MASK) >>
1908 IXGBE_RXDADV_NEXTP_SHIFT);
1909 } else { /* Just sequential */
1910 nextp = i + 1;
1911 if (nextp == adapter->num_rx_desc)
1912 nextp = 0;
1913 }
1914 nbuf = &rxr->rx_buffers[nextp];
1915 prefetch(nbuf);
1916 }
1917 /*
1918 ** Rather than using the fmp/lmp global pointers
1919 ** we now keep the head of a packet chain in the
1920 ** buffer struct and pass this along from one
1921 ** descriptor to the next, until we get EOP.
1922 */
1923 mp->m_len = len;
1924 		/*
1925 		** See if a head mbuf was stashed by an earlier
1926 		** descriptor of this frame (secondary fragment).
1927 		*/
1928 sendmp = rbuf->fmp;
1929 if (sendmp != NULL) { /* secondary frag */
1930 rbuf->buf = rbuf->fmp = NULL;
1931 mp->m_flags &= ~M_PKTHDR;
1932 sendmp->m_pkthdr.len += mp->m_len;
1933 } else {
1934 /*
1935 * Optimize. This might be a small packet,
1936 * maybe just a TCP ACK. Do a fast copy that
1937 * is cache aligned into a new mbuf, and
1938 * leave the old mbuf+cluster for re-use.
1939 */
1940 if (eop && len <= IXGBE_RX_COPY_LEN) {
1941 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1942 if (sendmp != NULL) {
1943 sendmp->m_data +=
1944 IXGBE_RX_COPY_ALIGN;
1945 ixgbe_bcopy(mp->m_data,
1946 sendmp->m_data, len);
1947 sendmp->m_len = len;
1948 rxr->rx_copies.ev_count++;
1949 rbuf->flags |= IXGBE_RX_COPY;
1950 }
1951 }
1952 if (sendmp == NULL) {
1953 rbuf->buf = rbuf->fmp = NULL;
1954 sendmp = mp;
1955 }
1956
1957 /* first desc of a non-ps chain */
1958 sendmp->m_flags |= M_PKTHDR;
1959 sendmp->m_pkthdr.len = mp->m_len;
1960 }
1961 ++processed;
1962
1963 /* Pass the head pointer on */
1964 if (eop == 0) {
1965 nbuf->fmp = sendmp;
1966 sendmp = NULL;
1967 mp->m_next = nbuf->buf;
1968 } else { /* Sending this frame */
1969 m_set_rcvif(sendmp, ifp);
1970 ifp->if_ipackets++;
1971 rxr->rx_packets.ev_count++;
1972 /* capture data for AIM */
1973 rxr->bytes += sendmp->m_pkthdr.len;
1974 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1975 /* Process vlan info */
1976 if ((rxr->vtag_strip) &&
1977 (staterr & IXGBE_RXD_STAT_VP))
1978 vtag = le16toh(cur->wb.upper.vlan);
1979 if (vtag) {
1980 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1981 printf("%s: could not apply VLAN "
1982 "tag", __func__));
1983 }
1984 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1985 ixgbe_rx_checksum(staterr, sendmp, ptype,
1986 &adapter->stats.pf);
1987 }
1988 #if 0 /* FreeBSD */
1989 /*
1990 * In case of multiqueue, we have RXCSUM.PCSD bit set
1991 * and never cleared. This means we have RSS hash
1992 * available to be used.
1993 */
1994 if (adapter->num_queues > 1) {
1995 sendmp->m_pkthdr.flowid =
1996 le32toh(cur->wb.lower.hi_dword.rss);
1997 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1998 case IXGBE_RXDADV_RSSTYPE_IPV4:
1999 M_HASHTYPE_SET(sendmp,
2000 M_HASHTYPE_RSS_IPV4);
2001 break;
2002 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
2003 M_HASHTYPE_SET(sendmp,
2004 M_HASHTYPE_RSS_TCP_IPV4);
2005 break;
2006 case IXGBE_RXDADV_RSSTYPE_IPV6:
2007 M_HASHTYPE_SET(sendmp,
2008 M_HASHTYPE_RSS_IPV6);
2009 break;
2010 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
2011 M_HASHTYPE_SET(sendmp,
2012 M_HASHTYPE_RSS_TCP_IPV6);
2013 break;
2014 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_RSS_IPV6_EX);
2017 break;
2018 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2019 M_HASHTYPE_SET(sendmp,
2020 M_HASHTYPE_RSS_TCP_IPV6_EX);
2021 break;
2022 #if __FreeBSD_version > 1100000
2023 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2024 M_HASHTYPE_SET(sendmp,
2025 M_HASHTYPE_RSS_UDP_IPV4);
2026 break;
2027 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2028 M_HASHTYPE_SET(sendmp,
2029 M_HASHTYPE_RSS_UDP_IPV6);
2030 break;
2031 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2032 M_HASHTYPE_SET(sendmp,
2033 M_HASHTYPE_RSS_UDP_IPV6_EX);
2034 break;
2035 #endif
2036 default:
2037 M_HASHTYPE_SET(sendmp,
2038 M_HASHTYPE_OPAQUE);
2039 }
2040 } else {
2041 sendmp->m_pkthdr.flowid = que->msix;
2042 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2043 }
2044 #endif /* FreeBSD_version */
2045 }
2046 next_desc:
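		/*
		** Sync the ring so the status cleared above is
		** visible to the hardware before we move on.
		*/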
2047 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2048 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2049
2050 /* Advance our pointers to the next descriptor. */
2051 if (++i == rxr->num_desc)
2052 i = 0;
2053
2054 /* Now send to the stack or do LRO */
2055 if (sendmp != NULL) {
2056 rxr->next_to_check = i;
2057 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2058 i = rxr->next_to_check;
2059 }
2060
2061 /* Every 8 descriptors we go to refresh mbufs */
2062 if (processed == 8) {
2063 ixgbe_refresh_mbufs(rxr, i);
2064 processed = 0;
2065 }
2066 }
2067
2068 /* Refresh any remaining buf structs */
2069 if (ixgbe_rx_unrefreshed(rxr))
2070 ixgbe_refresh_mbufs(rxr, i);
2071
2072 rxr->next_to_check = i;
2073
2074 #ifdef LRO
2075 /*
2076 * Flush any outstanding LRO work
2077 */
2078 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2079 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2080 tcp_lro_flush(lro, queued);
2081 }
2082 #endif /* LRO */
2083
2084 IXGBE_RX_UNLOCK(rxr);
2085
2086 /*
2087 ** Still have cleaning to do?
2088 */
2089 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2090 return true;
2091 else
2092 return false;
2093 }
2094
2095
2096 /*********************************************************************
2097 *
2098 * Verify that the hardware indicated that the checksum is valid.
2099  *  Inform the stack about the status of the checksum so that the
2100  *  stack doesn't spend time verifying it again.
2101 *
2102 *********************************************************************/
2103 static void
2104 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2105 struct ixgbe_hw_stats *stats)
2106 {
2107 u16 status = (u16) staterr;
2108 u8 errors = (u8) (staterr >> 24);
2109 #if 0
2110 bool sctp = FALSE;
2111
2112 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2113 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2114 sctp = TRUE;
2115 #endif
2116
2117 if (status & IXGBE_RXD_STAT_IPCS) {
2118 stats->ipcs.ev_count++;
2119 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2120 /* IP Checksum Good */
2121 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2122
2123 } else {
2124 stats->ipcs_bad.ev_count++;
2125 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2126 }
2127 }
2128 	if (status & IXGBE_RXD_STAT_L4CS) {
2129 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2130 		stats->l4cs.ev_count++;
2131 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2132 mp->m_pkthdr.csum_flags |= type;
2133 } else {
2134 stats->l4cs_bad.ev_count++;
2135 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2136 }
2137 }
2138 return;
2139 }
2140
2141
2142 /********************************************************************
2143 * Manage DMA'able memory.
2144 *******************************************************************/
2145
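/*
 * Allocate, map and load a single contiguous DMA area of 'size' bytes:
 * create a DMA tag, allocate and map the memory, then create and load a
 * map, recording the kernel virtual and bus addresses in *dma.  On any
 * failure the partially built state is torn down via the fail_* labels.
 *
 * Typical use (as done for the TX ring below):
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		error = ENOMEM;
 */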
2146 int
2147 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2148 struct ixgbe_dma_alloc *dma, const int mapflags)
2149 {
2150 device_t dev = adapter->dev;
2151 int r, rsegs;
2152
2153 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2154 DBA_ALIGN, 0, /* alignment, bounds */
2155 size, /* maxsize */
2156 1, /* nsegments */
2157 size, /* maxsegsize */
2158 BUS_DMA_ALLOCNOW, /* flags */
2159 &dma->dma_tag);
2160 if (r != 0) {
2161 aprint_error_dev(dev,
2162 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2163 goto fail_0;
2164 }
2165
2166 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2167 size,
2168 dma->dma_tag->dt_alignment,
2169 dma->dma_tag->dt_boundary,
2170 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2171 if (r != 0) {
2172 aprint_error_dev(dev,
2173 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2174 goto fail_1;
2175 }
2176
2177 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2178 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2179 if (r != 0) {
2180 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2181 __func__, r);
2182 goto fail_2;
2183 }
2184
2185 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2186 if (r != 0) {
2187 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2188 __func__, r);
2189 goto fail_3;
2190 }
2191
2192 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2193 size,
2194 NULL,
2195 mapflags | BUS_DMA_NOWAIT);
2196 if (r != 0) {
2197 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2198 __func__, r);
2199 goto fail_4;
2200 }
2201 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2202 dma->dma_size = size;
2203 return 0;
2204 fail_4:
2205 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2206 fail_3:
2207 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2208 fail_2:
2209 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2210 fail_1:
2211 ixgbe_dma_tag_destroy(dma->dma_tag);
2212 fail_0:
2213 return r;
2214 }
2215
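/*
 * Release a DMA area set up by ixgbe_dma_malloc: sync and unload the
 * map, free the backing memory and destroy the tag.
 */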
2216 void
2217 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2218 {
2219 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2220 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2221 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2222 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2223 ixgbe_dma_tag_destroy(dma->dma_tag);
2224 }
2225
2226
2227 /*********************************************************************
2228 *
2229 * Allocate memory for the transmit and receive rings, and then
2230 * the descriptors associated with each, called only once at attach.
2231 *
2232 **********************************************************************/
2233 int
2234 ixgbe_allocate_queues(struct adapter *adapter)
2235 {
2236 device_t dev = adapter->dev;
2237 struct ix_queue *que;
2238 struct tx_ring *txr;
2239 struct rx_ring *rxr;
2240 int rsize, tsize, error = IXGBE_SUCCESS;
2241 int txconf = 0, rxconf = 0;
2242 #ifdef PCI_IOV
2243 enum ixgbe_iov_mode iov_mode;
2244 #endif
2245
2246 /* First allocate the top level queue structs */
2247 if (!(adapter->queues =
2248 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2249 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2250 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2251 error = ENOMEM;
2252 goto fail;
2253 }
2254
2255 	/* Next allocate the TX ring struct memory */
2256 if (!(adapter->tx_rings =
2257 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2258 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2259 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2260 error = ENOMEM;
2261 goto tx_fail;
2262 }
2263
2264 	/* Next allocate the RX ring struct memory */
2265 if (!(adapter->rx_rings =
2266 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2267 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2268 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2269 error = ENOMEM;
2270 goto rx_fail;
2271 }
2272
2273 /* For the ring itself */
2274 tsize = roundup2(adapter->num_tx_desc *
2275 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2276
2277 #ifdef PCI_IOV
2278 iov_mode = ixgbe_get_iov_mode(adapter);
2279 adapter->pool = ixgbe_max_vfs(iov_mode);
2280 #else
2281 adapter->pool = 0;
2282 #endif
2283 	/*
2284 	 * Now set up the TX queues; txconf is needed to handle the
2285 	 * possibility that things fail midcourse and we need to
2286 	 * undo the allocations gracefully.
2287 	 */
2288 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2289 /* Set up some basics */
2290 txr = &adapter->tx_rings[i];
2291 txr->adapter = adapter;
2292 #ifdef PCI_IOV
2293 txr->me = ixgbe_pf_que_index(iov_mode, i);
2294 #else
2295 txr->me = i;
2296 #endif
2297 txr->num_desc = adapter->num_tx_desc;
2298
2299 /* Initialize the TX side lock */
2300 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2301 device_xname(dev), txr->me);
2302 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2303
2304 if (ixgbe_dma_malloc(adapter, tsize,
2305 &txr->txdma, BUS_DMA_NOWAIT)) {
2306 aprint_error_dev(dev,
2307 "Unable to allocate TX Descriptor memory\n");
2308 error = ENOMEM;
2309 goto err_tx_desc;
2310 }
2311 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2312 bzero((void *)txr->tx_base, tsize);
2313
2314 /* Now allocate transmit buffers for the ring */
2315 if (ixgbe_allocate_transmit_buffers(txr)) {
2316 aprint_error_dev(dev,
2317 "Critical Failure setting up transmit buffers\n");
2318 error = ENOMEM;
2319 goto err_tx_desc;
2320 }
2321 #ifndef IXGBE_LEGACY_TX
2322 /* Allocate a buf ring */
2323 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2324 M_WAITOK, &txr->tx_mtx);
2325 if (txr->br == NULL) {
2326 aprint_error_dev(dev,
2327 "Critical Failure setting up buf ring\n");
2328 error = ENOMEM;
2329 goto err_tx_desc;
2330 }
2331 #endif
2332 }
2333
2334 /*
2335 * Next the RX queues...
2336 */
2337 rsize = roundup2(adapter->num_rx_desc *
2338 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2339 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2340 rxr = &adapter->rx_rings[i];
2341 /* Set up some basics */
2342 rxr->adapter = adapter;
2343 #ifdef PCI_IOV
2344 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2345 #else
2346 rxr->me = i;
2347 #endif
2348 rxr->num_desc = adapter->num_rx_desc;
2349
2350 /* Initialize the RX side lock */
2351 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2352 device_xname(dev), rxr->me);
2353 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2354
2355 if (ixgbe_dma_malloc(adapter, rsize,
2356 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2357 aprint_error_dev(dev,
2358 		    "Unable to allocate RX Descriptor memory\n");
2359 error = ENOMEM;
2360 goto err_rx_desc;
2361 }
2362 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2363 bzero((void *)rxr->rx_base, rsize);
2364
2365 		/* Allocate receive buffers for the ring */
2366 if (ixgbe_allocate_receive_buffers(rxr)) {
2367 aprint_error_dev(dev,
2368 "Critical Failure setting up receive buffers\n");
2369 error = ENOMEM;
2370 goto err_rx_desc;
2371 }
2372 }
2373
2374 /*
2375 ** Finally set up the queue holding structs
2376 */
2377 for (int i = 0; i < adapter->num_queues; i++) {
2378 que = &adapter->queues[i];
2379 que->adapter = adapter;
2380 que->me = i;
2381 que->txr = &adapter->tx_rings[i];
2382 que->rxr = &adapter->rx_rings[i];
2383 }
2384
2385 return (0);
2386
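/*
** Error unwind: release whatever descriptor DMA areas were
** successfully set up (tracked by rxconf/txconf), then free the
** ring and queue arrays themselves.
*/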
2387 err_rx_desc:
2388 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2389 ixgbe_dma_free(adapter, &rxr->rxdma);
2390 err_tx_desc:
2391 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2392 ixgbe_dma_free(adapter, &txr->txdma);
2393 free(adapter->rx_rings, M_DEVBUF);
2394 rx_fail:
2395 free(adapter->tx_rings, M_DEVBUF);
2396 tx_fail:
2397 free(adapter->queues, M_DEVBUF);
2398 fail:
2399 return (error);
2400 }
2401
2402