/******************************************************************************
2
3 Copyright (c) 2001-2014, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*
34 * Copyright (c) 2011 The NetBSD Foundation, Inc.
35 * All rights reserved.
36 *
37 * This code is derived from software contributed to The NetBSD Foundation
38 * by Coyote Point Systems, Inc.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
50 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
51 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
53 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
60 */
61 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 280182 2015-03-17 18:32:28Z jfv $*/
62 /*$NetBSD: ix_txrx.c,v 1.3 2016/12/01 06:27:18 msaitoh Exp $*/
63
64 #include "ixgbe.h"
65
/*
** HW RSC control:
**  This feature only works with IPv4, and only on 82599 and later
**  parts. It also breaks IP forwarding, and unlike LRO that cannot
**  be controlled by the stack. For these reasons it is left off by
**  default, with no tunable interface; enabling it requires
**  recompiling with this set to TRUE.
*/
78 static bool ixgbe_rsc_enable = FALSE;
79
80 #ifdef IXGBE_FDIR
/*
** For Flow Director: this is the number of TX packets we sample
** for the filter pool; at the default of 20, every 20th packet
** is probed.
**
** This feature can be disabled by setting this to 0.
*/
90 static int atr_sample_rate = 20;
91 #endif
92
93 /* Shared PCI config read/write */
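/*
 * NetBSD's pci_conf_read()/pci_conf_write() operate on aligned 32-bit
 * configuration registers, so the 16-bit accesses below are emulated by
 * reading the containing dword and masking/shifting out the requested
 * half (and read-modify-writing it on the write side).
 */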
94 u16
95 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
96 {
97 switch (reg % 4) {
98 case 0:
99 return pci_conf_read(hw->back->pc, hw->back->tag, reg) &
100 __BITS(15, 0);
101 case 2:
102 return __SHIFTOUT(pci_conf_read(hw->back->pc, hw->back->tag,
103 reg - 2), __BITS(31, 16));
104 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
106 break;
107 }
108 }
109
110 void
111 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
112 {
113 pcireg_t old;
114
115 switch (reg % 4) {
116 case 0:
117 old = pci_conf_read(hw->back->pc, hw->back->tag, reg) &
118 __BITS(31, 16);
119 pci_conf_write(hw->back->pc, hw->back->tag, reg, value | old);
120 break;
121 case 2:
122 old = pci_conf_read(hw->back->pc, hw->back->tag, reg - 2) &
123 __BITS(15, 0);
124 pci_conf_write(hw->back->pc, hw->back->tag, reg - 2,
125 __SHIFTIN(value, __BITS(31, 16)) | old);
126 break;
127 default:
		panic("%s: invalid register (%" PRIx32 ")", __func__, reg);
129 break;
130 }
131
132 return;
133 }
134
135 /*********************************************************************
136 * Local Function prototypes
137 *********************************************************************/
138 static void ixgbe_setup_transmit_ring(struct tx_ring *);
139 static void ixgbe_free_transmit_buffers(struct tx_ring *);
140 static int ixgbe_setup_receive_ring(struct rx_ring *);
141 static void ixgbe_free_receive_buffers(struct rx_ring *);
142
143 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
144 struct ixgbe_hw_stats *);
145 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
146 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
147 static int ixgbe_tx_ctx_setup(struct tx_ring *,
148 struct mbuf *, u32 *, u32 *);
149 static int ixgbe_tso_setup(struct tx_ring *,
150 struct mbuf *, u32 *, u32 *);
151 #ifdef IXGBE_FDIR
152 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
153 #endif
154 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
155 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
156 struct mbuf *, u32);
157
158 static void ixgbe_setup_hw_rsc(struct rx_ring *);
159
160 #ifdef IXGBE_LEGACY_TX
161 /*********************************************************************
162 * Transmit entry point
163 *
164 * ixgbe_start is called by the stack to initiate a transmit.
165 * The driver will remain in this routine as long as there are
166 * packets to transmit and transmit resources are available.
167 * In case resources are not available stack is notified and
168 * the packet is requeued.
169 **********************************************************************/
170
171 void
172 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
173 {
174 int rc;
175 struct mbuf *m_head;
176 struct adapter *adapter = txr->adapter;
177
178 IXGBE_TX_LOCK_ASSERT(txr);
179
180 if ((ifp->if_flags & IFF_RUNNING) == 0)
181 return;
182 if (!adapter->link_active)
183 return;
184
185 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
186 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
187 break;
188
189 IFQ_POLL(&ifp->if_snd, m_head);
190 if (m_head == NULL)
191 break;
192
193 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
194 break;
195 }
196 IFQ_DEQUEUE(&ifp->if_snd, m_head);
197 if (rc == EFBIG) {
198 struct mbuf *mtmp;
199
200 if ((mtmp = m_defrag(m_head, M_NOWAIT)) != NULL) {
201 m_head = mtmp;
202 rc = ixgbe_xmit(txr, m_head);
203 if (rc != 0)
204 adapter->efbig2_tx_dma_setup.ev_count++;
205 } else
206 adapter->m_defrag_failed.ev_count++;
207 }
208 if (rc != 0) {
209 m_freem(m_head);
210 continue;
211 }
212
213 /* Send a copy of the frame to the BPF listener */
214 bpf_mtap(ifp, m_head);
215 }
216 return;
217 }
218
/*
 * Legacy TX start - called by the stack; this always uses the
 * first tx ring and should not be used with multiqueue tx enabled.
 */
224 void
225 ixgbe_start(struct ifnet *ifp)
226 {
227 struct adapter *adapter = ifp->if_softc;
228 struct tx_ring *txr = adapter->tx_rings;
229
230 if (ifp->if_flags & IFF_RUNNING) {
231 IXGBE_TX_LOCK(txr);
232 ixgbe_start_locked(txr, ifp);
233 IXGBE_TX_UNLOCK(txr);
234 }
235 return;
236 }
237
238 #else /* ! IXGBE_LEGACY_TX */
239
240 /*
241 ** Multiqueue Transmit driver
242 **
243 */
244 int
245 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
246 {
247 struct adapter *adapter = ifp->if_softc;
248 struct ix_queue *que;
249 struct tx_ring *txr;
250 int i, err = 0;
251 #ifdef RSS
252 uint32_t bucket_id;
253 #endif
254
	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
262 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
263 #ifdef RSS
264 if (rss_hash2bucket(m->m_pkthdr.flowid,
265 M_HASHTYPE_GET(m), &bucket_id) == 0)
266 /* TODO: spit out something if bucket_id > num_queues? */
267 i = bucket_id % adapter->num_queues;
268 else
269 #endif
270 i = m->m_pkthdr.flowid % adapter->num_queues;
271 } else
272 i = curcpu % adapter->num_queues;
273
274 /* Check for a hung queue and pick alternative */
275 if (((1 << i) & adapter->active_queues) == 0)
276 i = ffsl(adapter->active_queues);
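	/* (note: ffsl() returns a 1-based bit index, or 0 when no bits are set) */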
277
278 txr = &adapter->tx_rings[i];
279 que = &adapter->queues[i];
280
281 err = drbr_enqueue(ifp, txr->br, m);
282 if (err)
283 return (err);
284 if (IXGBE_TX_TRYLOCK(txr)) {
285 ixgbe_mq_start_locked(ifp, txr);
286 IXGBE_TX_UNLOCK(txr);
287 } else
288 softint_schedule(txr->txq_si);
289
290 return (0);
291 }
292
293 int
294 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
295 {
296 struct adapter *adapter = txr->adapter;
297 struct mbuf *next;
298 int enqueued = 0, err = 0;
299
300 if (((ifp->if_flags & IFF_RUNNING) == 0) ||
301 adapter->link_active == 0)
302 return (ENETDOWN);
303
304 /* Process the queue */
305 #if __FreeBSD_version < 901504
306 next = drbr_dequeue(ifp, txr->br);
307 while (next != NULL) {
308 if ((err = ixgbe_xmit(txr, &next)) != 0) {
309 if (next != NULL)
310 err = drbr_enqueue(ifp, txr->br, next);
311 #else
312 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
313 if ((err = ixgbe_xmit(txr, &next)) != 0) {
314 if (next == NULL) {
315 drbr_advance(ifp, txr->br);
316 } else {
317 drbr_putback(ifp, txr->br, next);
318 }
319 #endif
320 break;
321 }
322 #if __FreeBSD_version >= 901504
323 drbr_advance(ifp, txr->br);
324 #endif
325 enqueued++;
326 #if 0 // this is VF-only
327 #if __FreeBSD_version >= 1100036
328 if (next->m_flags & M_MCAST)
329 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
330 #endif
331 #endif
332 /* Send a copy of the frame to the BPF listener */
333 bpf_mtap(ifp, next);
334 if ((ifp->if_flags & IFF_RUNNING) == 0)
335 break;
336 #if __FreeBSD_version < 901504
337 next = drbr_dequeue(ifp, txr->br);
338 #endif
339 }
340
341 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
342 ixgbe_txeof(txr);
343
344 return (err);
345 }
346
347 /*
348 * Called from a taskqueue to drain queued transmit packets.
349 */
350 void
351 ixgbe_deferred_mq_start(void *arg, int pending)
352 {
353 struct tx_ring *txr = arg;
354 struct adapter *adapter = txr->adapter;
355 struct ifnet *ifp = adapter->ifp;
356
357 IXGBE_TX_LOCK(txr);
358 if (!drbr_empty(ifp, txr->br))
359 ixgbe_mq_start_locked(ifp, txr);
360 IXGBE_TX_UNLOCK(txr);
361 }
362
363 /*
364 ** Flush all ring buffers
365 */
366 void
367 ixgbe_qflush(struct ifnet *ifp)
368 {
369 struct adapter *adapter = ifp->if_softc;
370 struct tx_ring *txr = adapter->tx_rings;
371 struct mbuf *m;
372
373 for (int i = 0; i < adapter->num_queues; i++, txr++) {
374 IXGBE_TX_LOCK(txr);
375 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
376 m_freem(m);
377 IXGBE_TX_UNLOCK(txr);
378 }
379 if_qflush(ifp);
380 }
381 #endif /* IXGBE_LEGACY_TX */
382
383
384 /*********************************************************************
385 *
386 * This routine maps the mbufs to tx descriptors, allowing the
387 * TX engine to transmit the packets.
388 * - return 0 on success, positive on failure
389 *
390 **********************************************************************/
391
392 static int
393 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
394 {
395 struct m_tag *mtag;
396 struct adapter *adapter = txr->adapter;
397 struct ethercom *ec = &adapter->osdep.ec;
398 u32 olinfo_status = 0, cmd_type_len;
399 int i, j, error;
400 int first;
401 bus_dmamap_t map;
402 struct ixgbe_tx_buf *txbuf;
403 union ixgbe_adv_tx_desc *txd = NULL;
404
405 /* Basic descriptor defines */
406 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
407 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
408
409 if ((mtag = VLAN_OUTPUT_TAG(ec, m_head)) != NULL)
410 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
411
412 /*
413 * Important to capture the first descriptor
414 * used because it will contain the index of
415 * the one we tell the hardware to report back
416 */
417 first = txr->next_avail_desc;
418 txbuf = &txr->tx_buffers[first];
419 map = txbuf->map;
420
421 /*
422 * Map the packet for DMA.
423 */
424 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map,
425 m_head, BUS_DMA_NOWAIT);
426
427 if (__predict_false(error)) {
428
429 switch (error) {
430 case EAGAIN:
431 adapter->eagain_tx_dma_setup.ev_count++;
432 return EAGAIN;
433 case ENOMEM:
434 adapter->enomem_tx_dma_setup.ev_count++;
435 return EAGAIN;
436 case EFBIG:
437 /*
438 * XXX Try it again?
439 * do m_defrag() and retry bus_dmamap_load_mbuf().
440 */
441 adapter->efbig_tx_dma_setup.ev_count++;
442 return error;
443 case EINVAL:
444 adapter->einval_tx_dma_setup.ev_count++;
445 return error;
446 default:
447 adapter->other_tx_dma_setup.ev_count++;
448 return error;
449 }
450 }
451
452 /* Make certain there are enough descriptors */
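	/* (headroom of two: the offload context descriptor set up below also needs a slot) */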
453 if (map->dm_nsegs > txr->tx_avail - 2) {
454 txr->no_desc_avail.ev_count++;
455 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
456 return EAGAIN;
457 }
458
	/*
	** Set up the appropriate offload context;
	** this consumes the first descriptor.
	*/
463 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
464 if (__predict_false(error)) {
465 return (error);
466 }
467
468 #ifdef IXGBE_FDIR
469 /* Do the flow director magic */
470 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
471 ++txr->atr_count;
472 if (txr->atr_count >= atr_sample_rate) {
473 ixgbe_atr(txr, m_head);
474 txr->atr_count = 0;
475 }
476 }
477 #endif
478
479 olinfo_status |= IXGBE_ADVTXD_CC;
480 i = txr->next_avail_desc;
481 for (j = 0; j < map->dm_nsegs; j++) {
482 bus_size_t seglen;
483 bus_addr_t segaddr;
484
485 txbuf = &txr->tx_buffers[i];
486 txd = &txr->tx_base[i];
487 seglen = map->dm_segs[j].ds_len;
488 segaddr = htole64(map->dm_segs[j].ds_addr);
489
490 txd->read.buffer_addr = segaddr;
491 txd->read.cmd_type_len = htole32(txr->txd_cmd |
492 cmd_type_len |seglen);
493 txd->read.olinfo_status = htole32(olinfo_status);
494
495 if (++i == txr->num_desc)
496 i = 0;
497 }
498
499 txd->read.cmd_type_len |=
500 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
501 txr->tx_avail -= map->dm_nsegs;
502 txr->next_avail_desc = i;
503
504 txbuf->m_head = m_head;
	/*
	** Here we swap the map so the last descriptor,
	** which gets the completion interrupt, has the
	** real map, and the first descriptor gets the
	** unused map from this descriptor.
	*/
511 txr->tx_buffers[first].map = txbuf->map;
512 txbuf->map = map;
513 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
514 BUS_DMASYNC_PREWRITE);
515
516 /* Set the EOP descriptor that will be marked done */
517 txbuf = &txr->tx_buffers[first];
518 txbuf->eop = txd;
519
520 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
521 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
526 ++txr->total_packets.ev_count;
527 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
528
529 /* Mark queue as having work */
530 if (txr->busy == 0)
531 txr->busy = 1;
532
533 return 0;
534 }
535
/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
 *
 **********************************************************************/
543 int
544 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
545 {
546 struct adapter *adapter = txr->adapter;
547 device_t dev = adapter->dev;
548 struct ixgbe_tx_buf *txbuf;
549 int error, i;
550
551 /*
552 * Setup DMA descriptor areas.
553 */
554 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
555 1, 0, /* alignment, bounds */
556 IXGBE_TSO_SIZE, /* maxsize */
557 adapter->num_segs, /* nsegments */
558 PAGE_SIZE, /* maxsegsize */
559 0, /* flags */
560 &txr->txtag))) {
		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
562 goto fail;
563 }
564
565 if (!(txr->tx_buffers =
566 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
567 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
568 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
569 error = ENOMEM;
570 goto fail;
571 }
572
573 /* Create the descriptor buffer dma maps */
574 txbuf = txr->tx_buffers;
575 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
576 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
577 if (error != 0) {
578 aprint_error_dev(dev,
579 "Unable to create TX DMA map (%d)\n", error);
580 goto fail;
581 }
582 }
583
584 return 0;
585 fail:
	/* Free everything; this handles the case where we failed partway through */
587 ixgbe_free_transmit_structures(adapter);
588 return (error);
589 }
590
591 /*********************************************************************
592 *
593 * Initialize a transmit ring.
594 *
595 **********************************************************************/
596 static void
597 ixgbe_setup_transmit_ring(struct tx_ring *txr)
598 {
599 struct adapter *adapter = txr->adapter;
600 struct ixgbe_tx_buf *txbuf;
601 int i;
602 #ifdef DEV_NETMAP
603 struct netmap_adapter *na = NA(adapter->ifp);
604 struct netmap_slot *slot;
605 #endif /* DEV_NETMAP */
606
607 /* Clear the old ring contents */
608 IXGBE_TX_LOCK(txr);
609 #ifdef DEV_NETMAP
610 /*
611 * (under lock): if in netmap mode, do some consistency
612 * checks and set slot to entry 0 of the netmap ring.
613 */
614 slot = netmap_reset(na, NR_TX, txr->me, 0);
615 #endif /* DEV_NETMAP */
616 bzero((void *)txr->tx_base,
617 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
618 /* Reset indices */
619 txr->next_avail_desc = 0;
620 txr->next_to_clean = 0;
621
622 /* Free any existing tx buffers. */
623 txbuf = txr->tx_buffers;
624 for (i = 0; i < txr->num_desc; i++, txbuf++) {
625 if (txbuf->m_head != NULL) {
626 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
627 0, txbuf->m_head->m_pkthdr.len,
628 BUS_DMASYNC_POSTWRITE);
629 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
630 m_freem(txbuf->m_head);
631 txbuf->m_head = NULL;
632 }
633 #ifdef DEV_NETMAP
634 /*
635 * In netmap mode, set the map for the packet buffer.
636 * NOTE: Some drivers (not this one) also need to set
637 * the physical buffer address in the NIC ring.
638 * Slots in the netmap ring (indexed by "si") are
639 * kring->nkr_hwofs positions "ahead" wrt the
640 * corresponding slot in the NIC ring. In some drivers
641 * (not here) nkr_hwofs can be negative. Function
642 * netmap_idx_n2k() handles wraparounds properly.
643 */
644 if (slot) {
645 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
646 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
647 }
648 #endif /* DEV_NETMAP */
649 /* Clear the EOP descriptor pointer */
650 txbuf->eop = NULL;
651 }
652
653 #ifdef IXGBE_FDIR
654 /* Set the rate at which we sample packets */
655 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
656 txr->atr_sample = atr_sample_rate;
657 #endif
658
659 /* Set number of descriptors available */
660 txr->tx_avail = adapter->num_tx_desc;
661
662 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
663 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
664 IXGBE_TX_UNLOCK(txr);
665 }
666
667 /*********************************************************************
668 *
669 * Initialize all transmit rings.
670 *
671 **********************************************************************/
672 int
673 ixgbe_setup_transmit_structures(struct adapter *adapter)
674 {
675 struct tx_ring *txr = adapter->tx_rings;
676
677 for (int i = 0; i < adapter->num_queues; i++, txr++)
678 ixgbe_setup_transmit_ring(txr);
679
680 return (0);
681 }
682
683 /*********************************************************************
684 *
685 * Free all transmit rings.
686 *
687 **********************************************************************/
688 void
689 ixgbe_free_transmit_structures(struct adapter *adapter)
690 {
691 struct tx_ring *txr = adapter->tx_rings;
692
693 for (int i = 0; i < adapter->num_queues; i++, txr++) {
694 ixgbe_free_transmit_buffers(txr);
695 ixgbe_dma_free(adapter, &txr->txdma);
696 IXGBE_TX_LOCK_DESTROY(txr);
697 }
698 free(adapter->tx_rings, M_DEVBUF);
699 }
700
701 /*********************************************************************
702 *
703 * Free transmit ring related data structures.
704 *
705 **********************************************************************/
706 static void
707 ixgbe_free_transmit_buffers(struct tx_ring *txr)
708 {
709 struct adapter *adapter = txr->adapter;
710 struct ixgbe_tx_buf *tx_buffer;
711 int i;
712
713 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
714
715 if (txr->tx_buffers == NULL)
716 return;
717
718 tx_buffer = txr->tx_buffers;
719 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
720 if (tx_buffer->m_head != NULL) {
721 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
722 0, tx_buffer->m_head->m_pkthdr.len,
723 BUS_DMASYNC_POSTWRITE);
724 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
725 m_freem(tx_buffer->m_head);
726 tx_buffer->m_head = NULL;
727 if (tx_buffer->map != NULL) {
728 ixgbe_dmamap_destroy(txr->txtag,
729 tx_buffer->map);
730 tx_buffer->map = NULL;
731 }
732 } else if (tx_buffer->map != NULL) {
733 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
734 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
735 tx_buffer->map = NULL;
736 }
737 }
738 #ifndef IXGBE_LEGACY_TX
739 if (txr->br != NULL)
740 buf_ring_free(txr->br, M_DEVBUF);
741 #endif
742 if (txr->tx_buffers != NULL) {
743 free(txr->tx_buffers, M_DEVBUF);
744 txr->tx_buffers = NULL;
745 }
746 if (txr->txtag != NULL) {
747 ixgbe_dma_tag_destroy(txr->txtag);
748 txr->txtag = NULL;
749 }
750 return;
751 }
752
753 /*********************************************************************
754 *
755 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
756 *
757 **********************************************************************/
758
759 static int
760 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
761 u32 *cmd_type_len, u32 *olinfo_status)
762 {
763 struct m_tag *mtag;
764 struct adapter *adapter = txr->adapter;
765 struct ethercom *ec = &adapter->osdep.ec;
766 struct ixgbe_adv_tx_context_desc *TXD;
767 struct ether_vlan_header *eh;
768 struct ip ip;
769 struct ip6_hdr ip6;
770 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
771 int ehdrlen, ip_hlen = 0;
772 u16 etype;
773 u8 ipproto __diagused = 0;
774 int offload = TRUE;
775 int ctxd = txr->next_avail_desc;
776 u16 vtag = 0;
777
778 /* First check if TSO is to be used */
779 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4|M_CSUM_TSOv6))
780 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
781
782 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
783 offload = FALSE;
784
785 /* Indicate the whole packet as payload when not doing TSO */
786 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
787
788 /* Now ready a context descriptor */
789 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
790
791 /*
792 ** In advanced descriptors the vlan tag must
793 ** be placed into the context descriptor. Hence
794 ** we need to make one even if not doing offloads.
795 */
796 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
797 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
798 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
799 }
800
801 /*
802 * Determine where frame payload starts.
803 * Jump over vlan headers if already present,
804 * helpful for QinQ too.
805 */
806 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
807 eh = mtod(mp, struct ether_vlan_header *);
808 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
809 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
810 etype = ntohs(eh->evl_proto);
811 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
812 } else {
813 etype = ntohs(eh->evl_encap_proto);
814 ehdrlen = ETHER_HDR_LEN;
815 }
816
817 /* Set the ether header length */
818 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
819
820 if (offload == FALSE)
821 goto no_offloads;
822
823 switch (etype) {
824 case ETHERTYPE_IP:
825 m_copydata(mp, ehdrlen, sizeof(ip), &ip);
826 ip_hlen = ip.ip_hl << 2;
827 ipproto = ip.ip_p;
828 #if 0
829 ip.ip_sum = 0;
830 m_copyback(mp, ehdrlen, sizeof(ip), &ip);
831 #else
832 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
833 ip.ip_sum == 0);
834 #endif
835 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
836 break;
837 case ETHERTYPE_IPV6:
838 m_copydata(mp, ehdrlen, sizeof(ip6), &ip6);
839 ip_hlen = sizeof(ip6);
840 /* XXX-BZ this will go badly in case of ext hdrs. */
841 ipproto = ip6.ip6_nxt;
842 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
843 break;
844 default:
845 break;
846 }
847
848 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
849 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
850
851 vlan_macip_lens |= ip_hlen;
852
853 if (mp->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_TCPv6)) {
854 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
855 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
856 KASSERT(ipproto == IPPROTO_TCP);
857 } else if (mp->m_pkthdr.csum_flags & (M_CSUM_UDPv4|M_CSUM_UDPv6)) {
858 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
859 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
860 KASSERT(ipproto == IPPROTO_UDP);
861 }
862
863 no_offloads:
864 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
865
866 /* Now copy bits into descriptor */
867 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
868 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
869 TXD->seqnum_seed = htole32(0);
870 TXD->mss_l4len_idx = htole32(0);
871
872 /* We've consumed the first desc, adjust counters */
873 if (++ctxd == txr->num_desc)
874 ctxd = 0;
875 txr->next_avail_desc = ctxd;
876 --txr->tx_avail;
877
878 return 0;
879 }
880
881 /**********************************************************************
882 *
883 * Setup work for hardware segmentation offload (TSO) on
884 * adapters using advanced tx descriptors
885 *
886 **********************************************************************/
887 static int
888 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
889 u32 *cmd_type_len, u32 *olinfo_status)
890 {
891 struct m_tag *mtag;
892 struct adapter *adapter = txr->adapter;
893 struct ethercom *ec = &adapter->osdep.ec;
894 struct ixgbe_adv_tx_context_desc *TXD;
895 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
896 u32 mss_l4len_idx = 0, paylen;
897 u16 vtag = 0, eh_type;
898 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
899 struct ether_vlan_header *eh;
900 #ifdef INET6
901 struct ip6_hdr *ip6;
902 #endif
903 #ifdef INET
904 struct ip *ip;
905 #endif
906 struct tcphdr *th;
907
908
909 /*
910 * Determine where frame payload starts.
911 * Jump over vlan headers if already present
912 */
913 eh = mtod(mp, struct ether_vlan_header *);
914 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
915 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
916 eh_type = eh->evl_proto;
917 } else {
918 ehdrlen = ETHER_HDR_LEN;
919 eh_type = eh->evl_encap_proto;
920 }
921
922 switch (ntohs(eh_type)) {
923 #ifdef INET6
924 case ETHERTYPE_IPV6:
925 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
926 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
927 if (ip6->ip6_nxt != IPPROTO_TCP)
928 return (ENXIO);
929 ip_hlen = sizeof(struct ip6_hdr);
930 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
931 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
932 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
933 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
934 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
935 break;
936 #endif
937 #ifdef INET
938 case ETHERTYPE_IP:
939 ip = (struct ip *)(mp->m_data + ehdrlen);
940 if (ip->ip_p != IPPROTO_TCP)
941 return (ENXIO);
942 ip->ip_sum = 0;
943 ip_hlen = ip->ip_hl << 2;
944 th = (struct tcphdr *)((char *)ip + ip_hlen);
945 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
946 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
947 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
948 /* Tell transmit desc to also do IPv4 checksum. */
949 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
950 break;
951 #endif
952 default:
953 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
954 __func__, ntohs(eh_type));
955 break;
956 }
957
958 ctxd = txr->next_avail_desc;
959 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
960
961 tcp_hlen = th->th_off << 2;
962
963 /* This is used in the transmit desc in encap */
964 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
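	/* (for TSO the PAYLEN field carries only the L4 payload, excluding all headers) */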
965
966 /* VLAN MACLEN IPLEN */
967 if ((mtag = VLAN_OUTPUT_TAG(ec, mp)) != NULL) {
968 vtag = htole16(VLAN_TAG_VALUE(mtag) & 0xffff);
969 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
970 }
971
972 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
973 vlan_macip_lens |= ip_hlen;
974 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
975
976 /* ADV DTYPE TUCMD */
977 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
978 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
979 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
980
981 /* MSS L4LEN IDX */
982 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
983 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
984 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
985
986 TXD->seqnum_seed = htole32(0);
987
988 if (++ctxd == txr->num_desc)
989 ctxd = 0;
990
991 txr->tx_avail--;
992 txr->next_avail_desc = ctxd;
993 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
994 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
995 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
996 ++txr->tso_tx.ev_count;
997 return (0);
998 }
999
1000
1001 /**********************************************************************
1002 *
1003 * Examine each tx_buffer in the used queue. If the hardware is done
1004 * processing the packet then free associated resources. The
1005 * tx_buffer is put back on the free queue.
1006 *
1007 **********************************************************************/
1008 void
1009 ixgbe_txeof(struct tx_ring *txr)
1010 {
1011 struct adapter *adapter = txr->adapter;
1012 struct ifnet *ifp = adapter->ifp;
1013 u32 work, processed = 0;
1014 u16 limit = txr->process_limit;
1015 struct ixgbe_tx_buf *buf;
1016 union ixgbe_adv_tx_desc *txd;
1017
1018 KASSERT(mutex_owned(&txr->tx_mtx));
1019
1020 #ifdef DEV_NETMAP
1021 if (ifp->if_capenable & IFCAP_NETMAP) {
1022 struct netmap_adapter *na = NA(ifp);
1023 struct netmap_kring *kring = &na->tx_rings[txr->me];
1024 txd = txr->tx_base;
1025 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026 BUS_DMASYNC_POSTREAD);
1027 /*
1028 * In netmap mode, all the work is done in the context
1029 * of the client thread. Interrupt handlers only wake up
1030 * clients, which may be sleeping on individual rings
1031 * or on a global resource for all rings.
1032 * To implement tx interrupt mitigation, we wake up the client
1033 * thread roughly every half ring, even if the NIC interrupts
1034 * more frequently. This is implemented as follows:
1035 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1036 * the slot that should wake up the thread (nkr_num_slots
1037 * means the user thread should not be woken up);
1038 * - the driver ignores tx interrupts unless netmap_mitigate=0
1039 * or the slot has the DD bit set.
1040 */
1041 if (!netmap_mitigate ||
1042 (kring->nr_kflags < kring->nkr_num_slots &&
1043 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1044 netmap_tx_irq(ifp, txr->me);
1045 }
1046 return;
1047 }
1048 #endif /* DEV_NETMAP */
1049
1050 if (txr->tx_avail == txr->num_desc) {
1051 txr->busy = 0;
1052 return;
1053 }
1054
1055 /* Get work starting point */
1056 work = txr->next_to_clean;
1057 buf = &txr->tx_buffers[work];
1058 txd = &txr->tx_base[work];
1059 work -= txr->num_desc; /* The distance to ring end */
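	/*
	 * 'work' is kept biased by -num_desc so that it reaches zero exactly
	 * at the end of the ring; the wrap checks below reduce to (!work).
	 */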
1060 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1061 BUS_DMASYNC_POSTREAD);
1062 do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
1064 if (eop == NULL) /* No work */
1065 break;
1066
1067 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1068 break; /* I/O not complete */
1069
1070 if (buf->m_head) {
1071 txr->bytes +=
1072 buf->m_head->m_pkthdr.len;
1073 bus_dmamap_sync(txr->txtag->dt_dmat,
1074 buf->map,
1075 0, buf->m_head->m_pkthdr.len,
1076 BUS_DMASYNC_POSTWRITE);
1077 ixgbe_dmamap_unload(txr->txtag,
1078 buf->map);
1079 m_freem(buf->m_head);
1080 buf->m_head = NULL;
1081 }
1082 buf->eop = NULL;
1083 ++txr->tx_avail;
1084
1085 /* We clean the range if multi segment */
1086 while (txd != eop) {
1087 ++txd;
1088 ++buf;
1089 ++work;
1090 /* wrap the ring? */
1091 if (__predict_false(!work)) {
1092 work -= txr->num_desc;
1093 buf = txr->tx_buffers;
1094 txd = txr->tx_base;
1095 }
1096 if (buf->m_head) {
1097 txr->bytes +=
1098 buf->m_head->m_pkthdr.len;
1099 bus_dmamap_sync(txr->txtag->dt_dmat,
1100 buf->map,
1101 0, buf->m_head->m_pkthdr.len,
1102 BUS_DMASYNC_POSTWRITE);
1103 ixgbe_dmamap_unload(txr->txtag,
1104 buf->map);
1105 m_freem(buf->m_head);
1106 buf->m_head = NULL;
1107 }
1108 ++txr->tx_avail;
1109 buf->eop = NULL;
1110
1111 }
1112 ++txr->packets;
1113 ++processed;
1114 ++ifp->if_opackets;
1115
1116 /* Try the next packet */
1117 ++txd;
1118 ++buf;
1119 ++work;
1120 /* reset with a wrap */
1121 if (__predict_false(!work)) {
1122 work -= txr->num_desc;
1123 buf = txr->tx_buffers;
1124 txd = txr->tx_base;
1125 }
1126 prefetch(txd);
1127 } while (__predict_true(--limit));
1128
1129 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1130 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1131
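	/* Remove the -num_desc bias to recover a real ring index. */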
1132 work += txr->num_desc;
1133 txr->next_to_clean = work;
1134
	/*
	** Queue hang detection: we know there is work outstanding
	** or the first return above would have been taken, so if
	** nothing managed to get cleaned, increment busy; the
	** local timer checks it and marks the queue HUNG once it
	** exceeds the maximum number of attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything was cleaned we reset the state to 1;
	** note this also clears HUNG if it was set.
	*/
1149 if (processed)
1150 txr->busy = 1;
1151
1152 if (txr->tx_avail == txr->num_desc)
1153 txr->busy = 0;
1154
1155 return;
1156 }
1157
1158
1159 #ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry,
** allowing traffic flows to be identified and kept
** on the same cpu. Doing this for every packet would
** be a performance hit, so we only sample one packet
** in every atr_sample_rate (see ixgbe_xmit()).
*/
1168 static void
1169 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1170 {
1171 struct adapter *adapter = txr->adapter;
1172 struct ix_queue *que;
1173 struct ip *ip;
1174 struct tcphdr *th;
1175 struct udphdr *uh;
1176 struct ether_vlan_header *eh;
1177 union ixgbe_atr_hash_dword input = {.dword = 0};
1178 union ixgbe_atr_hash_dword common = {.dword = 0};
1179 int ehdrlen, ip_hlen;
1180 u16 etype;
1181
1182 eh = mtod(mp, struct ether_vlan_header *);
1183 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1184 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1185 etype = eh->evl_proto;
1186 } else {
1187 ehdrlen = ETHER_HDR_LEN;
1188 etype = eh->evl_encap_proto;
1189 }
1190
1191 /* Only handling IPv4 */
1192 if (etype != htons(ETHERTYPE_IP))
1193 return;
1194
1195 ip = (struct ip *)(mp->m_data + ehdrlen);
1196 ip_hlen = ip->ip_hl << 2;
1197
1198 /* check if we're UDP or TCP */
1199 switch (ip->ip_p) {
1200 case IPPROTO_TCP:
1201 th = (struct tcphdr *)((char *)ip + ip_hlen);
1202 /* src and dst are inverted */
1203 common.port.dst ^= th->th_sport;
1204 common.port.src ^= th->th_dport;
1205 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1206 break;
1207 case IPPROTO_UDP:
1208 uh = (struct udphdr *)((char *)ip + ip_hlen);
1209 /* src and dst are inverted */
1210 common.port.dst ^= uh->uh_sport;
1211 common.port.src ^= uh->uh_dport;
1212 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1213 break;
1214 default:
1215 return;
1216 }
1217
1218 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1219 if (mp->m_pkthdr.ether_vtag)
1220 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1221 else
1222 common.flex_bytes ^= etype;
1223 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1224
1225 que = &adapter->queues[txr->me];
1226 /*
1227 ** This assumes the Rx queue and Tx
1228 ** queue are bound to the same CPU
1229 */
1230 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1231 input, common, que->msix);
1232 }
1233 #endif /* IXGBE_FDIR */
1234
1235 /*
1236 ** Used to detect a descriptor that has
1237 ** been merged by Hardware RSC.
1238 */
1239 static inline u32
1240 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1241 {
1242 return (le32toh(rx->wb.lower.lo_dword.data) &
1243 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1244 }
1245
/*********************************************************************
 *
 *  Initialize the Hardware RSC (LRO) feature on 82599 for an RX
 *  ring; this is toggled by the LRO capability even though it is
 *  transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPv4, and our
 *        testing has shown soft LRO to be as effective, it is
 *        disabled by default.
 *
 **********************************************************************/
1257 static void
1258 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1259 {
1260 struct adapter *adapter = rxr->adapter;
1261 struct ixgbe_hw *hw = &adapter->hw;
1262 u32 rscctrl, rdrxctl;
1263
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back, otherwise RSC stays on */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1270
1271 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1272 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1273 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1274 extern int ix_crcstrip;
1275 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1276 #endif /* DEV_NETMAP */
1277 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1278 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1279 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1280
1281 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1282 rscctrl |= IXGBE_RSCCTL_RSCEN;
1283 /*
1284 ** Limit the total number of descriptors that
1285 ** can be combined, so it does not exceed 64K
1286 */
1287 if (rxr->mbuf_sz == MCLBYTES)
1288 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1289 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1290 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1291 else if (rxr->mbuf_sz == MJUM9BYTES)
1292 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1293 else /* Using 16K cluster */
1294 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1295
1296 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1297
1298 /* Enable TCP header recognition */
1299 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1300 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1301 IXGBE_PSRTYPE_TCPHDR));
1302
1303 /* Disable RSC for ACK packets */
1304 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1305 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1306
1307 rxr->hw_rsc = TRUE;
1308 }
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, and can simply
 *     be called again to retry.
 *
 **********************************************************************/
1318 static void
1319 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1320 {
1321 struct adapter *adapter = rxr->adapter;
1322 struct ixgbe_rx_buf *rxbuf;
1323 struct mbuf *mp;
1324 int i, j, error;
1325 bool refreshed = false;
1326
1327 i = j = rxr->next_to_refresh;
1328 /* Control the loop with one beyond */
1329 if (++j == rxr->num_desc)
1330 j = 0;
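	/*
	 * 'j' stays one slot ahead of 'i'; the loop below stops when 'j'
	 * reaches 'limit', so 'i' (next_to_refresh) ends one slot short of it.
	 */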
1331
1332 while (j != limit) {
1333 rxbuf = &rxr->rx_buffers[i];
1334 if (rxbuf->buf == NULL) {
1335 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1336 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1337 if (mp == NULL) {
1338 rxr->no_jmbuf.ev_count++;
1339 goto update;
1340 }
1341 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1342 m_adj(mp, ETHER_ALIGN);
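			/* The m_adj() above shifts the data by ETHER_ALIGN (2 bytes)
			 * so the IP header ends up 32-bit aligned. */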
1343 } else
1344 mp = rxbuf->buf;
1345
1346 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1347
1348 /* If we're dealing with an mbuf that was copied rather
1349 * than replaced, there's no need to go through busdma.
1350 */
1351 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1352 /* Get the memory mapping */
1353 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1354 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1355 if (error != 0) {
1356 printf("Refresh mbufs: payload dmamap load"
1357 " failure - %d\n", error);
1358 m_free(mp);
1359 rxbuf->buf = NULL;
1360 goto update;
1361 }
1362 rxbuf->buf = mp;
1363 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1364 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1365 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1366 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1367 } else {
1368 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1369 rxbuf->flags &= ~IXGBE_RX_COPY;
1370 }
1371
1372 refreshed = true;
1373 /* Next is precalculated */
1374 i = j;
1375 rxr->next_to_refresh = i;
1376 if (++j == rxr->num_desc)
1377 j = 0;
1378 }
1379 update:
1380 if (refreshed) /* Update hardware tail index */
1381 IXGBE_WRITE_REG(&adapter->hw,
1382 rxr->tail, rxr->next_to_refresh);
1383 return;
1384 }
1385
1386 /*********************************************************************
1387 *
1388 * Allocate memory for rx_buffer structures. Since we use one
1389 * rx_buffer per received packet, the maximum number of rx_buffer's
1390 * that we'll need is equal to the number of receive descriptors
1391 * that we've allocated.
1392 *
1393 **********************************************************************/
1394 int
1395 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1396 {
1397 struct adapter *adapter = rxr->adapter;
1398 device_t dev = adapter->dev;
1399 struct ixgbe_rx_buf *rxbuf;
1400 int i, bsize, error;
1401
1402 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1403 if (!(rxr->rx_buffers =
1404 (struct ixgbe_rx_buf *) malloc(bsize,
1405 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1406 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1407 error = ENOMEM;
1408 goto fail;
1409 }
1410
1411 if ((error = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
1412 1, 0, /* alignment, bounds */
1413 MJUM16BYTES, /* maxsize */
1414 1, /* nsegments */
1415 MJUM16BYTES, /* maxsegsize */
1416 0, /* flags */
1417 &rxr->ptag))) {
1418 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1419 goto fail;
1420 }
1421
1422 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1423 rxbuf = &rxr->rx_buffers[i];
1424 error = ixgbe_dmamap_create(rxr->ptag,
1425 BUS_DMA_NOWAIT, &rxbuf->pmap);
1426 if (error) {
1427 aprint_error_dev(dev, "Unable to create RX dma map\n");
1428 goto fail;
1429 }
1430 }
1431
1432 return (0);
1433
1434 fail:
1435 /* Frees all, but can handle partial completion */
1436 ixgbe_free_receive_structures(adapter);
1437 return (error);
1438 }
1439
1440
1441 static void
1442 ixgbe_free_receive_ring(struct rx_ring *rxr)
1443 {
1444 struct ixgbe_rx_buf *rxbuf;
1445 int i;
1446
1447 for (i = 0; i < rxr->num_desc; i++) {
1448 rxbuf = &rxr->rx_buffers[i];
1449 if (rxbuf->buf != NULL) {
1450 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1451 0, rxbuf->buf->m_pkthdr.len,
1452 BUS_DMASYNC_POSTREAD);
1453 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1454 rxbuf->buf->m_flags |= M_PKTHDR;
1455 m_freem(rxbuf->buf);
1456 rxbuf->buf = NULL;
1457 rxbuf->flags = 0;
1458 }
1459 }
1460 }
1461
1462
1463 /*********************************************************************
1464 *
1465 * Initialize a receive ring and its buffers.
1466 *
1467 **********************************************************************/
1468 static int
1469 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1470 {
1471 struct adapter *adapter;
1472 struct ixgbe_rx_buf *rxbuf;
1473 #ifdef LRO
1474 struct ifnet *ifp;
1475 struct lro_ctrl *lro = &rxr->lro;
1476 #endif /* LRO */
1477 int rsize, error = 0;
1478 #ifdef DEV_NETMAP
1479 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1480 struct netmap_slot *slot;
1481 #endif /* DEV_NETMAP */
1482
1483 adapter = rxr->adapter;
1484 #ifdef LRO
1485 ifp = adapter->ifp;
1486 #endif /* LRO */
1487
1488 /* Clear the ring contents */
1489 IXGBE_RX_LOCK(rxr);
1490 #ifdef DEV_NETMAP
1491 /* same as in ixgbe_setup_transmit_ring() */
1492 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1493 #endif /* DEV_NETMAP */
1494 rsize = roundup2(adapter->num_rx_desc *
1495 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1496 bzero((void *)rxr->rx_base, rsize);
1497 /* Cache the size */
1498 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1499
1500 /* Free current RX buffer structs and their mbufs */
1501 ixgbe_free_receive_ring(rxr);
1502
1503 IXGBE_RX_UNLOCK(rxr);
1504
1505 /* Now reinitialize our supply of jumbo mbufs. The number
1506 * or size of jumbo mbufs may have changed.
1507 */
1508 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1509 2 * adapter->num_rx_desc, adapter->rx_mbuf_sz);
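	/* (the pool is sized at twice the ring, presumably to leave slack for
	 *  buffers still loaned out to the stack) */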
1510
1511 IXGBE_RX_LOCK(rxr);
1512
1513 /* Now replenish the mbufs */
1514 for (int j = 0; j != rxr->num_desc; ++j) {
1515 struct mbuf *mp;
1516
1517 rxbuf = &rxr->rx_buffers[j];
1518 #ifdef DEV_NETMAP
1519 /*
1520 * In netmap mode, fill the map and set the buffer
1521 * address in the NIC ring, considering the offset
1522 * between the netmap and NIC rings (see comment in
1523 * ixgbe_setup_transmit_ring() ). No need to allocate
1524 * an mbuf, so end the block with a continue;
1525 */
1526 if (slot) {
1527 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1528 uint64_t paddr;
1529 void *addr;
1530
1531 addr = PNMB(na, slot + sj, &paddr);
1532 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1533 /* Update descriptor and the cached value */
1534 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1535 rxbuf->addr = htole64(paddr);
1536 continue;
1537 }
1538 #endif /* DEV_NETMAP */
1539 rxbuf->flags = 0;
1540 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1541 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1542 if (rxbuf->buf == NULL) {
1543 error = ENOBUFS;
1544 goto fail;
1545 }
1546 mp = rxbuf->buf;
1547 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1548 /* Get the memory mapping */
1549 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1550 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1551 if (error != 0)
1552 goto fail;
1553 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1554 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1555 /* Update the descriptor and the cached value */
1556 rxr->rx_base[j].read.pkt_addr =
1557 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1558 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1559 }
1560
1561
1562 /* Setup our descriptor indices */
1563 rxr->next_to_check = 0;
1564 rxr->next_to_refresh = 0;
1565 rxr->lro_enabled = FALSE;
1566 rxr->rx_copies.ev_count = 0;
1567 rxr->rx_bytes.ev_count = 0;
1568 rxr->vtag_strip = FALSE;
1569
1570 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1571 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1572
1573 /*
1574 ** Now set up the LRO interface:
1575 */
1576 if (ixgbe_rsc_enable)
1577 ixgbe_setup_hw_rsc(rxr);
1578 #ifdef LRO
1579 else if (ifp->if_capenable & IFCAP_LRO) {
1580 device_t dev = adapter->dev;
1581 int err = tcp_lro_init(lro);
1582 if (err) {
1583 device_printf(dev, "LRO Initialization failed!\n");
1584 goto fail;
1585 }
1586 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1587 rxr->lro_enabled = TRUE;
1588 lro->ifp = adapter->ifp;
1589 }
1590 #endif /* LRO */
1591
1592 IXGBE_RX_UNLOCK(rxr);
1593 return (0);
1594
1595 fail:
1596 ixgbe_free_receive_ring(rxr);
1597 IXGBE_RX_UNLOCK(rxr);
1598 return (error);
1599 }
1600
1601 /*********************************************************************
1602 *
1603 * Initialize all receive rings.
1604 *
1605 **********************************************************************/
1606 int
1607 ixgbe_setup_receive_structures(struct adapter *adapter)
1608 {
1609 struct rx_ring *rxr = adapter->rx_rings;
1610 int j;
1611
1612 for (j = 0; j < adapter->num_queues; j++, rxr++)
1613 if (ixgbe_setup_receive_ring(rxr))
1614 goto fail;
1615
1616 return (0);
1617 fail:
	/*
	 * Free the RX buffers allocated so far; we only handle the
	 * rings that completed, since the failing case has already
	 * cleaned up after itself. Ring 'j' failed, so it is the
	 * terminus.
	 */
1623 for (int i = 0; i < j; ++i) {
1624 rxr = &adapter->rx_rings[i];
1625 ixgbe_free_receive_ring(rxr);
1626 }
1627
1628 return (ENOBUFS);
1629 }
1630
1631
1632 /*********************************************************************
1633 *
1634 * Free all receive rings.
1635 *
1636 **********************************************************************/
1637 void
1638 ixgbe_free_receive_structures(struct adapter *adapter)
1639 {
1640 struct rx_ring *rxr = adapter->rx_rings;
1641
1642 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1643
1644 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1645 #ifdef LRO
1646 struct lro_ctrl *lro = &rxr->lro;
1647 #endif /* LRO */
1648 ixgbe_free_receive_buffers(rxr);
1649 #ifdef LRO
1650 /* Free LRO memory */
1651 tcp_lro_free(lro);
1652 #endif /* LRO */
1653 /* Free the ring memory as well */
1654 ixgbe_dma_free(adapter, &rxr->rxdma);
1655 IXGBE_RX_LOCK_DESTROY(rxr);
1656 }
1657
1658 free(adapter->rx_rings, M_DEVBUF);
1659 }
1660
1661
1662 /*********************************************************************
1663 *
1664 * Free receive ring data structures
1665 *
1666 **********************************************************************/
1667 static void
1668 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1669 {
1670 struct adapter *adapter = rxr->adapter;
1671 struct ixgbe_rx_buf *rxbuf;
1672
1673 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1674
1675 /* Cleanup any existing buffers */
1676 if (rxr->rx_buffers != NULL) {
1677 for (int i = 0; i < adapter->num_rx_desc; i++) {
1678 rxbuf = &rxr->rx_buffers[i];
1679 if (rxbuf->buf != NULL) {
1680 bus_dmamap_sync(rxr->ptag->dt_dmat,
1681 rxbuf->pmap, 0, rxbuf->buf->m_pkthdr.len,
1682 BUS_DMASYNC_POSTREAD);
1683 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1684 rxbuf->buf->m_flags |= M_PKTHDR;
1685 m_freem(rxbuf->buf);
1686 }
1687 rxbuf->buf = NULL;
1688 if (rxbuf->pmap != NULL) {
1689 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1690 rxbuf->pmap = NULL;
1691 }
1692 }
1693 if (rxr->rx_buffers != NULL) {
1694 free(rxr->rx_buffers, M_DEVBUF);
1695 rxr->rx_buffers = NULL;
1696 }
1697 }
1698
1699 if (rxr->ptag != NULL) {
1700 ixgbe_dma_tag_destroy(rxr->ptag);
1701 rxr->ptag = NULL;
1702 }
1703
1704 return;
1705 }
1706
1707 static __inline void
1708 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1709 {
1710 int s;
1711
1712 #ifdef LRO
1713 struct adapter *adapter = ifp->if_softc;
1714 struct ethercom *ec = &adapter->osdep.ec;
1715
	/*
	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
	 * of the packet must have been computed by hardware. The packet must
	 * also not carry a VLAN tag in its ethernet header. For IPv6 we do
	 * not yet support extension headers.
	 */
1721 if (rxr->lro_enabled &&
1722 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1723 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1724 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1725 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1726 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1727 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1728 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1729 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
1736 if (rxr->lro.lro_cnt != 0)
1737 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1738 return;
1739 }
1740 #endif /* LRO */
1741
1742 IXGBE_RX_UNLOCK(rxr);
1743
1744 s = splnet();
1745 /* Pass this up to any BPF listeners. */
1746 bpf_mtap(ifp, m);
1747 if_input(ifp, m);
1748 splx(s);
1749
1750 IXGBE_RX_LOCK(rxr);
1751 }
1752
1753 static __inline void
1754 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1755 {
1756 struct ixgbe_rx_buf *rbuf;
1757
1758 rbuf = &rxr->rx_buffers[i];
1759
1760
	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
1778
1779 rbuf->flags = 0;
1780
1781 return;
1782 }
1783
1784
1785 /*********************************************************************
1786 *
1787 * This routine executes in interrupt context. It replenishes
1788 * the mbufs in the descriptor and sends data which has been
1789 * dma'ed into host memory to upper layer.
1790 *
1791 * We loop at most count times if count is > 0, or until done if
1792 * count < 0.
1793 *
1794 * Return TRUE for more work, FALSE for all clean.
1795 *********************************************************************/
1796 bool
1797 ixgbe_rxeof(struct ix_queue *que)
1798 {
1799 struct adapter *adapter = que->adapter;
1800 struct rx_ring *rxr = que->rxr;
1801 struct ifnet *ifp = adapter->ifp;
1802 #ifdef LRO
1803 struct lro_ctrl *lro = &rxr->lro;
1804 struct lro_entry *queued;
1805 #endif /* LRO */
1806 int i, nextp, processed = 0;
1807 u32 staterr = 0;
1808 u16 count = rxr->process_limit;
1809 union ixgbe_adv_rx_desc *cur;
1810 struct ixgbe_rx_buf *rbuf, *nbuf;
1811 #ifdef RSS
1812 u16 pkt_info;
1813 #endif
1814
1815 IXGBE_RX_LOCK(rxr);
1816
1817 #ifdef DEV_NETMAP
1818 /* Same as the txeof routine: wakeup clients on intr. */
1819 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1820 IXGBE_RX_UNLOCK(rxr);
1821 return (FALSE);
1822 }
1823 #endif /* DEV_NETMAP */
1824
1825 for (i = rxr->next_to_check; count != 0;) {
1826 struct mbuf *sendmp, *mp;
1827 u32 rsc, ptype;
1828 u16 len;
1829 u16 vtag = 0;
1830 bool eop;
1831
1832 /* Sync the ring. */
1833 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1834 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1835
1836 cur = &rxr->rx_base[i];
1837 staterr = le32toh(cur->wb.upper.status_error);
1838 #ifdef RSS
1839 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1840 #endif
1841
1842 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1843 break;
1844 if ((ifp->if_flags & IFF_RUNNING) == 0)
1845 break;
1846
1847 count--;
1848 sendmp = NULL;
1849 nbuf = NULL;
1850 rsc = 0;
1851 cur->wb.upper.status_error = 0;
1852 rbuf = &rxr->rx_buffers[i];
1853 mp = rbuf->buf;
1854
1855 len = le16toh(cur->wb.upper.length);
1856 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1857 IXGBE_RXDADV_PKTTYPE_MASK;
1858 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1859
1860 /* Make sure bad packets are discarded */
1861 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1862 #if 0 // VF-only
1863 #if __FreeBSD_version >= 1100036
1864 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1865 #endif
1866 #endif
1867 rxr->rx_discarded.ev_count++;
1868 ixgbe_rx_discard(rxr, i);
1869 goto next_desc;
1870 }
1871
		/*
		** On 82599, which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors; rather, the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
1884 if (!eop) {
1885 /*
1886 ** Figure out the next descriptor
1887 ** of this frame.
1888 */
1889 if (rxr->hw_rsc == TRUE) {
1890 rsc = ixgbe_rsc_count(cur);
1891 rxr->rsc_num += (rsc - 1);
1892 }
1893 if (rsc) { /* Get hardware index */
1894 nextp = ((staterr &
1895 IXGBE_RXDADV_NEXTP_MASK) >>
1896 IXGBE_RXDADV_NEXTP_SHIFT);
1897 } else { /* Just sequential */
1898 nextp = i + 1;
1899 if (nextp == adapter->num_rx_desc)
1900 nextp = 0;
1901 }
1902 nbuf = &rxr->rx_buffers[nextp];
1903 prefetch(nbuf);
1904 }
1905 /*
1906 ** Rather than using the fmp/lmp global pointers
1907 ** we now keep the head of a packet chain in the
1908 ** buffer struct and pass this along from one
1909 ** descriptor to the next, until we get EOP.
1910 */
1911 mp->m_len = len;
1912 /*
1913 ** See if there is a stored head from a
1914 ** previous descriptor of this frame.
1915 */
1916 sendmp = rbuf->fmp;
1917 if (sendmp != NULL) { /* secondary frag */
1918 rbuf->buf = rbuf->fmp = NULL;
1919 mp->m_flags &= ~M_PKTHDR;
1920 sendmp->m_pkthdr.len += mp->m_len;
1921 } else {
1922 /*
1923 * Optimize. This might be a small packet,
1924 * maybe just a TCP ACK. Do a fast copy that
1925 * is cache aligned into a new mbuf, and
1926 * leave the old mbuf+cluster for re-use.
1927 */
1928 if (eop && len <= IXGBE_RX_COPY_LEN) {
1929 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1930 if (sendmp != NULL) {
1931 sendmp->m_data +=
1932 IXGBE_RX_COPY_ALIGN;
1933 ixgbe_bcopy(mp->m_data,
1934 sendmp->m_data, len);
1935 sendmp->m_len = len;
1936 rxr->rx_copies.ev_count++;
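/*
** Leaving IXGBE_RX_COPY set tells ixgbe_refresh_mbufs() that the
** existing cluster in this slot can be reused rather than replaced.
*/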
1937 rbuf->flags |= IXGBE_RX_COPY;
1938 }
1939 }
1940 if (sendmp == NULL) {
1941 rbuf->buf = rbuf->fmp = NULL;
1942 sendmp = mp;
1943 }
1944
1945 /* first desc of a non-ps chain */
1946 sendmp->m_flags |= M_PKTHDR;
1947 sendmp->m_pkthdr.len = mp->m_len;
1948 }
1949 ++processed;
1950
1951 /* Pass the head pointer on */
1952 if (eop == 0) {
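/*
** Not EOP yet: stash the chain head on the next rx buffer and link
** this mbuf to its cluster so the frame keeps accumulating.
*/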
1953 nbuf->fmp = sendmp;
1954 sendmp = NULL;
1955 mp->m_next = nbuf->buf;
1956 } else { /* Sending this frame */
1957 m_set_rcvif(sendmp, ifp);
1958 ifp->if_ipackets++;
1959 rxr->rx_packets.ev_count++;
1960 /* capture data for AIM */
1961 rxr->bytes += sendmp->m_pkthdr.len;
1962 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1963 /* Process vlan info */
1964 if ((rxr->vtag_strip) &&
1965 (staterr & IXGBE_RXD_STAT_VP))
1966 vtag = le16toh(cur->wb.upper.vlan);
1967 if (vtag) {
1968 VLAN_INPUT_TAG(ifp, sendmp, vtag,
1969 printf("%s: could not apply VLAN "
1970 "tag", __func__));
1971 }
1972 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1973 ixgbe_rx_checksum(staterr, sendmp, ptype,
1974 &adapter->stats.pf);
1975 }
1976 #if __FreeBSD_version >= 800000
1977 #ifdef RSS
1978 sendmp->m_pkthdr.flowid =
1979 le32toh(cur->wb.lower.hi_dword.rss);
1980 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1981 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1982 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
1983 break;
1984 case IXGBE_RXDADV_RSSTYPE_IPV4:
1985 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
1986 break;
1987 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1988 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
1989 break;
1990 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1991 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
1992 break;
1993 case IXGBE_RXDADV_RSSTYPE_IPV6:
1994 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
1995 break;
1996 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1997 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
1998 break;
1999 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2000 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
2001 break;
2002 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2003 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
2004 break;
2005 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2006 M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
2007 break;
2008 default:
2009 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2010 }
2011 #else /* RSS */
2012 sendmp->m_pkthdr.flowid = que->msix;
2013 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2014 #endif /* RSS */
2015 #endif /* FreeBSD_version */
2016 }
2017 next_desc:
2018 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2019 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2020
2021 /* Advance our pointers to the next descriptor. */
2022 if (++i == rxr->num_desc)
2023 i = 0;
2024
2025 /* Now send to the stack or do LRO */
2026 if (sendmp != NULL) {
2027 rxr->next_to_check = i;
2028 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2029 i = rxr->next_to_check;
2030 }
2031
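/*
** Refreshing in small batches keeps the ring stocked and amortizes
** the receive tail (RDT) update performed by ixgbe_refresh_mbufs().
*/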
2032 /* Every 8 descriptors we go to refresh mbufs */
2033 if (processed == 8) {
2034 ixgbe_refresh_mbufs(rxr, i);
2035 processed = 0;
2036 }
2037 }
2038
2039 /* Refresh any remaining buf structs */
2040 if (ixgbe_rx_unrefreshed(rxr))
2041 ixgbe_refresh_mbufs(rxr, i);
2042
2043 rxr->next_to_check = i;
2044
2045 #ifdef LRO
2046 /*
2047 * Flush any outstanding LRO work
2048 */
2049 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2050 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2051 tcp_lro_flush(lro, queued);
2052 }
2053 #endif /* LRO */
2054
2055 IXGBE_RX_UNLOCK(rxr);
2056
2057 /*
2058 ** Still have cleaning to do?
2059 */
2060 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2061 return (TRUE);
2062 else
2063 return (FALSE);
2064 }
2065
2066
2067 /*********************************************************************
2068 *
2069 * Verify that the hardware indicated that the checksum is valid.
2070 * Inform the stack about the status of the checksum so that the
2071 * stack doesn't spend time verifying it.
2072 *
2073 *********************************************************************/
2074 static void
2075 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2076 struct ixgbe_hw_stats *stats)
2077 {
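/*
** staterr carries the descriptor status bits in its low word and the
** error bits in its top byte; shifting down by 24 lines the advanced
** descriptor error flags up with the legacy IXGBE_RXD_ERR_* values
** tested below.
*/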
2078 u16 status = (u16) staterr;
2079 u8 errors = (u8) (staterr >> 24);
2080 #if 0
2081 bool sctp = FALSE;
2082
2083 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2084 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2085 sctp = TRUE;
2086 #endif
2087
2088 if (status & IXGBE_RXD_STAT_IPCS) {
2089 stats->ipcs.ev_count++;
2090 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2091 /* IP Checksum Good */
2092 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2093
2094 } else {
2095 stats->ipcs_bad.ev_count++;
2096 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2097 }
2098 }
2099 if (status & IXGBE_RXD_STAT_L4CS) {
2100 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2101 stats->l4cs.ev_count++;
2102 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2103 mp->m_pkthdr.csum_flags |= type;
2104 } else {
2105 stats->l4cs_bad.ev_count++;
2106 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2107 }
2108 }
2109 return;
2110 }
2111
2112
2113 /********************************************************************
2114 * Manage DMA'able memory.
2115 *******************************************************************/
2116
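/*
** ixgbe_dma_malloc() runs the usual bus_dma(9) sequence: create a DMA
** tag, allocate and map the memory, create a map and load it.  Any
** failure unwinds the earlier steps in reverse order via the fail_*
** labels below.
*/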
2117 int
2118 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2119 struct ixgbe_dma_alloc *dma, const int mapflags)
2120 {
2121 device_t dev = adapter->dev;
2122 int r, rsegs;
2123
2124 r = ixgbe_dma_tag_create(adapter->osdep.dmat, /* parent */
2125 DBA_ALIGN, 0, /* alignment, bounds */
2126 size, /* maxsize */
2127 1, /* nsegments */
2128 size, /* maxsegsize */
2129 BUS_DMA_ALLOCNOW, /* flags */
2130 &dma->dma_tag);
2131 if (r != 0) {
2132 aprint_error_dev(dev,
2133 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2134 goto fail_0;
2135 }
2136
2137 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat,
2138 size,
2139 dma->dma_tag->dt_alignment,
2140 dma->dma_tag->dt_boundary,
2141 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2142 if (r != 0) {
2143 aprint_error_dev(dev,
2144 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2145 goto fail_1;
2146 }
2147
2148 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2149 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2150 if (r != 0) {
2151 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2152 __func__, r);
2153 goto fail_2;
2154 }
2155
2156 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2157 if (r != 0) {
2158 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2159 __func__, r);
2160 goto fail_3;
2161 }
2162
2163 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map, dma->dma_vaddr,
2164 size,
2165 NULL,
2166 mapflags | BUS_DMA_NOWAIT);
2167 if (r != 0) {
2168 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2169 __func__, r);
2170 goto fail_4;
2171 }
2172 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2173 dma->dma_size = size;
2174 return 0;
2175 fail_4:
2176 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2177 fail_3:
2178 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2179 fail_2:
2180 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2181 fail_1:
2182 ixgbe_dma_tag_destroy(dma->dma_tag);
2183 fail_0:
2184 return r;
2185 }
2186
2187 void
2188 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2189 {
2190 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2191 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2192 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2193 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2194 ixgbe_dma_tag_destroy(dma->dma_tag);
2195 }
2196
2197
2198 /*********************************************************************
2199 *
2200 * Allocate memory for the transmit and receive rings, and then
2201 * the descriptors associated with each; called only once at attach.
2202 *
2203 **********************************************************************/
2204 int
2205 ixgbe_allocate_queues(struct adapter *adapter)
2206 {
2207 device_t dev = adapter->dev;
2208 struct ix_queue *que;
2209 struct tx_ring *txr;
2210 struct rx_ring *rxr;
2211 int rsize, tsize, error = IXGBE_SUCCESS;
2212 int txconf = 0, rxconf = 0;
2213
2214 /* First allocate the top level queue structs */
2215 if (!(adapter->queues =
2216 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2217 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2218 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2219 error = ENOMEM;
2220 goto fail;
2221 }
2222
2223 /* First allocate the TX ring struct memory */
2224 if (!(adapter->tx_rings =
2225 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2226 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2227 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2228 error = ENOMEM;
2229 goto tx_fail;
2230 }
2231
2232 /* Next allocate the RX */
2233 if (!(adapter->rx_rings =
2234 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2235 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2236 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2237 error = ENOMEM;
2238 goto rx_fail;
2239 }
2240
2241 /* For the ring itself */
2242 tsize = roundup2(adapter->num_tx_desc *
2243 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2244
2245 /*
2246 * Now set up the TX queues, txconf is needed to handle the
2247 * possibility that things fail midcourse and we need to
2248 * undo memory gracefully
2249 */
2250 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2251 /* Set up some basics */
2252 txr = &adapter->tx_rings[i];
2253 txr->adapter = adapter;
2254 txr->me = i;
2255 txr->num_desc = adapter->num_tx_desc;
2256
2257 /* Initialize the TX side lock */
2258 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2259 device_xname(dev), txr->me);
2260 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2261
2262 if (ixgbe_dma_malloc(adapter, tsize,
2263 &txr->txdma, BUS_DMA_NOWAIT)) {
2264 aprint_error_dev(dev,
2265 "Unable to allocate TX Descriptor memory\n");
2266 error = ENOMEM;
2267 goto err_tx_desc;
2268 }
2269 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2270 bzero((void *)txr->tx_base, tsize);
2271
2272 /* Now allocate transmit buffers for the ring */
2273 if (ixgbe_allocate_transmit_buffers(txr)) {
2274 aprint_error_dev(dev,
2275 "Critical Failure setting up transmit buffers\n");
2276 error = ENOMEM;
2277 goto err_tx_desc;
2278 }
2279 #ifndef IXGBE_LEGACY_TX
2280 /* Allocate a buf ring */
2281 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2282 M_WAITOK, &txr->tx_mtx);
2283 if (txr->br == NULL) {
2284 aprint_error_dev(dev,
2285 "Critical Failure setting up buf ring\n");
2286 error = ENOMEM;
2287 goto err_tx_desc;
2288 }
2289 #endif
2290 }
2291
2292 /*
2293 * Next the RX queues...
2294 */
2295 rsize = roundup2(adapter->num_rx_desc *
2296 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2297 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2298 rxr = &adapter->rx_rings[i];
2299 /* Set up some basics */
2300 rxr->adapter = adapter;
2301 rxr->me = i;
2302 rxr->num_desc = adapter->num_rx_desc;
2303
2304 /* Initialize the RX side lock */
2305 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2306 device_xname(dev), rxr->me);
2307 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2308
2309 if (ixgbe_dma_malloc(adapter, rsize,
2310 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2311 aprint_error_dev(dev,
2312 "Unable to allocate RxDescriptor memory\n");
2313 error = ENOMEM;
2314 goto err_rx_desc;
2315 }
2316 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2317 bzero((void *)rxr->rx_base, rsize);
2318
2319 /* Allocate receive buffers for the ring */
2320 if (ixgbe_allocate_receive_buffers(rxr)) {
2321 aprint_error_dev(dev,
2322 "Critical Failure setting up receive buffers\n");
2323 error = ENOMEM;
2324 goto err_rx_desc;
2325 }
2326 }
2327
2328 /*
2329 ** Finally set up the queue holding structs
2330 */
2331 for (int i = 0; i < adapter->num_queues; i++) {
2332 que = &adapter->queues[i];
2333 que->adapter = adapter;
2334 que->me = i;
2335 que->txr = &adapter->tx_rings[i];
2336 que->rxr = &adapter->rx_rings[i];
2337 }
2338
2339 return (0);
2340
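/*
** Unwind in reverse order; rxconf and txconf count how many rings were
** fully set up, so only those have their descriptor memory freed here.
*/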
2341 err_rx_desc:
2342 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2343 ixgbe_dma_free(adapter, &rxr->rxdma);
2344 err_tx_desc:
2345 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2346 ixgbe_dma_free(adapter, &txr->txdma);
2347 free(adapter->rx_rings, M_DEVBUF);
2348 rx_fail:
2349 free(adapter->tx_rings, M_DEVBUF);
2350 tx_fail:
2351 free(adapter->queues, M_DEVBUF);
2352 fail:
2353 return (error);
2354 }
2355
2356