1 /* $NetBSD: ix_txrx.c,v 1.45 2018/05/18 10:09:02 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * this feature only works with
74 * IPv4, and only on 82599 and later.
75 * Also, this will cause IP forwarding to
76 * fail, and that can't be controlled by
77 * the stack the way LRO can. For all these
78 * reasons I've deemed it best to leave
79 * this off and not bother with a tunable
80 * interface; enabling it requires
81 * recompiling the driver.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88 * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (!adapter->link_active) {
140 /*
141 * discard all packets buffered in IFQ to avoid
142 * sending stale packets when the link comes back up.
143 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149
150 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
151 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
152 break;
153
154 IFQ_POLL(&ifp->if_snd, m_head);
155 if (m_head == NULL)
156 break;
157
158 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
159 break;
160 }
161 IFQ_DEQUEUE(&ifp->if_snd, m_head);
162 if (rc != 0) {
163 m_freem(m_head);
164 continue;
165 }
166
167 /* Send a copy of the frame to the BPF listener */
168 bpf_mtap(ifp, m_head);
169 }
170
171 return IXGBE_SUCCESS;
172 } /* ixgbe_legacy_start_locked */
173
174 /************************************************************************
175 * ixgbe_legacy_start
176 *
177 * Called by the stack, this always uses the first tx ring,
178 * and should not be used with multiqueue tx enabled.
179 ************************************************************************/
180 void
181 ixgbe_legacy_start(struct ifnet *ifp)
182 {
183 struct adapter *adapter = ifp->if_softc;
184 struct tx_ring *txr = adapter->tx_rings;
185
186 if (ifp->if_flags & IFF_RUNNING) {
187 IXGBE_TX_LOCK(txr);
188 ixgbe_legacy_start_locked(ifp, txr);
189 IXGBE_TX_UNLOCK(txr);
190 }
191 } /* ixgbe_legacy_start */
192
193 /************************************************************************
194 * ixgbe_mq_start - Multiqueue Transmit Entry Point
195 *
196 * (if_transmit function)
197 ************************************************************************/
198 int
199 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
200 {
201 struct adapter *adapter = ifp->if_softc;
202 struct tx_ring *txr;
203 int i, err = 0;
204 #ifdef RSS
205 uint32_t bucket_id;
206 #endif
207
208 /*
209 * When doing RSS, map it to the same outbound queue
210 * as the incoming flow would be mapped to.
211 *
212 * If everything is set up correctly, it should be the
213 * same bucket as the CPU we are currently on.
214 */
215 #ifdef RSS
216 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
218 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
219 &bucket_id) == 0)) {
220 i = bucket_id % adapter->num_queues;
221 #ifdef IXGBE_DEBUG
222 if (bucket_id > adapter->num_queues)
223 if_printf(ifp,
224 "bucket_id (%d) > num_queues (%d)\n",
225 bucket_id, adapter->num_queues);
226 #endif
227 } else
228 i = m->m_pkthdr.flowid % adapter->num_queues;
229 } else
230 #endif /* RSS */
231 i = cpu_index(curcpu()) % adapter->num_queues;
232
233 /* Check for a hung queue and pick alternative */
234 if (((1 << i) & adapter->active_queues) == 0)
235 i = ffs64(adapter->active_queues);
236
237 txr = &adapter->tx_rings[i];
238
239 err = pcq_put(txr->txr_interq, m);
240 if (err == false) {
241 m_freem(m);
242 txr->pcq_drops.ev_count++;
243 return (ENOBUFS);
244 }
245 if (IXGBE_TX_TRYLOCK(txr)) {
246 ixgbe_mq_start_locked(ifp, txr);
247 IXGBE_TX_UNLOCK(txr);
248 } else {
249 if (adapter->txrx_use_workqueue) {
250 u_int *enqueued;
251
252 /*
253 * This function itself is not called in interrupt
254 * context, but it can be called in fast softint
255 * context right after receiving forwarded packets.
256 * The workqueue must therefore be protected against
257 * double enqueueing when the machine handles both
258 * locally generated and forwarded packets.
259 */
260 enqueued = percpu_getref(adapter->txr_wq_enqueued);
261 if (*enqueued == 0) {
262 *enqueued = 1;
263 percpu_putref(adapter->txr_wq_enqueued);
264 workqueue_enqueue(adapter->txr_wq,
265 &txr->wq_cookie, curcpu());
266 } else
267 percpu_putref(adapter->txr_wq_enqueued);
268 } else
269 softint_schedule(txr->txr_si);
270 }
271
272 return (0);
273 } /* ixgbe_mq_start */
274
275 /************************************************************************
276 * ixgbe_mq_start_locked
277 ************************************************************************/
278 int
279 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
280 {
281 struct mbuf *next;
282 int enqueued = 0, err = 0;
283
284 if (!txr->adapter->link_active) {
285 /*
286 * discard all packets buffered in txr_interq to avoid
287 * sending stale packets when the link comes back up.
288 */
289 ixgbe_drain(ifp, txr);
290 return (ENETDOWN);
291 }
292 if ((ifp->if_flags & IFF_RUNNING) == 0)
293 return (ENETDOWN);
294
295 /* Process the queue */
296 while ((next = pcq_get(txr->txr_interq)) != NULL) {
297 if ((err = ixgbe_xmit(txr, next)) != 0) {
298 m_freem(next);
299 /* All errors are counted in ixgbe_xmit() */
300 break;
301 }
302 enqueued++;
303 #if __FreeBSD_version >= 1100036
304 /*
305 * Since we're looking at the tx ring, we can check
306 * to see if we're a VF by examining our tail register
307 * address.
308 */
309 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
310 (next->m_flags & M_MCAST))
311 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
312 #endif
313 /* Send a copy of the frame to the BPF listener */
314 bpf_mtap(ifp, next);
315 if ((ifp->if_flags & IFF_RUNNING) == 0)
316 break;
317 }
318
319 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
320 ixgbe_txeof(txr);
321
322 return (err);
323 } /* ixgbe_mq_start_locked */
324
325 /************************************************************************
326 * ixgbe_deferred_mq_start
327 *
328 * Called from a softint and workqueue (indirectly) to drain queued
329 * transmit packets.
330 ************************************************************************/
331 void
332 ixgbe_deferred_mq_start(void *arg)
333 {
334 struct tx_ring *txr = arg;
335 struct adapter *adapter = txr->adapter;
336 struct ifnet *ifp = adapter->ifp;
337
338 IXGBE_TX_LOCK(txr);
339 if (pcq_peek(txr->txr_interq) != NULL)
340 ixgbe_mq_start_locked(ifp, txr);
341 IXGBE_TX_UNLOCK(txr);
342 } /* ixgbe_deferred_mq_start */
343
344 /************************************************************************
345 * ixgbe_deferred_mq_start_work
346 *
347 * Called from a workqueue to drain queued transmit packets.
348 ************************************************************************/
349 void
350 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
351 {
352 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
353 struct adapter *adapter = txr->adapter;
354 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
355 *enqueued = 0;
356 percpu_putref(adapter->txr_wq_enqueued);
357
358 ixgbe_deferred_mq_start(txr);
359 } /* ixgbe_deferred_mq_start_work */
360
361 /************************************************************************
362 * ixgbe_drain_all
363 ************************************************************************/
364 void
365 ixgbe_drain_all(struct adapter *adapter)
366 {
367 struct ifnet *ifp = adapter->ifp;
368 struct ix_queue *que = adapter->queues;
369
370 for (int i = 0; i < adapter->num_queues; i++, que++) {
371 struct tx_ring *txr = que->txr;
372
373 IXGBE_TX_LOCK(txr);
374 ixgbe_drain(ifp, txr);
375 IXGBE_TX_UNLOCK(txr);
376 }
377 }
378
379 /************************************************************************
380 * ixgbe_xmit
381 *
382 * Maps the mbufs to tx descriptors, allowing the
383 * TX engine to transmit the packets.
384 *
385 * Return 0 on success, positive on failure
386 ************************************************************************/
387 static int
388 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
389 {
390 struct adapter *adapter = txr->adapter;
391 struct ixgbe_tx_buf *txbuf;
392 union ixgbe_adv_tx_desc *txd = NULL;
393 struct ifnet *ifp = adapter->ifp;
394 int i, j, error;
395 int first;
396 u32 olinfo_status = 0, cmd_type_len;
397 bool remap = TRUE;
398 bus_dmamap_t map;
399
400 /* Basic descriptor defines */
401 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
402 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
403
404 if (vlan_has_tag(m_head))
405 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
406
407 /*
408 * Important to capture the first descriptor
409 * used because it will contain the index of
410 * the one we tell the hardware to report back
411 */
412 first = txr->next_avail_desc;
413 txbuf = &txr->tx_buffers[first];
414 map = txbuf->map;
415
416 /*
417 * Map the packet for DMA.
418 */
419 retry:
420 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
421 BUS_DMA_NOWAIT);
422
423 if (__predict_false(error)) {
424 struct mbuf *m;
425
426 switch (error) {
427 case EAGAIN:
428 txr->q_eagain_tx_dma_setup++;
429 return EAGAIN;
430 case ENOMEM:
431 txr->q_enomem_tx_dma_setup++;
432 return EAGAIN;
433 case EFBIG:
434 /* Try it again? - one try */
435 if (remap == TRUE) {
436 remap = FALSE;
437 /*
438 * XXX: m_defrag will choke on
439 * non-MCLBYTES-sized clusters
440 */
441 txr->q_efbig_tx_dma_setup++;
442 m = m_defrag(m_head, M_NOWAIT);
443 if (m == NULL) {
444 txr->q_mbuf_defrag_failed++;
445 return ENOBUFS;
446 }
447 m_head = m;
448 goto retry;
449 } else {
450 txr->q_efbig2_tx_dma_setup++;
451 return error;
452 }
453 case EINVAL:
454 txr->q_einval_tx_dma_setup++;
455 return error;
456 default:
457 txr->q_other_tx_dma_setup++;
458 return error;
459 }
460 }
461
462 /* Make certain there are enough descriptors */
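/* One descriptor per DMA segment, plus one for the offload context set up below; the +2 also leaves a spare slot so the ring is never filled completely. */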
463 if (txr->tx_avail < (map->dm_nsegs + 2)) {
464 txr->no_desc_avail.ev_count++;
465 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
466 return EAGAIN;
467 }
468
469 /*
470 * Set up the appropriate offload context
471 * this will consume the first descriptor
472 */
473 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
474 if (__predict_false(error)) {
475 return (error);
476 }
477
478 /* Do the flow director magic */
479 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
480 (txr->atr_sample) && (!adapter->fdir_reinit)) {
481 ++txr->atr_count;
482 if (txr->atr_count >= atr_sample_rate) {
483 ixgbe_atr(txr, m_head);
484 txr->atr_count = 0;
485 }
486 }
487
488 olinfo_status |= IXGBE_ADVTXD_CC;
489 i = txr->next_avail_desc;
490 for (j = 0; j < map->dm_nsegs; j++) {
491 bus_size_t seglen;
492 bus_addr_t segaddr;
493
494 txbuf = &txr->tx_buffers[i];
495 txd = &txr->tx_base[i];
496 seglen = map->dm_segs[j].ds_len;
497 segaddr = htole64(map->dm_segs[j].ds_addr);
498
499 txd->read.buffer_addr = segaddr;
500 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
501 txd->read.olinfo_status = htole32(olinfo_status);
502
503 if (++i == txr->num_desc)
504 i = 0;
505 }
506
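/* Close out the frame: EOP marks the last data descriptor and RS asks the hardware to write back completion status for it. */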
507 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
508 txr->tx_avail -= map->dm_nsegs;
509 txr->next_avail_desc = i;
510
511 txbuf->m_head = m_head;
512 /*
513 * Here we swap the map so the last descriptor,
514 * which gets the completion interrupt, has the
515 * real map, and the first descriptor gets the
516 * unused map from this descriptor.
517 */
518 txr->tx_buffers[first].map = txbuf->map;
519 txbuf->map = map;
520 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
521 BUS_DMASYNC_PREWRITE);
522
523 /* Set the EOP descriptor that will be marked done */
524 txbuf = &txr->tx_buffers[first];
525 txbuf->eop = txd;
526
527 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
528 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
529 /*
530 * Advance the Transmit Descriptor Tail (Tdt), this tells the
531 * hardware that this frame is available to transmit.
532 */
533 ++txr->total_packets.ev_count;
534 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
535
536 /*
537 * XXXX NOMPSAFE: ifp->if_data should be percpu.
538 */
539 ifp->if_obytes += m_head->m_pkthdr.len;
540 if (m_head->m_flags & M_MCAST)
541 ifp->if_omcasts++;
542
543 /* Mark queue as having work */
544 if (txr->busy == 0)
545 txr->busy = 1;
546
547 return (0);
548 } /* ixgbe_xmit */
549
550 /************************************************************************
551 * ixgbe_drain
552 ************************************************************************/
553 static void
554 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
555 {
556 struct mbuf *m;
557
558 IXGBE_TX_LOCK_ASSERT(txr);
559
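/* Only the first ring (ring 0) services the legacy if_snd queue, so drain it there; every ring drains its own pcq below. */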
560 if (txr->me == 0) {
561 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
562 IFQ_DEQUEUE(&ifp->if_snd, m);
563 m_freem(m);
564 IF_DROP(&ifp->if_snd);
565 }
566 }
567
568 while ((m = pcq_get(txr->txr_interq)) != NULL) {
569 m_freem(m);
570 txr->pcq_drops.ev_count++;
571 }
572 }
573
574 /************************************************************************
575 * ixgbe_allocate_transmit_buffers
576 *
577 * Allocate memory for tx_buffer structures. The tx_buffer stores all
578 * the information needed to transmit a packet on the wire. This is
579 * called only once at attach; setup is done on every reset.
580 ************************************************************************/
581 static int
582 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
583 {
584 struct adapter *adapter = txr->adapter;
585 device_t dev = adapter->dev;
586 struct ixgbe_tx_buf *txbuf;
587 int error, i;
588
589 /*
590 * Setup DMA descriptor areas.
591 */
592 error = ixgbe_dma_tag_create(
593 /* parent */ adapter->osdep.dmat,
594 /* alignment */ 1,
595 /* bounds */ 0,
596 /* maxsize */ IXGBE_TSO_SIZE,
597 /* nsegments */ adapter->num_segs,
598 /* maxsegsize */ PAGE_SIZE,
599 /* flags */ 0,
600 &txr->txtag);
601 if (error != 0) {
602 aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
603 goto fail;
604 }
605
606 txr->tx_buffers =
607 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
608 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
609 if (txr->tx_buffers == NULL) {
610 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
611 error = ENOMEM;
612 goto fail;
613 }
614
615 /* Create the descriptor buffer dma maps */
616 txbuf = txr->tx_buffers;
617 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
618 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
619 if (error != 0) {
620 aprint_error_dev(dev,
621 "Unable to create TX DMA map (%d)\n", error);
622 goto fail;
623 }
624 }
625
626 return 0;
627 fail:
628 /* We free all, it handles case where we are in the middle */
629 #if 0 /* XXX was FreeBSD */
630 ixgbe_free_transmit_structures(adapter);
631 #else
632 ixgbe_free_transmit_buffers(txr);
633 #endif
634 return (error);
635 } /* ixgbe_allocate_transmit_buffers */
636
637 /************************************************************************
638 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
639 ************************************************************************/
640 static void
641 ixgbe_setup_transmit_ring(struct tx_ring *txr)
642 {
643 struct adapter *adapter = txr->adapter;
644 struct ixgbe_tx_buf *txbuf;
645 #ifdef DEV_NETMAP
646 struct netmap_adapter *na = NA(adapter->ifp);
647 struct netmap_slot *slot;
648 #endif /* DEV_NETMAP */
649
650 /* Clear the old ring contents */
651 IXGBE_TX_LOCK(txr);
652
653 #ifdef DEV_NETMAP
654 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
655 /*
656 * (under lock): if in netmap mode, do some consistency
657 * checks and set slot to entry 0 of the netmap ring.
658 */
659 slot = netmap_reset(na, NR_TX, txr->me, 0);
660 }
661 #endif /* DEV_NETMAP */
662
663 bzero((void *)txr->tx_base,
664 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
665 /* Reset indices */
666 txr->next_avail_desc = 0;
667 txr->next_to_clean = 0;
668
669 /* Free any existing tx buffers. */
670 txbuf = txr->tx_buffers;
671 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
672 if (txbuf->m_head != NULL) {
673 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
674 0, txbuf->m_head->m_pkthdr.len,
675 BUS_DMASYNC_POSTWRITE);
676 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
677 m_freem(txbuf->m_head);
678 txbuf->m_head = NULL;
679 }
680
681 #ifdef DEV_NETMAP
682 /*
683 * In netmap mode, set the map for the packet buffer.
684 * NOTE: Some drivers (not this one) also need to set
685 * the physical buffer address in the NIC ring.
686 * Slots in the netmap ring (indexed by "si") are
687 * kring->nkr_hwofs positions "ahead" wrt the
688 * corresponding slot in the NIC ring. In some drivers
689 * (not here) nkr_hwofs can be negative. Function
690 * netmap_idx_n2k() handles wraparounds properly.
691 */
692 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
693 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
694 netmap_load_map(na, txr->txtag,
695 txbuf->map, NMB(na, slot + si));
696 }
697 #endif /* DEV_NETMAP */
698
699 /* Clear the EOP descriptor pointer */
700 txbuf->eop = NULL;
701 }
702
703 /* Set the rate at which we sample packets */
704 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
705 txr->atr_sample = atr_sample_rate;
706
707 /* Set number of descriptors available */
708 txr->tx_avail = adapter->num_tx_desc;
709
710 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
711 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
712 IXGBE_TX_UNLOCK(txr);
713 } /* ixgbe_setup_transmit_ring */
714
715 /************************************************************************
716 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
717 ************************************************************************/
718 int
719 ixgbe_setup_transmit_structures(struct adapter *adapter)
720 {
721 struct tx_ring *txr = adapter->tx_rings;
722
723 for (int i = 0; i < adapter->num_queues; i++, txr++)
724 ixgbe_setup_transmit_ring(txr);
725
726 return (0);
727 } /* ixgbe_setup_transmit_structures */
728
729 /************************************************************************
730 * ixgbe_free_transmit_structures - Free all transmit rings.
731 ************************************************************************/
732 void
733 ixgbe_free_transmit_structures(struct adapter *adapter)
734 {
735 struct tx_ring *txr = adapter->tx_rings;
736
737 for (int i = 0; i < adapter->num_queues; i++, txr++) {
738 ixgbe_free_transmit_buffers(txr);
739 ixgbe_dma_free(adapter, &txr->txdma);
740 IXGBE_TX_LOCK_DESTROY(txr);
741 }
742 free(adapter->tx_rings, M_DEVBUF);
743 } /* ixgbe_free_transmit_structures */
744
745 /************************************************************************
746 * ixgbe_free_transmit_buffers
747 *
748 * Free transmit ring related data structures.
749 ************************************************************************/
750 static void
751 ixgbe_free_transmit_buffers(struct tx_ring *txr)
752 {
753 struct adapter *adapter = txr->adapter;
754 struct ixgbe_tx_buf *tx_buffer;
755 int i;
756
757 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
758
759 if (txr->tx_buffers == NULL)
760 return;
761
762 tx_buffer = txr->tx_buffers;
763 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
764 if (tx_buffer->m_head != NULL) {
765 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
766 0, tx_buffer->m_head->m_pkthdr.len,
767 BUS_DMASYNC_POSTWRITE);
768 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
769 m_freem(tx_buffer->m_head);
770 tx_buffer->m_head = NULL;
771 if (tx_buffer->map != NULL) {
772 ixgbe_dmamap_destroy(txr->txtag,
773 tx_buffer->map);
774 tx_buffer->map = NULL;
775 }
776 } else if (tx_buffer->map != NULL) {
777 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
778 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
779 tx_buffer->map = NULL;
780 }
781 }
782 if (txr->txr_interq != NULL) {
783 struct mbuf *m;
784
785 while ((m = pcq_get(txr->txr_interq)) != NULL)
786 m_freem(m);
787 pcq_destroy(txr->txr_interq);
788 }
789 if (txr->tx_buffers != NULL) {
790 free(txr->tx_buffers, M_DEVBUF);
791 txr->tx_buffers = NULL;
792 }
793 if (txr->txtag != NULL) {
794 ixgbe_dma_tag_destroy(txr->txtag);
795 txr->txtag = NULL;
796 }
797 } /* ixgbe_free_transmit_buffers */
798
799 /************************************************************************
800 * ixgbe_tx_ctx_setup
801 *
802 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
803 ************************************************************************/
804 static int
805 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
806 u32 *cmd_type_len, u32 *olinfo_status)
807 {
808 struct adapter *adapter = txr->adapter;
809 struct ixgbe_adv_tx_context_desc *TXD;
810 struct ether_vlan_header *eh;
811 #ifdef INET
812 struct ip *ip;
813 #endif
814 #ifdef INET6
815 struct ip6_hdr *ip6;
816 #endif
817 int ehdrlen, ip_hlen = 0;
818 int offload = TRUE;
819 int ctxd = txr->next_avail_desc;
820 u32 vlan_macip_lens = 0;
821 u32 type_tucmd_mlhl = 0;
822 u16 vtag = 0;
823 u16 etype;
824 u8 ipproto = 0;
825 char *l3d;
826
827
828 /* First check if TSO is to be used */
829 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
830 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
831
832 if (rv != 0)
833 ++adapter->tso_err.ev_count;
834 return rv;
835 }
836
837 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
838 offload = FALSE;
839
840 /* Indicate the whole packet as payload when not doing TSO */
841 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
842
843 /* Now ready a context descriptor */
844 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
845
846 /*
847 * In advanced descriptors the vlan tag must
848 * be placed into the context descriptor. Hence
849 * we need to make one even if not doing offloads.
850 */
851 if (vlan_has_tag(mp)) {
852 vtag = htole16(vlan_get_tag(mp));
853 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
854 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
855 (offload == FALSE))
856 return (0);
857
858 /*
859 * Determine where frame payload starts.
860 * Jump over vlan headers if already present,
861 * helpful for QinQ too.
862 */
863 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
864 eh = mtod(mp, struct ether_vlan_header *);
865 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
866 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
867 etype = ntohs(eh->evl_proto);
868 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
869 } else {
870 etype = ntohs(eh->evl_encap_proto);
871 ehdrlen = ETHER_HDR_LEN;
872 }
873
874 /* Set the ether header length */
875 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
876
877 if (offload == FALSE)
878 goto no_offloads;
879
880 /*
881 * If the first mbuf only includes the ethernet header,
882 * jump to the next one
883 * XXX: This assumes the stack splits mbufs containing headers
884 * on header boundaries
885 * XXX: And assumes the entire IP header is contained in one mbuf
886 */
887 if (mp->m_len == ehdrlen && mp->m_next)
888 l3d = mtod(mp->m_next, char *);
889 else
890 l3d = mtod(mp, char *) + ehdrlen;
891
892 switch (etype) {
893 #ifdef INET
894 case ETHERTYPE_IP:
895 ip = (struct ip *)(l3d);
896 ip_hlen = ip->ip_hl << 2;
897 ipproto = ip->ip_p;
898 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
899 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
900 ip->ip_sum == 0);
901 break;
902 #endif
903 #ifdef INET6
904 case ETHERTYPE_IPV6:
905 ip6 = (struct ip6_hdr *)(l3d);
906 ip_hlen = sizeof(struct ip6_hdr);
907 ipproto = ip6->ip6_nxt;
908 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
909 break;
910 #endif
911 default:
912 offload = false;
913 break;
914 }
915
916 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
917 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
918
919 vlan_macip_lens |= ip_hlen;
920
921 /* No support for offloads for non-L4 next headers */
922 switch (ipproto) {
923 case IPPROTO_TCP:
924 if (mp->m_pkthdr.csum_flags &
925 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
926 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
927 else
928 offload = false;
929 break;
930 case IPPROTO_UDP:
931 if (mp->m_pkthdr.csum_flags &
932 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
933 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
934 else
935 offload = false;
936 break;
937 default:
938 offload = false;
939 break;
940 }
941
942 if (offload) /* Insert L4 checksum into data descriptors */
943 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
944
945 no_offloads:
946 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
947
948 /* Now copy bits into descriptor */
949 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
950 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
951 TXD->seqnum_seed = htole32(0);
952 TXD->mss_l4len_idx = htole32(0);
953
954 /* We've consumed the first desc, adjust counters */
955 if (++ctxd == txr->num_desc)
956 ctxd = 0;
957 txr->next_avail_desc = ctxd;
958 --txr->tx_avail;
959
960 return (0);
961 } /* ixgbe_tx_ctx_setup */
962
963 /************************************************************************
964 * ixgbe_tso_setup
965 *
966 * Setup work for hardware segmentation offload (TSO) on
967 * adapters using advanced tx descriptors
968 ************************************************************************/
969 static int
970 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
971 u32 *olinfo_status)
972 {
973 struct ixgbe_adv_tx_context_desc *TXD;
974 struct ether_vlan_header *eh;
975 #ifdef INET6
976 struct ip6_hdr *ip6;
977 #endif
978 #ifdef INET
979 struct ip *ip;
980 #endif
981 struct tcphdr *th;
982 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
983 u32 vlan_macip_lens = 0;
984 u32 type_tucmd_mlhl = 0;
985 u32 mss_l4len_idx = 0, paylen;
986 u16 vtag = 0, eh_type;
987
988 /*
989 * Determine where frame payload starts.
990 * Jump over vlan headers if already present
991 */
992 eh = mtod(mp, struct ether_vlan_header *);
993 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
994 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
995 eh_type = eh->evl_proto;
996 } else {
997 ehdrlen = ETHER_HDR_LEN;
998 eh_type = eh->evl_encap_proto;
999 }
1000
1001 switch (ntohs(eh_type)) {
1002 #ifdef INET
1003 case ETHERTYPE_IP:
1004 ip = (struct ip *)(mp->m_data + ehdrlen);
1005 if (ip->ip_p != IPPROTO_TCP)
1006 return (ENXIO);
1007 ip->ip_sum = 0;
1008 ip_hlen = ip->ip_hl << 2;
1009 th = (struct tcphdr *)((char *)ip + ip_hlen);
1010 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1011 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1012 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1013 /* Tell transmit desc to also do IPv4 checksum. */
1014 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1015 break;
1016 #endif
1017 #ifdef INET6
1018 case ETHERTYPE_IPV6:
1019 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1020 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1021 if (ip6->ip6_nxt != IPPROTO_TCP)
1022 return (ENXIO);
1023 ip_hlen = sizeof(struct ip6_hdr);
1024 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1025 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1026 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1027 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1028 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1029 break;
1030 #endif
1031 default:
1032 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1033 __func__, ntohs(eh_type));
1034 break;
1035 }
1036
1037 ctxd = txr->next_avail_desc;
1038 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1039
1040 tcp_hlen = th->th_off << 2;
1041
1042 /* This is used in the transmit desc in encap */
1043 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
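/* paylen excludes the Ethernet, IP and TCP headers: for TSO, the PAYLEN field must carry only the TCP payload length. */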
1044
1045 /* VLAN MACLEN IPLEN */
1046 if (vlan_has_tag(mp)) {
1047 vtag = htole16(vlan_get_tag(mp));
1048 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1049 }
1050
1051 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1052 vlan_macip_lens |= ip_hlen;
1053 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1054
1055 /* ADV DTYPE TUCMD */
1056 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1057 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1058 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1059
1060 /* MSS L4LEN IDX */
1061 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1062 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1063 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1064
1065 TXD->seqnum_seed = htole32(0);
1066
1067 if (++ctxd == txr->num_desc)
1068 ctxd = 0;
1069
1070 txr->tx_avail--;
1071 txr->next_avail_desc = ctxd;
1072 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1073 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1074 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1075 ++txr->tso_tx.ev_count;
1076
1077 return (0);
1078 } /* ixgbe_tso_setup */
1079
1080
1081 /************************************************************************
1082 * ixgbe_txeof
1083 *
1084 * Examine each tx_buffer in the used queue. If the hardware is done
1085 * processing the packet then free associated resources. The
1086 * tx_buffer is put back on the free queue.
1087 ************************************************************************/
1088 bool
1089 ixgbe_txeof(struct tx_ring *txr)
1090 {
1091 struct adapter *adapter = txr->adapter;
1092 struct ifnet *ifp = adapter->ifp;
1093 struct ixgbe_tx_buf *buf;
1094 union ixgbe_adv_tx_desc *txd;
1095 u32 work, processed = 0;
1096 u32 limit = adapter->tx_process_limit;
1097
1098 KASSERT(mutex_owned(&txr->tx_mtx));
1099
1100 #ifdef DEV_NETMAP
1101 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1102 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1103 struct netmap_adapter *na = NA(adapter->ifp);
1104 struct netmap_kring *kring = &na->tx_rings[txr->me];
1105 txd = txr->tx_base;
1106 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1107 BUS_DMASYNC_POSTREAD);
1108 /*
1109 * In netmap mode, all the work is done in the context
1110 * of the client thread. Interrupt handlers only wake up
1111 * clients, which may be sleeping on individual rings
1112 * or on a global resource for all rings.
1113 * To implement tx interrupt mitigation, we wake up the client
1114 * thread roughly every half ring, even if the NIC interrupts
1115 * more frequently. This is implemented as follows:
1116 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1117 * the slot that should wake up the thread (nkr_num_slots
1118 * means the user thread should not be woken up);
1119 * - the driver ignores tx interrupts unless netmap_mitigate=0
1120 * or the slot has the DD bit set.
1121 */
1122 if (!netmap_mitigate ||
1123 (kring->nr_kflags < kring->nkr_num_slots &&
1124 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1125 netmap_tx_irq(ifp, txr->me);
1126 }
1127 return false;
1128 }
1129 #endif /* DEV_NETMAP */
1130
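/* Ring completely free: nothing to clean, so clear the hang-detection counter and report no further work. */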
1131 if (txr->tx_avail == txr->num_desc) {
1132 txr->busy = 0;
1133 return false;
1134 }
1135
1136 /* Get work starting point */
1137 work = txr->next_to_clean;
1138 buf = &txr->tx_buffers[work];
1139 txd = &txr->tx_base[work];
1140 work -= txr->num_desc; /* The distance to ring end */
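/* 'work' is kept as a negative offset from the ring end: it hits zero exactly at the wrap point (the '!work' tests below), and the final 'work += num_desc' turns it back into a ring index. */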
1141 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1142 BUS_DMASYNC_POSTREAD);
1143
1144 do {
1145 union ixgbe_adv_tx_desc *eop = buf->eop;
1146 if (eop == NULL) /* No work */
1147 break;
1148
1149 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1150 break; /* I/O not complete */
1151
1152 if (buf->m_head) {
1153 txr->bytes += buf->m_head->m_pkthdr.len;
1154 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1155 0, buf->m_head->m_pkthdr.len,
1156 BUS_DMASYNC_POSTWRITE);
1157 ixgbe_dmamap_unload(txr->txtag, buf->map);
1158 m_freem(buf->m_head);
1159 buf->m_head = NULL;
1160 }
1161 buf->eop = NULL;
1162 ++txr->tx_avail;
1163
1164 /* We clean the range if multi segment */
1165 while (txd != eop) {
1166 ++txd;
1167 ++buf;
1168 ++work;
1169 /* wrap the ring? */
1170 if (__predict_false(!work)) {
1171 work -= txr->num_desc;
1172 buf = txr->tx_buffers;
1173 txd = txr->tx_base;
1174 }
1175 if (buf->m_head) {
1176 txr->bytes +=
1177 buf->m_head->m_pkthdr.len;
1178 bus_dmamap_sync(txr->txtag->dt_dmat,
1179 buf->map,
1180 0, buf->m_head->m_pkthdr.len,
1181 BUS_DMASYNC_POSTWRITE);
1182 ixgbe_dmamap_unload(txr->txtag,
1183 buf->map);
1184 m_freem(buf->m_head);
1185 buf->m_head = NULL;
1186 }
1187 ++txr->tx_avail;
1188 buf->eop = NULL;
1189
1190 }
1191 ++txr->packets;
1192 ++processed;
1193 ++ifp->if_opackets;
1194
1195 /* Try the next packet */
1196 ++txd;
1197 ++buf;
1198 ++work;
1199 /* reset with a wrap */
1200 if (__predict_false(!work)) {
1201 work -= txr->num_desc;
1202 buf = txr->tx_buffers;
1203 txd = txr->tx_base;
1204 }
1205 prefetch(txd);
1206 } while (__predict_true(--limit));
1207
1208 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1209 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1210
1211 work += txr->num_desc;
1212 txr->next_to_clean = work;
1213
1214 /*
1215 * Queue hang detection: we know there is
1216 * work outstanding or the first return
1217 * would have been taken, so increment busy
1218 * if nothing was cleaned; local_timer will
1219 * then check this count and mark the queue
1220 * HUNG if it exceeds the maximum attempts.
1221 */
1222 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1223 ++txr->busy;
1224 /*
1225 * If anything was cleaned we reset the state to 1;
1226 * note that this also clears HUNG if it was set.
1227 */
1228 if (processed)
1229 txr->busy = 1;
1230
1231 if (txr->tx_avail == txr->num_desc)
1232 txr->busy = 0;
1233
1234 return ((limit > 0) ? false : true);
1235 } /* ixgbe_txeof */
1236
1237 /************************************************************************
1238 * ixgbe_rsc_count
1239 *
1240 * Used to detect a descriptor that has been merged by Hardware RSC.
1241 ************************************************************************/
1242 static inline u32
1243 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1244 {
1245 return (le32toh(rx->wb.lower.lo_dword.data) &
1246 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1247 } /* ixgbe_rsc_count */
1248
1249 /************************************************************************
1250 * ixgbe_setup_hw_rsc
1251 *
1252 * Initialize Hardware RSC (LRO) feature on 82599
1253 * for an RX ring; it is toggled by the LRO capability
1254 * even though it is transparent to the stack.
1255 *
1256 * NOTE: Since this HW feature only works with IPv4 and
1257 * testing has shown soft LRO to be as effective,
1258 * this feature will be disabled by default.
1259 ************************************************************************/
1260 static void
1261 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1262 {
1263 struct adapter *adapter = rxr->adapter;
1264 struct ixgbe_hw *hw = &adapter->hw;
1265 u32 rscctrl, rdrxctl;
1266
1267 /* If turning LRO/RSC off we need to disable it */
1268 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1269 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1270 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl & ~IXGBE_RSCCTL_RSCEN);
1271 return;
1272 }
1273
1274 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1275 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1276 #ifdef DEV_NETMAP
1277 /* Always strip CRC unless Netmap disabled it */
1278 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1279 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1280 ix_crcstrip)
1281 #endif /* DEV_NETMAP */
1282 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1283 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1284 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1285
1286 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1287 rscctrl |= IXGBE_RSCCTL_RSCEN;
1288 /*
1289 * Limit the total number of descriptors that
1290 * can be combined, so it does not exceed 64K
1291 */
1292 if (rxr->mbuf_sz == MCLBYTES)
1293 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1294 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1295 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1296 else if (rxr->mbuf_sz == MJUM9BYTES)
1297 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1298 else /* Using 16K cluster */
1299 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1300
1301 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1302
1303 /* Enable TCP header recognition */
1304 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1305 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1306
1307 /* Disable RSC for ACK packets */
1308 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1309 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1310
1311 rxr->hw_rsc = TRUE;
1312 } /* ixgbe_setup_hw_rsc */
1313
1314 /************************************************************************
1315 * ixgbe_refresh_mbufs
1316 *
1317 * Refresh mbuf buffers for RX descriptor rings
1318 * - now keeps its own state so discards due to resource
1319 * exhaustion are unnecessary; if an mbuf cannot be obtained
1320 * it just returns, keeping its placeholder, so it can simply
1321 * be called again later to retry.
1322 ************************************************************************/
1323 static void
1324 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1325 {
1326 struct adapter *adapter = rxr->adapter;
1327 struct ixgbe_rx_buf *rxbuf;
1328 struct mbuf *mp;
1329 int i, j, error;
1330 bool refreshed = false;
1331
1332 i = j = rxr->next_to_refresh;
1333 /* Control the loop with one beyond */
1334 if (++j == rxr->num_desc)
1335 j = 0;
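/* 'i' is the slot being refreshed; 'j' stays one slot ahead, and the loop stops once it reaches 'limit', so the refresh never overtakes the descriptor the host is still processing. */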
1336
1337 while (j != limit) {
1338 rxbuf = &rxr->rx_buffers[i];
1339 if (rxbuf->buf == NULL) {
1340 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1341 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1342 if (mp == NULL) {
1343 rxr->no_jmbuf.ev_count++;
1344 goto update;
1345 }
1346 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1347 m_adj(mp, ETHER_ALIGN);
1348 } else
1349 mp = rxbuf->buf;
1350
1351 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1352
1353 /* If we're dealing with an mbuf that was copied rather
1354 * than replaced, there's no need to go through busdma.
1355 */
1356 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1357 /* Get the memory mapping */
1358 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1359 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1360 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1361 if (error != 0) {
1362 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1363 m_free(mp);
1364 rxbuf->buf = NULL;
1365 goto update;
1366 }
1367 rxbuf->buf = mp;
1368 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1369 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1370 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1371 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1372 } else {
1373 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1374 rxbuf->flags &= ~IXGBE_RX_COPY;
1375 }
1376
1377 refreshed = true;
1378 /* Next is precalculated */
1379 i = j;
1380 rxr->next_to_refresh = i;
1381 if (++j == rxr->num_desc)
1382 j = 0;
1383 }
1384
1385 update:
1386 if (refreshed) /* Update hardware tail index */
1387 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1388
1389 return;
1390 } /* ixgbe_refresh_mbufs */
1391
1392 /************************************************************************
1393 * ixgbe_allocate_receive_buffers
1394 *
1395 * Allocate memory for rx_buffer structures. Since we use one
1396 * rx_buffer per received packet, the maximum number of rx_buffer's
1397 * that we'll need is equal to the number of receive descriptors
1398 * that we've allocated.
1399 ************************************************************************/
1400 static int
1401 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1402 {
1403 struct adapter *adapter = rxr->adapter;
1404 device_t dev = adapter->dev;
1405 struct ixgbe_rx_buf *rxbuf;
1406 int bsize, error;
1407
1408 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1409 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1410 M_NOWAIT | M_ZERO);
1411 if (rxr->rx_buffers == NULL) {
1412 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1413 error = ENOMEM;
1414 goto fail;
1415 }
1416
1417 error = ixgbe_dma_tag_create(
1418 /* parent */ adapter->osdep.dmat,
1419 /* alignment */ 1,
1420 /* bounds */ 0,
1421 /* maxsize */ MJUM16BYTES,
1422 /* nsegments */ 1,
1423 /* maxsegsize */ MJUM16BYTES,
1424 /* flags */ 0,
1425 &rxr->ptag);
1426 if (error != 0) {
1427 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1428 goto fail;
1429 }
1430
1431 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1432 rxbuf = &rxr->rx_buffers[i];
1433 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1434 if (error) {
1435 aprint_error_dev(dev, "Unable to create RX dma map\n");
1436 goto fail;
1437 }
1438 }
1439
1440 return (0);
1441
1442 fail:
1443 /* Frees all, but can handle partial completion */
1444 ixgbe_free_receive_structures(adapter);
1445
1446 return (error);
1447 } /* ixgbe_allocate_receive_buffers */
1448
1449 /************************************************************************
1450 * ixgbe_free_receive_ring
1451 ************************************************************************/
1452 static void
1453 ixgbe_free_receive_ring(struct rx_ring *rxr)
1454 {
1455 for (int i = 0; i < rxr->num_desc; i++) {
1456 ixgbe_rx_discard(rxr, i);
1457 }
1458 } /* ixgbe_free_receive_ring */
1459
1460 /************************************************************************
1461 * ixgbe_setup_receive_ring
1462 *
1463 * Initialize a receive ring and its buffers.
1464 ************************************************************************/
1465 static int
1466 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1467 {
1468 struct adapter *adapter;
1469 struct ixgbe_rx_buf *rxbuf;
1470 #ifdef LRO
1471 struct ifnet *ifp;
1472 struct lro_ctrl *lro = &rxr->lro;
1473 #endif /* LRO */
1474 #ifdef DEV_NETMAP
1475 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1476 struct netmap_slot *slot;
1477 #endif /* DEV_NETMAP */
1478 int rsize, error = 0;
1479
1480 adapter = rxr->adapter;
1481 #ifdef LRO
1482 ifp = adapter->ifp;
1483 #endif /* LRO */
1484
1485 /* Clear the ring contents */
1486 IXGBE_RX_LOCK(rxr);
1487
1488 #ifdef DEV_NETMAP
1489 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1490 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1491 #endif /* DEV_NETMAP */
1492
1493 rsize = roundup2(adapter->num_rx_desc *
1494 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1495 bzero((void *)rxr->rx_base, rsize);
1496 /* Cache the size */
1497 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1498
1499 /* Free current RX buffer structs and their mbufs */
1500 ixgbe_free_receive_ring(rxr);
1501
1502 /* Now replenish the mbufs */
1503 for (int j = 0; j != rxr->num_desc; ++j) {
1504 struct mbuf *mp;
1505
1506 rxbuf = &rxr->rx_buffers[j];
1507
1508 #ifdef DEV_NETMAP
1509 /*
1510 * In netmap mode, fill the map and set the buffer
1511 * address in the NIC ring, considering the offset
1512 * between the netmap and NIC rings (see comment in
1513 * ixgbe_setup_transmit_ring() ). No need to allocate
1514 * an mbuf, so end the block with a continue;
1515 */
1516 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1517 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1518 uint64_t paddr;
1519 void *addr;
1520
1521 addr = PNMB(na, slot + sj, &paddr);
1522 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1523 /* Update descriptor and the cached value */
1524 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1525 rxbuf->addr = htole64(paddr);
1526 continue;
1527 }
1528 #endif /* DEV_NETMAP */
1529
1530 rxbuf->flags = 0;
1531 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1532 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1533 if (rxbuf->buf == NULL) {
1534 error = ENOBUFS;
1535 goto fail;
1536 }
1537 mp = rxbuf->buf;
1538 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1539 /* Get the memory mapping */
1540 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1541 mp, BUS_DMA_NOWAIT);
1542 if (error != 0)
1543 goto fail;
1544 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1545 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1546 /* Update the descriptor and the cached value */
1547 rxr->rx_base[j].read.pkt_addr =
1548 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1549 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1550 }
1551
1552
1553 /* Setup our descriptor indices */
1554 rxr->next_to_check = 0;
1555 rxr->next_to_refresh = 0;
1556 rxr->lro_enabled = FALSE;
1557 rxr->rx_copies.ev_count = 0;
1558 #if 0 /* NetBSD */
1559 rxr->rx_bytes.ev_count = 0;
1560 #if 1 /* Fix inconsistency */
1561 rxr->rx_packets.ev_count = 0;
1562 #endif
1563 #endif
1564 rxr->vtag_strip = FALSE;
1565
1566 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1567 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1568
1569 /*
1570 * Now set up the LRO interface
1571 */
1572 if (ixgbe_rsc_enable)
1573 ixgbe_setup_hw_rsc(rxr);
1574 #ifdef LRO
1575 else if (ifp->if_capenable & IFCAP_LRO) {
1576 device_t dev = adapter->dev;
1577 int err = tcp_lro_init(lro);
1578 if (err) {
1579 device_printf(dev, "LRO Initialization failed!\n");
1580 goto fail;
1581 }
1582 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1583 rxr->lro_enabled = TRUE;
1584 lro->ifp = adapter->ifp;
1585 }
1586 #endif /* LRO */
1587
1588 IXGBE_RX_UNLOCK(rxr);
1589
1590 return (0);
1591
1592 fail:
1593 ixgbe_free_receive_ring(rxr);
1594 IXGBE_RX_UNLOCK(rxr);
1595
1596 return (error);
1597 } /* ixgbe_setup_receive_ring */
1598
1599 /************************************************************************
1600 * ixgbe_setup_receive_structures - Initialize all receive rings.
1601 ************************************************************************/
1602 int
1603 ixgbe_setup_receive_structures(struct adapter *adapter)
1604 {
1605 struct rx_ring *rxr = adapter->rx_rings;
1606 int j;
1607
1608 /*
1609 * Now reinitialize our supply of jumbo mbufs. The number
1610 * or size of jumbo mbufs may have changed.
1611 * Assume all of rxr->ptag are the same.
1612 */
1613 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat,
1614 (2 * adapter->num_rx_desc) * adapter->num_queues,
1615 adapter->rx_mbuf_sz);
1616
1617 for (j = 0; j < adapter->num_queues; j++, rxr++)
1618 if (ixgbe_setup_receive_ring(rxr))
1619 goto fail;
1620
1621 return (0);
1622 fail:
1623 /*
1624 * Free RX buffers allocated so far; we will only handle
1625 * the rings that completed, since the failing case will have
1626 * cleaned up after itself. 'j' failed, so it's the terminus.
1627 */
1628 for (int i = 0; i < j; ++i) {
1629 rxr = &adapter->rx_rings[i];
1630 IXGBE_RX_LOCK(rxr);
1631 ixgbe_free_receive_ring(rxr);
1632 IXGBE_RX_UNLOCK(rxr);
1633 }
1634
1635 return (ENOBUFS);
1636 } /* ixgbe_setup_receive_structures */
1637
1638
1639 /************************************************************************
1640 * ixgbe_free_receive_structures - Free all receive rings.
1641 ************************************************************************/
1642 void
1643 ixgbe_free_receive_structures(struct adapter *adapter)
1644 {
1645 struct rx_ring *rxr = adapter->rx_rings;
1646
1647 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1648
1649 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1650 ixgbe_free_receive_buffers(rxr);
1651 #ifdef LRO
1652 /* Free LRO memory */
1653 tcp_lro_free(&rxr->lro);
1654 #endif /* LRO */
1655 /* Free the ring memory as well */
1656 ixgbe_dma_free(adapter, &rxr->rxdma);
1657 IXGBE_RX_LOCK_DESTROY(rxr);
1658 }
1659
1660 free(adapter->rx_rings, M_DEVBUF);
1661 } /* ixgbe_free_receive_structures */
1662
1663
1664 /************************************************************************
1665 * ixgbe_free_receive_buffers - Free receive ring data structures
1666 ************************************************************************/
1667 static void
1668 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1669 {
1670 struct adapter *adapter = rxr->adapter;
1671 struct ixgbe_rx_buf *rxbuf;
1672
1673 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1674
1675 /* Cleanup any existing buffers */
1676 if (rxr->rx_buffers != NULL) {
1677 for (int i = 0; i < adapter->num_rx_desc; i++) {
1678 rxbuf = &rxr->rx_buffers[i];
1679 ixgbe_rx_discard(rxr, i);
1680 if (rxbuf->pmap != NULL) {
1681 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1682 rxbuf->pmap = NULL;
1683 }
1684 }
1685 if (rxr->rx_buffers != NULL) {
1686 free(rxr->rx_buffers, M_DEVBUF);
1687 rxr->rx_buffers = NULL;
1688 }
1689 }
1690
1691 if (rxr->ptag != NULL) {
1692 ixgbe_dma_tag_destroy(rxr->ptag);
1693 rxr->ptag = NULL;
1694 }
1695
1696 return;
1697 } /* ixgbe_free_receive_buffers */
1698
1699 /************************************************************************
1700 * ixgbe_rx_input
1701 ************************************************************************/
1702 static __inline void
1703 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1704 u32 ptype)
1705 {
1706 struct adapter *adapter = ifp->if_softc;
1707
1708 #ifdef LRO
1709 struct ethercom *ec = &adapter->osdep.ec;
1710
1711 /*
1712 * At the moment LRO is only for IP/TCP packets whose TCP checksum
1713 * was verified by hardware and which carry no VLAN tag in the
1714 * ethernet header. For IPv6 we do not yet support extension headers.
1715 */
1716 if (rxr->lro_enabled &&
1717 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1718 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1719 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1720 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1721 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1722 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1723 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1724 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1725 /*
1726 * Send to the stack if:
1727 ** - LRO not enabled, or
1728 ** - no LRO resources, or
1729 ** - lro enqueue fails
1730 */
1731 if (rxr->lro.lro_cnt != 0)
1732 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1733 return;
1734 }
1735 #endif /* LRO */
1736
1737 if_percpuq_enqueue(adapter->ipq, m);
1738 } /* ixgbe_rx_input */
1739
1740 /************************************************************************
1741 * ixgbe_rx_discard
1742 ************************************************************************/
1743 static __inline void
1744 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1745 {
1746 struct ixgbe_rx_buf *rbuf;
1747
1748 rbuf = &rxr->rx_buffers[i];
1749
1750 /*
1751 * With advanced descriptors the writeback
1752 * clobbers the buffer addrs, so it's easier
1753 * to just free the existing mbufs and take
1754 * the normal refresh path to get new buffers
1755 * and mapping.
1756 */
1757
1758 if (rbuf->fmp != NULL) {/* Partial chain ? */
1759 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1760 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1761 m_freem(rbuf->fmp);
1762 rbuf->fmp = NULL;
1763 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1764 } else if (rbuf->buf) {
1765 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1766 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1767 m_free(rbuf->buf);
1768 rbuf->buf = NULL;
1769 }
1770 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1771
1772 rbuf->flags = 0;
1773
1774 return;
1775 } /* ixgbe_rx_discard */
1776
1777
1778 /************************************************************************
1779 * ixgbe_rxeof
1780 *
1781 * Executes in interrupt context. It replenishes the
1782 * mbufs in the descriptor ring and sends data which has
1783 * been DMA'ed into host memory to the upper layer.
1784 *
1785 * Return TRUE for more work, FALSE for all clean.
1786 ************************************************************************/
1787 bool
1788 ixgbe_rxeof(struct ix_queue *que)
1789 {
1790 struct adapter *adapter = que->adapter;
1791 struct rx_ring *rxr = que->rxr;
1792 struct ifnet *ifp = adapter->ifp;
1793 #ifdef LRO
1794 struct lro_ctrl *lro = &rxr->lro;
1795 #endif /* LRO */
1796 union ixgbe_adv_rx_desc *cur;
1797 struct ixgbe_rx_buf *rbuf, *nbuf;
1798 int i, nextp, processed = 0;
1799 u32 staterr = 0;
1800 u32 count = adapter->rx_process_limit;
1801 #ifdef RSS
1802 u16 pkt_info;
1803 #endif
1804
1805 IXGBE_RX_LOCK(rxr);
1806
1807 #ifdef DEV_NETMAP
1808 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1809 /* Same as the txeof routine: wakeup clients on intr. */
1810 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1811 IXGBE_RX_UNLOCK(rxr);
1812 return (FALSE);
1813 }
1814 }
1815 #endif /* DEV_NETMAP */
1816
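	/*
	 * Main cleanup loop: starting at next_to_check, process completed
	 * descriptors until the DD bit is no longer set or the per-call
	 * limit (rx_process_limit) is exhausted.
	 */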
1817 for (i = rxr->next_to_check; count != 0;) {
1818 struct mbuf *sendmp, *mp;
1819 u32 rsc, ptype;
1820 u16 len;
1821 u16 vtag = 0;
1822 bool eop;
1823
1824 /* Sync the ring. */
1825 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1826 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1827
1828 cur = &rxr->rx_base[i];
1829 staterr = le32toh(cur->wb.upper.status_error);
1830 #ifdef RSS
1831 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1832 #endif
1833
1834 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1835 break;
1836 if ((ifp->if_flags & IFF_RUNNING) == 0)
1837 break;
1838
1839 count--;
1840 sendmp = NULL;
1841 nbuf = NULL;
1842 rsc = 0;
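		/*
		 * Zero the status word now so a stale DD bit in this slot
		 * cannot be mistaken for a new completion later.
		 */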
1843 cur->wb.upper.status_error = 0;
1844 rbuf = &rxr->rx_buffers[i];
1845 mp = rbuf->buf;
1846
1847 len = le16toh(cur->wb.upper.length);
1848 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1849 IXGBE_RXDADV_PKTTYPE_MASK;
1850 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1851
1852 /* Make sure bad packets are discarded */
1853 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1854 #if __FreeBSD_version >= 1100036
1855 if (adapter->feat_en & IXGBE_FEATURE_VF)
1856 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1857 #endif
1858 rxr->rx_discarded.ev_count++;
1859 ixgbe_rx_discard(rxr, i);
1860 goto next_desc;
1861 }
1862
1863 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1864 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1865
1866 /*
1867 		 * On the 82599, which supports a hardware
1868 		 * LRO (called HW RSC), packets need not
1869 		 * be spread across sequential descriptors;
1870 		 * instead the next descriptor of the frame
1871 		 * is indicated by bits in the descriptor.
1872 		 * This also means we may process more
1873 		 * than one packet at a time, which had
1874 		 * never been true before and required
1875 		 * eliminating the global chain pointers
1876 		 * in favor of what is done here. -jfv
1877 */
1878 if (!eop) {
1879 /*
1880 * Figure out the next descriptor
1881 * of this frame.
1882 */
1883 if (rxr->hw_rsc == TRUE) {
1884 rsc = ixgbe_rsc_count(cur);
1885 rxr->rsc_num += (rsc - 1);
1886 }
1887 if (rsc) { /* Get hardware index */
1888 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1889 IXGBE_RXDADV_NEXTP_SHIFT);
1890 } else { /* Just sequential */
1891 nextp = i + 1;
1892 if (nextp == adapter->num_rx_desc)
1893 nextp = 0;
1894 }
1895 nbuf = &rxr->rx_buffers[nextp];
1896 prefetch(nbuf);
1897 }
1898 /*
1899 * Rather than using the fmp/lmp global pointers
1900 * we now keep the head of a packet chain in the
1901 * buffer struct and pass this along from one
1902 * descriptor to the next, until we get EOP.
1903 */
1904 mp->m_len = len;
1905 /*
1906 		 * See if there is a stored head; it tells us whether this
1907 		 * buffer starts a new frame or continues an existing one.
1908 */
1909 sendmp = rbuf->fmp;
1910 if (sendmp != NULL) { /* secondary frag */
1911 rbuf->buf = rbuf->fmp = NULL;
1912 mp->m_flags &= ~M_PKTHDR;
1913 sendmp->m_pkthdr.len += mp->m_len;
1914 } else {
1915 /*
1916 * Optimize. This might be a small packet,
1917 * maybe just a TCP ACK. Do a fast copy that
1918 * is cache aligned into a new mbuf, and
1919 * leave the old mbuf+cluster for re-use.
1920 */
1921 if (eop && len <= IXGBE_RX_COPY_LEN) {
1922 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1923 if (sendmp != NULL) {
1924 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1925 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1926 len);
1927 sendmp->m_len = len;
1928 rxr->rx_copies.ev_count++;
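					/*
					 * Flag the buffer so the refresh
					 * path can reuse the existing mbuf
					 * and cluster rather than allocating
					 * new ones.
					 */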
1929 rbuf->flags |= IXGBE_RX_COPY;
1930 }
1931 }
1932 if (sendmp == NULL) {
1933 rbuf->buf = rbuf->fmp = NULL;
1934 sendmp = mp;
1935 }
1936
1937 /* first desc of a non-ps chain */
1938 sendmp->m_flags |= M_PKTHDR;
1939 sendmp->m_pkthdr.len = mp->m_len;
1940 }
1941 ++processed;
1942
1943 /* Pass the head pointer on */
1944 if (eop == 0) {
1945 nbuf->fmp = sendmp;
1946 sendmp = NULL;
1947 mp->m_next = nbuf->buf;
1948 } else { /* Sending this frame */
1949 m_set_rcvif(sendmp, ifp);
1950 ++rxr->packets;
1951 rxr->rx_packets.ev_count++;
1952 /* capture data for AIM */
1953 rxr->bytes += sendmp->m_pkthdr.len;
1954 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1955 /* Process vlan info */
1956 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1957 vtag = le16toh(cur->wb.upper.vlan);
1958 if (vtag) {
1959 vlan_set_tag(sendmp, vtag);
1960 }
1961 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1962 ixgbe_rx_checksum(staterr, sendmp, ptype,
1963 &adapter->stats.pf);
1964 }
1965
1966 #if 0 /* FreeBSD */
1967 /*
1968 * In case of multiqueue, we have RXCSUM.PCSD bit set
1969 * and never cleared. This means we have RSS hash
1970 * available to be used.
1971 */
1972 if (adapter->num_queues > 1) {
1973 sendmp->m_pkthdr.flowid =
1974 le32toh(cur->wb.lower.hi_dword.rss);
1975 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1976 case IXGBE_RXDADV_RSSTYPE_IPV4:
1977 M_HASHTYPE_SET(sendmp,
1978 M_HASHTYPE_RSS_IPV4);
1979 break;
1980 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1981 M_HASHTYPE_SET(sendmp,
1982 M_HASHTYPE_RSS_TCP_IPV4);
1983 break;
1984 case IXGBE_RXDADV_RSSTYPE_IPV6:
1985 M_HASHTYPE_SET(sendmp,
1986 M_HASHTYPE_RSS_IPV6);
1987 break;
1988 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1989 M_HASHTYPE_SET(sendmp,
1990 M_HASHTYPE_RSS_TCP_IPV6);
1991 break;
1992 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1993 M_HASHTYPE_SET(sendmp,
1994 M_HASHTYPE_RSS_IPV6_EX);
1995 break;
1996 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1997 M_HASHTYPE_SET(sendmp,
1998 M_HASHTYPE_RSS_TCP_IPV6_EX);
1999 break;
2000 #if __FreeBSD_version > 1100000
2001 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2002 M_HASHTYPE_SET(sendmp,
2003 M_HASHTYPE_RSS_UDP_IPV4);
2004 break;
2005 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2006 M_HASHTYPE_SET(sendmp,
2007 M_HASHTYPE_RSS_UDP_IPV6);
2008 break;
2009 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2010 M_HASHTYPE_SET(sendmp,
2011 M_HASHTYPE_RSS_UDP_IPV6_EX);
2012 break;
2013 #endif
2014 default:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_OPAQUE_HASH);
2017 }
2018 } else {
2019 sendmp->m_pkthdr.flowid = que->msix;
2020 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2021 }
2022 #endif
2023 }
2024 next_desc:
2025 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2026 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2027
2028 /* Advance our pointers to the next descriptor. */
2029 if (++i == rxr->num_desc)
2030 i = 0;
2031
2032 /* Now send to the stack or do LRO */
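		/*
		 * The RX lock is dropped across the hand-off to the stack;
		 * next_to_check is published beforehand and reloaded after
		 * the lock is retaken.
		 */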
2033 if (sendmp != NULL) {
2034 rxr->next_to_check = i;
2035 IXGBE_RX_UNLOCK(rxr);
2036 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2037 IXGBE_RX_LOCK(rxr);
2038 i = rxr->next_to_check;
2039 }
2040
2041 /* Every 8 descriptors we go to refresh mbufs */
2042 if (processed == 8) {
2043 ixgbe_refresh_mbufs(rxr, i);
2044 processed = 0;
2045 }
2046 }
2047
2048 /* Refresh any remaining buf structs */
2049 if (ixgbe_rx_unrefreshed(rxr))
2050 ixgbe_refresh_mbufs(rxr, i);
2051
2052 rxr->next_to_check = i;
2053
2054 IXGBE_RX_UNLOCK(rxr);
2055
2056 #ifdef LRO
2057 /*
2058 * Flush any outstanding LRO work
2059 */
2060 tcp_lro_flush_all(lro);
2061 #endif /* LRO */
2062
2063 /*
2064 * Still have cleaning to do?
2065 */
2066 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2067 return (TRUE);
2068
2069 return (FALSE);
2070 } /* ixgbe_rxeof */
2071
2072
2073 /************************************************************************
2074 * ixgbe_rx_checksum
2075 *
2076 * Verify that the hardware indicated that the checksum is valid.
2077 * Inform the stack about the status of checksum so that stack
2078 * doesn't spend time verifying the checksum.
2079 ************************************************************************/
2080 static void
2081 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2082 struct ixgbe_hw_stats *stats)
2083 {
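	/*
	 * The writeback word is split here: the status bits come from the
	 * low 16 bits of staterr and the error bits from its top byte.
	 */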
2084 u16 status = (u16)staterr;
2085 u8 errors = (u8)(staterr >> 24);
2086 #if 0
2087 bool sctp = false;
2088
2089 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2090 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2091 sctp = true;
2092 #endif
2093
2094 /* IPv4 checksum */
2095 if (status & IXGBE_RXD_STAT_IPCS) {
2096 stats->ipcs.ev_count++;
2097 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2098 /* IP Checksum Good */
2099 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2100 } else {
2101 stats->ipcs_bad.ev_count++;
2102 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2103 }
2104 }
2105 /* TCP/UDP/SCTP checksum */
2106 if (status & IXGBE_RXD_STAT_L4CS) {
2107 stats->l4cs.ev_count++;
2108 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2109 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2110 mp->m_pkthdr.csum_flags |= type;
2111 } else {
2112 stats->l4cs_bad.ev_count++;
2113 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2114 }
2115 }
2116 } /* ixgbe_rx_checksum */
2117
2118 /************************************************************************
2119 * ixgbe_dma_malloc
2120 ************************************************************************/
2121 int
2122 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2123 struct ixgbe_dma_alloc *dma, const int mapflags)
2124 {
2125 device_t dev = adapter->dev;
2126 int r, rsegs;
2127
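	/*
	 * Standard bus_dma(9) setup: create a DMA tag, allocate and map
	 * DMA-safe memory, create a map, and load the memory into the map
	 * to obtain the bus address used by the hardware.
	 */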
2128 r = ixgbe_dma_tag_create(
2129 /* parent */ adapter->osdep.dmat,
2130 /* alignment */ DBA_ALIGN,
2131 /* bounds */ 0,
2132 /* maxsize */ size,
2133 /* nsegments */ 1,
2134 /* maxsegsize */ size,
2135 /* flags */ BUS_DMA_ALLOCNOW,
2136 &dma->dma_tag);
2137 if (r != 0) {
2138 aprint_error_dev(dev,
2139 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2140 r);
2141 goto fail_0;
2142 }
2143
2144 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2145 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2146 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2147 if (r != 0) {
2148 aprint_error_dev(dev,
2149 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2150 goto fail_1;
2151 }
2152
2153 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2154 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2155 if (r != 0) {
2156 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2157 __func__, r);
2158 goto fail_2;
2159 }
2160
2161 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2162 if (r != 0) {
2163 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2164 __func__, r);
2165 goto fail_3;
2166 }
2167
2168 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2169 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2170 if (r != 0) {
2171 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2172 __func__, r);
2173 goto fail_4;
2174 }
2175 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2176 dma->dma_size = size;
2177 return 0;
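	/* Error unwind: tear down in the reverse order of the setup above */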
2178 fail_4:
2179 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2180 fail_3:
2181 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2182 fail_2:
2183 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2184 fail_1:
2185 ixgbe_dma_tag_destroy(dma->dma_tag);
2186 fail_0:
2187
2188 return (r);
2189 } /* ixgbe_dma_malloc */
2190
2191 /************************************************************************
2192 * ixgbe_dma_free
2193 ************************************************************************/
2194 void
2195 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2196 {
2197 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2198 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2199 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2200 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2201 ixgbe_dma_tag_destroy(dma->dma_tag);
2202 } /* ixgbe_dma_free */
2203
2204
2205 /************************************************************************
2206 * ixgbe_allocate_queues
2207 *
2208 * Allocate memory for the transmit and receive rings, and then
2209 * the descriptors associated with each, called only once at attach.
2210 ************************************************************************/
2211 int
2212 ixgbe_allocate_queues(struct adapter *adapter)
2213 {
2214 device_t dev = adapter->dev;
2215 struct ix_queue *que;
2216 struct tx_ring *txr;
2217 struct rx_ring *rxr;
2218 int rsize, tsize, error = IXGBE_SUCCESS;
2219 int txconf = 0, rxconf = 0;
2220
2221 /* First, allocate the top level queue structs */
2222 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2223 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2224 if (adapter->queues == NULL) {
2225 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2226 error = ENOMEM;
2227 goto fail;
2228 }
2229
2230 /* Second, allocate the TX ring struct memory */
2231 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2232 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2233 if (adapter->tx_rings == NULL) {
2234 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2235 error = ENOMEM;
2236 goto tx_fail;
2237 }
2238
2239 /* Third, allocate the RX ring */
2240 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2241 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2242 if (adapter->rx_rings == NULL) {
2243 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2244 error = ENOMEM;
2245 goto rx_fail;
2246 }
2247
2248 	/* Size of the TX descriptor ring itself, rounded to the required alignment */
2249 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2250 DBA_ALIGN);
2251
2252 /*
2253 	 * Now set up the TX queues. txconf is needed to handle the
2254 	 * possibility that things fail midcourse, in which case the
2255 	 * memory allocated so far must be unwound gracefully.
2256 */
2257 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2258 /* Set up some basics */
2259 txr = &adapter->tx_rings[i];
2260 txr->adapter = adapter;
2261 txr->txr_interq = NULL;
2262 /* In case SR-IOV is enabled, align the index properly */
2263 #ifdef PCI_IOV
2264 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2265 i);
2266 #else
2267 txr->me = i;
2268 #endif
2269 txr->num_desc = adapter->num_tx_desc;
2270
2271 /* Initialize the TX side lock */
2272 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2273
2274 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2275 BUS_DMA_NOWAIT)) {
2276 aprint_error_dev(dev,
2277 "Unable to allocate TX Descriptor memory\n");
2278 error = ENOMEM;
2279 goto err_tx_desc;
2280 }
2281 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2282 bzero((void *)txr->tx_base, tsize);
2283
2284 /* Now allocate transmit buffers for the ring */
2285 if (ixgbe_allocate_transmit_buffers(txr)) {
2286 aprint_error_dev(dev,
2287 "Critical Failure setting up transmit buffers\n");
2288 error = ENOMEM;
2289 goto err_tx_desc;
2290 }
2291 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2292 			/* Allocate a pcq to serve as the software transmit queue */
2293 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2294 if (txr->txr_interq == NULL) {
2295 aprint_error_dev(dev,
2296 "Critical Failure setting up buf ring\n");
2297 error = ENOMEM;
2298 goto err_tx_desc;
2299 }
2300 }
2301 }
2302
2303 /*
2304 * Next the RX queues...
2305 */
2306 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2307 DBA_ALIGN);
2308 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2309 rxr = &adapter->rx_rings[i];
2310 /* Set up some basics */
2311 rxr->adapter = adapter;
2312 #ifdef PCI_IOV
2313 /* In case SR-IOV is enabled, align the index properly */
2314 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2315 i);
2316 #else
2317 rxr->me = i;
2318 #endif
2319 rxr->num_desc = adapter->num_rx_desc;
2320
2321 /* Initialize the RX side lock */
2322 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2323
2324 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2325 BUS_DMA_NOWAIT)) {
2326 aprint_error_dev(dev,
2327 			    "Unable to allocate RX Descriptor memory\n");
2328 error = ENOMEM;
2329 goto err_rx_desc;
2330 }
2331 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2332 bzero((void *)rxr->rx_base, rsize);
2333
2334 /* Allocate receive buffers for the ring */
2335 if (ixgbe_allocate_receive_buffers(rxr)) {
2336 aprint_error_dev(dev,
2337 "Critical Failure setting up receive buffers\n");
2338 error = ENOMEM;
2339 goto err_rx_desc;
2340 }
2341 }
2342
2343 /*
2344 * Finally set up the queue holding structs
2345 */
2346 for (int i = 0; i < adapter->num_queues; i++) {
2347 que = &adapter->queues[i];
2348 que->adapter = adapter;
2349 que->me = i;
2350 que->txr = &adapter->tx_rings[i];
2351 que->rxr = &adapter->rx_rings[i];
2352
2353 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2354 que->disabled_count = 0;
2355 }
2356
2357 return (0);
2358
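	/*
	 * Unwind: rxconf and txconf count how many rings had descriptor
	 * memory allocated, so only those are freed here.
	 */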
2359 err_rx_desc:
2360 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2361 ixgbe_dma_free(adapter, &rxr->rxdma);
2362 err_tx_desc:
2363 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2364 ixgbe_dma_free(adapter, &txr->txdma);
2365 free(adapter->rx_rings, M_DEVBUF);
2366 rx_fail:
2367 free(adapter->tx_rings, M_DEVBUF);
2368 tx_fail:
2369 free(adapter->queues, M_DEVBUF);
2370 fail:
2371 return (error);
2372 } /* ixgbe_allocate_queues */
2373