1 /*	$NetBSD: ix_txrx.c,v 1.38 2018/04/02 05:02:55 knakahara Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 321476 2017-07-25 14:38:30Z sbruno $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72  * HW RSC control:
73  *  this feature only works with
74  *  IPv4, and only on 82599 and later.
75  *  It also causes IP forwarding to
76  *  fail, and unlike LRO it cannot be
77  *  controlled by the stack. For all
78  *  these reasons it is best left off
79  *  by default, and there is no tunable
80  *  interface; enabling it requires
81  *  recompiling the driver.
82  */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88  * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (!adapter->link_active) {
140 		/*
141 		 * Discard all packets buffered in the IFQ to avoid
142 		 * sending stale packets once the link comes back up.
143 		 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149
150 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
151 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
152 break;
153
154 IFQ_POLL(&ifp->if_snd, m_head);
155 if (m_head == NULL)
156 break;
157
158 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
159 break;
160 }
161 IFQ_DEQUEUE(&ifp->if_snd, m_head);
162 if (rc != 0) {
163 m_freem(m_head);
164 continue;
165 }
166
167 /* Send a copy of the frame to the BPF listener */
168 bpf_mtap(ifp, m_head);
169 }
170
171 return IXGBE_SUCCESS;
172 } /* ixgbe_legacy_start_locked */
173
174 /************************************************************************
175 * ixgbe_legacy_start
176 *
177 * Called by the stack, this always uses the first tx ring,
178 * and should not be used with multiqueue tx enabled.
179 ************************************************************************/
180 void
181 ixgbe_legacy_start(struct ifnet *ifp)
182 {
183 struct adapter *adapter = ifp->if_softc;
184 struct tx_ring *txr = adapter->tx_rings;
185
186 if (ifp->if_flags & IFF_RUNNING) {
187 IXGBE_TX_LOCK(txr);
188 ixgbe_legacy_start_locked(ifp, txr);
189 IXGBE_TX_UNLOCK(txr);
190 }
191 } /* ixgbe_legacy_start */
192
193 /************************************************************************
194 * ixgbe_mq_start - Multiqueue Transmit Entry Point
195 *
196 * (if_transmit function)
197 ************************************************************************/
198 int
199 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
200 {
201 struct adapter *adapter = ifp->if_softc;
202 struct tx_ring *txr;
203 int i, err = 0;
204 #ifdef RSS
205 uint32_t bucket_id;
206 #endif
207
208 /*
209 * When doing RSS, map it to the same outbound queue
210 * as the incoming flow would be mapped to.
211 *
212 	 * If everything is set up correctly, it should be the
213 	 * same bucket as the one the current CPU maps to.
214 */
215 #ifdef RSS
216 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
217 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
218 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
219 &bucket_id) == 0)) {
220 i = bucket_id % adapter->num_queues;
221 #ifdef IXGBE_DEBUG
222 if (bucket_id > adapter->num_queues)
223 if_printf(ifp,
224 "bucket_id (%d) > num_queues (%d)\n",
225 bucket_id, adapter->num_queues);
226 #endif
227 } else
228 i = m->m_pkthdr.flowid % adapter->num_queues;
229 } else
230 #endif /* RSS */
231 i = cpu_index(curcpu()) % adapter->num_queues;
232
233 /* Check for a hung queue and pick alternative */
234 	if (((1ULL << i) & adapter->active_queues) == 0)
235 i = ffs64(adapter->active_queues);
236
237 txr = &adapter->tx_rings[i];
238
239 err = pcq_put(txr->txr_interq, m);
240 if (err == false) {
241 m_freem(m);
242 txr->pcq_drops.ev_count++;
243 		return (ENOBUFS);
244 }
245 if (IXGBE_TX_TRYLOCK(txr)) {
246 ixgbe_mq_start_locked(ifp, txr);
247 IXGBE_TX_UNLOCK(txr);
248 } else {
249 if (adapter->txrx_use_workqueue) {
250 /*
251 			 * This function itself is not called in interrupt
252 			 * context, but it can be called in fast softint
253 			 * context right after receiving forwarded packets.
254 			 * The workqueue must therefore be protected against
255 			 * double enqueueing when the machine handles both
256 			 * locally generated and forwarded packets.
257 */
258 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
259 if (*enqueued == 0) {
260 *enqueued = 1;
261 percpu_putref(adapter->txr_wq_enqueued);
262 workqueue_enqueue(adapter->txr_wq, &txr->wq_cookie, curcpu());
263 } else
264 percpu_putref(adapter->txr_wq_enqueued);
265 } else
266 softint_schedule(txr->txr_si);
267 }
268
269 return (0);
270 } /* ixgbe_mq_start */
271
272 /************************************************************************
273 * ixgbe_mq_start_locked
274 ************************************************************************/
275 int
276 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
277 {
278 struct mbuf *next;
279 int enqueued = 0, err = 0;
280
281 if (!txr->adapter->link_active) {
282 		/*
283 		 * Discard all packets buffered in txr_interq to avoid
284 		 * sending stale packets once the link comes back up.
285 		 */
286 ixgbe_drain(ifp, txr);
287 return (ENETDOWN);
288 }
289 if ((ifp->if_flags & IFF_RUNNING) == 0)
290 return (ENETDOWN);
291
292 /* Process the queue */
293 while ((next = pcq_get(txr->txr_interq)) != NULL) {
294 if ((err = ixgbe_xmit(txr, next)) != 0) {
295 m_freem(next);
296 /* All errors are counted in ixgbe_xmit() */
297 break;
298 }
299 enqueued++;
300 #if __FreeBSD_version >= 1100036
301 /*
302 * Since we're looking at the tx ring, we can check
303 		 * to see if we're a VF by examining our tail register
304 * address.
305 */
306 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
307 (next->m_flags & M_MCAST))
308 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
309 #endif
310 /* Send a copy of the frame to the BPF listener */
311 bpf_mtap(ifp, next);
312 if ((ifp->if_flags & IFF_RUNNING) == 0)
313 break;
314 }
315
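	/*
	 * If descriptors are getting low, reclaim completed ones
	 * while we still hold the TX lock.
	 */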
316 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
317 ixgbe_txeof(txr);
318
319 return (err);
320 } /* ixgbe_mq_start_locked */
321
322 /************************************************************************
323 * ixgbe_deferred_mq_start
324 *
325  * Called from a softint or (indirectly) from a workqueue to drain queued
326 * transmit packets.
327 ************************************************************************/
328 void
329 ixgbe_deferred_mq_start(void *arg)
330 {
331 struct tx_ring *txr = arg;
332 struct adapter *adapter = txr->adapter;
333 struct ifnet *ifp = adapter->ifp;
334
335 IXGBE_TX_LOCK(txr);
336 if (pcq_peek(txr->txr_interq) != NULL)
337 ixgbe_mq_start_locked(ifp, txr);
338 IXGBE_TX_UNLOCK(txr);
339 } /* ixgbe_deferred_mq_start */
340
341 /************************************************************************
342 * ixgbe_deferred_mq_start_work
343 *
344 * Called from a workqueue to drain queued transmit packets.
345 ************************************************************************/
346 void
347 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
348 {
349 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
350 struct adapter *adapter = txr->adapter;
351 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
352 *enqueued = 0;
353 percpu_putref(adapter->txr_wq_enqueued);
354
355 ixgbe_deferred_mq_start(txr);
356 } /* ixgbe_deferred_mq_start_work */
357
358 /************************************************************************
359 * ixgbe_drain_all
360 ************************************************************************/
361 void
362 ixgbe_drain_all(struct adapter *adapter)
363 {
364 struct ifnet *ifp = adapter->ifp;
365 struct ix_queue *que = adapter->queues;
366
367 for (int i = 0; i < adapter->num_queues; i++, que++) {
368 struct tx_ring *txr = que->txr;
369
370 IXGBE_TX_LOCK(txr);
371 ixgbe_drain(ifp, txr);
372 IXGBE_TX_UNLOCK(txr);
373 }
374 }
375
376 /************************************************************************
377 * ixgbe_xmit
378 *
379 * Maps the mbufs to tx descriptors, allowing the
380 * TX engine to transmit the packets.
381 *
382 * Return 0 on success, positive on failure
383 ************************************************************************/
384 static int
385 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
386 {
387 struct adapter *adapter = txr->adapter;
388 struct ixgbe_tx_buf *txbuf;
389 union ixgbe_adv_tx_desc *txd = NULL;
390 struct ifnet *ifp = adapter->ifp;
391 int i, j, error;
392 int first;
393 u32 olinfo_status = 0, cmd_type_len;
394 bool remap = TRUE;
395 bus_dmamap_t map;
396
397 /* Basic descriptor defines */
398 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
399 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
400
401 if (vlan_has_tag(m_head))
402 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
403
404 /*
405 	 * Important to capture the first descriptor used,
406 	 * because its tx_buffer records the EOP descriptor
407 	 * that the hardware will report back on completion.
408 */
409 first = txr->next_avail_desc;
410 txbuf = &txr->tx_buffers[first];
411 map = txbuf->map;
412
413 /*
414 * Map the packet for DMA.
415 */
416 retry:
417 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
418 BUS_DMA_NOWAIT);
419
420 if (__predict_false(error)) {
421 struct mbuf *m;
422
423 switch (error) {
424 case EAGAIN:
425 txr->q_eagain_tx_dma_setup++;
426 return EAGAIN;
427 case ENOMEM:
428 txr->q_enomem_tx_dma_setup++;
429 return EAGAIN;
430 case EFBIG:
431 /* Try it again? - one try */
432 if (remap == TRUE) {
433 remap = FALSE;
434 /*
435 * XXX: m_defrag will choke on
436 * non-MCLBYTES-sized clusters
437 */
438 txr->q_efbig_tx_dma_setup++;
439 m = m_defrag(m_head, M_NOWAIT);
440 if (m == NULL) {
441 txr->q_mbuf_defrag_failed++;
442 return ENOBUFS;
443 }
444 m_head = m;
445 goto retry;
446 } else {
447 txr->q_efbig2_tx_dma_setup++;
448 return error;
449 }
450 case EINVAL:
451 txr->q_einval_tx_dma_setup++;
452 return error;
453 default:
454 txr->q_other_tx_dma_setup++;
455 return error;
456 }
457 }
458
459 /* Make certain there are enough descriptors */
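	/*
	 * The "+ 2" presumably leaves room for the context descriptor
	 * set up below and keeps a slot of slack.
	 */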
460 if (txr->tx_avail < (map->dm_nsegs + 2)) {
461 txr->no_desc_avail.ev_count++;
462 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
463 return EAGAIN;
464 }
465
466 /*
467 * Set up the appropriate offload context
468 * this will consume the first descriptor
469 */
470 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
471 if (__predict_false(error)) {
472 return (error);
473 }
474
475 /* Do the flow director magic */
476 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
477 (txr->atr_sample) && (!adapter->fdir_reinit)) {
478 ++txr->atr_count;
479 if (txr->atr_count >= atr_sample_rate) {
480 ixgbe_atr(txr, m_head);
481 txr->atr_count = 0;
482 }
483 }
484
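	/*
	 * The CC (check-context) bit asks the hardware to apply the
	 * context descriptor set up above.
	 */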
485 olinfo_status |= IXGBE_ADVTXD_CC;
486 i = txr->next_avail_desc;
487 for (j = 0; j < map->dm_nsegs; j++) {
488 bus_size_t seglen;
489 bus_addr_t segaddr;
490
491 txbuf = &txr->tx_buffers[i];
492 txd = &txr->tx_base[i];
493 seglen = map->dm_segs[j].ds_len;
494 segaddr = htole64(map->dm_segs[j].ds_addr);
495
496 txd->read.buffer_addr = segaddr;
497 txd->read.cmd_type_len = htole32(txr->txd_cmd |
498 cmd_type_len | seglen);
499 txd->read.olinfo_status = htole32(olinfo_status);
500
501 if (++i == txr->num_desc)
502 i = 0;
503 }
504
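	/*
	 * The final descriptor gets EOP (end of packet) and RS (report
	 * status) so its DD bit is written back on completion.
	 */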
505 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
506 txr->tx_avail -= map->dm_nsegs;
507 txr->next_avail_desc = i;
508
509 txbuf->m_head = m_head;
510 /*
511 	 * Here we swap the maps so the last descriptor,
512 	 * which gets the completion interrupt, keeps the
513 	 * loaded map, and the first descriptor takes over
514 	 * the unused map from this slot.
515 */
516 txr->tx_buffers[first].map = txbuf->map;
517 txbuf->map = map;
518 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
519 BUS_DMASYNC_PREWRITE);
520
521 /* Set the EOP descriptor that will be marked done */
522 txbuf = &txr->tx_buffers[first];
523 txbuf->eop = txd;
524
525 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
526 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
527 /*
528 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
529 * hardware that this frame is available to transmit.
530 */
531 ++txr->total_packets.ev_count;
532 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
533
534 /*
535 * XXXX NOMPSAFE: ifp->if_data should be percpu.
536 */
537 ifp->if_obytes += m_head->m_pkthdr.len;
538 if (m_head->m_flags & M_MCAST)
539 ifp->if_omcasts++;
540
541 /* Mark queue as having work */
542 if (txr->busy == 0)
543 txr->busy = 1;
544
545 return (0);
546 } /* ixgbe_xmit */
547
548 /************************************************************************
549 * ixgbe_drain
550 ************************************************************************/
551 static void
552 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
553 {
554 struct mbuf *m;
555
556 IXGBE_TX_LOCK_ASSERT(txr);
557
558 if (txr->me == 0) {
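		/* Only the first ring services the legacy if_snd queue. */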
559 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
560 IFQ_DEQUEUE(&ifp->if_snd, m);
561 m_freem(m);
562 IF_DROP(&ifp->if_snd);
563 }
564 }
565
566 while ((m = pcq_get(txr->txr_interq)) != NULL) {
567 m_freem(m);
568 txr->pcq_drops.ev_count++;
569 }
570 }
571
572 /************************************************************************
573 * ixgbe_allocate_transmit_buffers
574 *
575 * Allocate memory for tx_buffer structures. The tx_buffer stores all
576 * the information needed to transmit a packet on the wire. This is
577  * called only once at attach; setup is done on every reset.
578 ************************************************************************/
579 static int
580 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
581 {
582 struct adapter *adapter = txr->adapter;
583 device_t dev = adapter->dev;
584 struct ixgbe_tx_buf *txbuf;
585 int error, i;
586
587 /*
588 * Setup DMA descriptor areas.
589 */
590 error = ixgbe_dma_tag_create(
591 /* parent */ adapter->osdep.dmat,
592 /* alignment */ 1,
593 /* bounds */ 0,
594 /* maxsize */ IXGBE_TSO_SIZE,
595 /* nsegments */ adapter->num_segs,
596 /* maxsegsize */ PAGE_SIZE,
597 /* flags */ 0,
598 &txr->txtag);
599 if (error != 0) {
600 		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
601 goto fail;
602 }
603
604 txr->tx_buffers =
605 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
606 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
607 if (txr->tx_buffers == NULL) {
608 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
609 error = ENOMEM;
610 goto fail;
611 }
612
613 /* Create the descriptor buffer dma maps */
614 txbuf = txr->tx_buffers;
615 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
616 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
617 if (error != 0) {
618 aprint_error_dev(dev,
619 "Unable to create TX DMA map (%d)\n", error);
620 goto fail;
621 }
622 }
623
624 return 0;
625 fail:
626 	/* Free everything; this handles the case where we failed part way through. */
627 #if 0 /* XXX was FreeBSD */
628 ixgbe_free_transmit_structures(adapter);
629 #else
630 ixgbe_free_transmit_buffers(txr);
631 #endif
632 return (error);
633 } /* ixgbe_allocate_transmit_buffers */
634
635 /************************************************************************
636 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
637 ************************************************************************/
638 static void
639 ixgbe_setup_transmit_ring(struct tx_ring *txr)
640 {
641 struct adapter *adapter = txr->adapter;
642 struct ixgbe_tx_buf *txbuf;
643 #ifdef DEV_NETMAP
644 struct netmap_adapter *na = NA(adapter->ifp);
645 struct netmap_slot *slot;
646 #endif /* DEV_NETMAP */
647
648 /* Clear the old ring contents */
649 IXGBE_TX_LOCK(txr);
650
651 #ifdef DEV_NETMAP
652 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
653 /*
654 * (under lock): if in netmap mode, do some consistency
655 * checks and set slot to entry 0 of the netmap ring.
656 */
657 slot = netmap_reset(na, NR_TX, txr->me, 0);
658 }
659 #endif /* DEV_NETMAP */
660
661 bzero((void *)txr->tx_base,
662 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
663 /* Reset indices */
664 txr->next_avail_desc = 0;
665 txr->next_to_clean = 0;
666
667 /* Free any existing tx buffers. */
668 txbuf = txr->tx_buffers;
669 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
670 if (txbuf->m_head != NULL) {
671 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
672 0, txbuf->m_head->m_pkthdr.len,
673 BUS_DMASYNC_POSTWRITE);
674 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
675 m_freem(txbuf->m_head);
676 txbuf->m_head = NULL;
677 }
678
679 #ifdef DEV_NETMAP
680 /*
681 * In netmap mode, set the map for the packet buffer.
682 * NOTE: Some drivers (not this one) also need to set
683 * the physical buffer address in the NIC ring.
684 * Slots in the netmap ring (indexed by "si") are
685 * kring->nkr_hwofs positions "ahead" wrt the
686 * corresponding slot in the NIC ring. In some drivers
687 * (not here) nkr_hwofs can be negative. Function
688 * netmap_idx_n2k() handles wraparounds properly.
689 */
690 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
691 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
692 netmap_load_map(na, txr->txtag,
693 txbuf->map, NMB(na, slot + si));
694 }
695 #endif /* DEV_NETMAP */
696
697 /* Clear the EOP descriptor pointer */
698 txbuf->eop = NULL;
699 }
700
701 /* Set the rate at which we sample packets */
702 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
703 txr->atr_sample = atr_sample_rate;
704
705 /* Set number of descriptors available */
706 txr->tx_avail = adapter->num_tx_desc;
707
708 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
709 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
710 IXGBE_TX_UNLOCK(txr);
711 } /* ixgbe_setup_transmit_ring */
712
713 /************************************************************************
714 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
715 ************************************************************************/
716 int
717 ixgbe_setup_transmit_structures(struct adapter *adapter)
718 {
719 struct tx_ring *txr = adapter->tx_rings;
720
721 for (int i = 0; i < adapter->num_queues; i++, txr++)
722 ixgbe_setup_transmit_ring(txr);
723
724 return (0);
725 } /* ixgbe_setup_transmit_structures */
726
727 /************************************************************************
728 * ixgbe_free_transmit_structures - Free all transmit rings.
729 ************************************************************************/
730 void
731 ixgbe_free_transmit_structures(struct adapter *adapter)
732 {
733 struct tx_ring *txr = adapter->tx_rings;
734
735 for (int i = 0; i < adapter->num_queues; i++, txr++) {
736 ixgbe_free_transmit_buffers(txr);
737 ixgbe_dma_free(adapter, &txr->txdma);
738 IXGBE_TX_LOCK_DESTROY(txr);
739 }
740 free(adapter->tx_rings, M_DEVBUF);
741 } /* ixgbe_free_transmit_structures */
742
743 /************************************************************************
744 * ixgbe_free_transmit_buffers
745 *
746 * Free transmit ring related data structures.
747 ************************************************************************/
748 static void
749 ixgbe_free_transmit_buffers(struct tx_ring *txr)
750 {
751 struct adapter *adapter = txr->adapter;
752 struct ixgbe_tx_buf *tx_buffer;
753 int i;
754
755 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
756
757 if (txr->tx_buffers == NULL)
758 return;
759
760 tx_buffer = txr->tx_buffers;
761 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
762 if (tx_buffer->m_head != NULL) {
763 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
764 0, tx_buffer->m_head->m_pkthdr.len,
765 BUS_DMASYNC_POSTWRITE);
766 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
767 m_freem(tx_buffer->m_head);
768 tx_buffer->m_head = NULL;
769 if (tx_buffer->map != NULL) {
770 ixgbe_dmamap_destroy(txr->txtag,
771 tx_buffer->map);
772 tx_buffer->map = NULL;
773 }
774 } else if (tx_buffer->map != NULL) {
775 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
776 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
777 tx_buffer->map = NULL;
778 }
779 }
780 if (txr->txr_interq != NULL) {
781 struct mbuf *m;
782
783 while ((m = pcq_get(txr->txr_interq)) != NULL)
784 m_freem(m);
785 pcq_destroy(txr->txr_interq);
786 }
787 if (txr->tx_buffers != NULL) {
788 free(txr->tx_buffers, M_DEVBUF);
789 txr->tx_buffers = NULL;
790 }
791 if (txr->txtag != NULL) {
792 ixgbe_dma_tag_destroy(txr->txtag);
793 txr->txtag = NULL;
794 }
795 } /* ixgbe_free_transmit_buffers */
796
797 /************************************************************************
798 * ixgbe_tx_ctx_setup
799 *
800 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
801 ************************************************************************/
802 static int
803 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
804 u32 *cmd_type_len, u32 *olinfo_status)
805 {
806 struct adapter *adapter = txr->adapter;
807 struct ixgbe_adv_tx_context_desc *TXD;
808 struct ether_vlan_header *eh;
809 #ifdef INET
810 struct ip *ip;
811 #endif
812 #ifdef INET6
813 struct ip6_hdr *ip6;
814 #endif
815 int ehdrlen, ip_hlen = 0;
816 int offload = TRUE;
817 int ctxd = txr->next_avail_desc;
818 u32 vlan_macip_lens = 0;
819 u32 type_tucmd_mlhl = 0;
820 u16 vtag = 0;
821 u16 etype;
822 u8 ipproto = 0;
823 char *l3d;
824
825
826 /* First check if TSO is to be used */
827 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
828 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
829
830 if (rv != 0)
831 ++adapter->tso_err.ev_count;
832 return rv;
833 }
834
835 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
836 offload = FALSE;
837
838 /* Indicate the whole packet as payload when not doing TSO */
839 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
840
841 /* Now ready a context descriptor */
842 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
843
844 /*
845 * In advanced descriptors the vlan tag must
846 * be placed into the context descriptor. Hence
847 * we need to make one even if not doing offloads.
848 */
849 if (vlan_has_tag(mp)) {
850 vtag = htole16(vlan_get_tag(mp));
851 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
852 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
853 (offload == FALSE))
854 return (0);
855
856 /*
857 * Determine where frame payload starts.
858 * Jump over vlan headers if already present,
859 * helpful for QinQ too.
860 */
861 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
862 eh = mtod(mp, struct ether_vlan_header *);
863 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
864 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
865 etype = ntohs(eh->evl_proto);
866 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
867 } else {
868 etype = ntohs(eh->evl_encap_proto);
869 ehdrlen = ETHER_HDR_LEN;
870 }
871
872 /* Set the ether header length */
873 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
874
875 if (offload == FALSE)
876 goto no_offloads;
877
878 /*
879 * If the first mbuf only includes the ethernet header,
880 * jump to the next one
881 * XXX: This assumes the stack splits mbufs containing headers
882 * on header boundaries
883 * XXX: And assumes the entire IP header is contained in one mbuf
884 */
885 if (mp->m_len == ehdrlen && mp->m_next)
886 l3d = mtod(mp->m_next, char *);
887 else
888 l3d = mtod(mp, char *) + ehdrlen;
889
890 switch (etype) {
891 #ifdef INET
892 case ETHERTYPE_IP:
893 ip = (struct ip *)(l3d);
894 ip_hlen = ip->ip_hl << 2;
895 ipproto = ip->ip_p;
896 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
897 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
898 ip->ip_sum == 0);
899 break;
900 #endif
901 #ifdef INET6
902 case ETHERTYPE_IPV6:
903 ip6 = (struct ip6_hdr *)(l3d);
904 ip_hlen = sizeof(struct ip6_hdr);
905 ipproto = ip6->ip6_nxt;
906 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
907 break;
908 #endif
909 default:
910 offload = false;
911 break;
912 }
913
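	/* IXSM asks the hardware to insert the IPv4 header checksum. */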
914 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
915 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
916
917 vlan_macip_lens |= ip_hlen;
918
919 /* No support for offloads for non-L4 next headers */
920 switch (ipproto) {
921 case IPPROTO_TCP:
922 if (mp->m_pkthdr.csum_flags &
923 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
924 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
925 else
926 offload = false;
927 break;
928 case IPPROTO_UDP:
929 if (mp->m_pkthdr.csum_flags &
930 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
931 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
932 else
933 offload = false;
934 break;
935 default:
936 offload = false;
937 break;
938 }
939
940 if (offload) /* Insert L4 checksum into data descriptors */
941 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
942
943 no_offloads:
944 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
945
946 /* Now copy bits into descriptor */
947 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
948 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
949 TXD->seqnum_seed = htole32(0);
950 TXD->mss_l4len_idx = htole32(0);
951
952 /* We've consumed the first desc, adjust counters */
953 if (++ctxd == txr->num_desc)
954 ctxd = 0;
955 txr->next_avail_desc = ctxd;
956 --txr->tx_avail;
957
958 return (0);
959 } /* ixgbe_tx_ctx_setup */
960
961 /************************************************************************
962 * ixgbe_tso_setup
963 *
964 * Setup work for hardware segmentation offload (TSO) on
965 * adapters using advanced tx descriptors
966 ************************************************************************/
967 static int
968 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
969 u32 *olinfo_status)
970 {
971 struct ixgbe_adv_tx_context_desc *TXD;
972 struct ether_vlan_header *eh;
973 #ifdef INET6
974 struct ip6_hdr *ip6;
975 #endif
976 #ifdef INET
977 struct ip *ip;
978 #endif
979 struct tcphdr *th;
980 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
981 u32 vlan_macip_lens = 0;
982 u32 type_tucmd_mlhl = 0;
983 u32 mss_l4len_idx = 0, paylen;
984 u16 vtag = 0, eh_type;
985
986 /*
987 * Determine where frame payload starts.
988 * Jump over vlan headers if already present
989 */
990 eh = mtod(mp, struct ether_vlan_header *);
991 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
992 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
993 eh_type = eh->evl_proto;
994 } else {
995 ehdrlen = ETHER_HDR_LEN;
996 eh_type = eh->evl_encap_proto;
997 }
998
999 switch (ntohs(eh_type)) {
1000 #ifdef INET
1001 case ETHERTYPE_IP:
1002 ip = (struct ip *)(mp->m_data + ehdrlen);
1003 if (ip->ip_p != IPPROTO_TCP)
1004 return (ENXIO);
1005 ip->ip_sum = 0;
1006 ip_hlen = ip->ip_hl << 2;
1007 th = (struct tcphdr *)((char *)ip + ip_hlen);
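		/*
		 * Seed th_sum with the pseudo-header checksum; the
		 * hardware completes it for each TSO segment.
		 */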
1008 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1009 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1010 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1011 /* Tell transmit desc to also do IPv4 checksum. */
1012 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1013 break;
1014 #endif
1015 #ifdef INET6
1016 case ETHERTYPE_IPV6:
1017 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1018 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1019 if (ip6->ip6_nxt != IPPROTO_TCP)
1020 return (ENXIO);
1021 ip_hlen = sizeof(struct ip6_hdr);
1022 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1023 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1024 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1025 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1026 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1027 break;
1028 #endif
1029 default:
1030 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1031 __func__, ntohs(eh_type));
1032 break;
1033 }
1034
1035 ctxd = txr->next_avail_desc;
1036 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1037
1038 tcp_hlen = th->th_off << 2;
1039
1040 /* This is used in the transmit desc in encap */
1041 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1042
1043 /* VLAN MACLEN IPLEN */
1044 if (vlan_has_tag(mp)) {
1045 vtag = htole16(vlan_get_tag(mp));
1046 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1047 }
1048
1049 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1050 vlan_macip_lens |= ip_hlen;
1051 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1052
1053 /* ADV DTYPE TUCMD */
1054 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1055 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1056 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1057
1058 /* MSS L4LEN IDX */
1059 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1060 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1061 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1062
1063 TXD->seqnum_seed = htole32(0);
1064
1065 if (++ctxd == txr->num_desc)
1066 ctxd = 0;
1067
1068 txr->tx_avail--;
1069 txr->next_avail_desc = ctxd;
1070 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1071 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1072 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1073 ++txr->tso_tx.ev_count;
1074
1075 return (0);
1076 } /* ixgbe_tso_setup */
1077
1078
1079 /************************************************************************
1080 * ixgbe_txeof
1081 *
1082 * Examine each tx_buffer in the used queue. If the hardware is done
1083  * processing the packet, then free the associated resources. The
1084 * tx_buffer is put back on the free queue.
1085 ************************************************************************/
1086 bool
1087 ixgbe_txeof(struct tx_ring *txr)
1088 {
1089 struct adapter *adapter = txr->adapter;
1090 struct ifnet *ifp = adapter->ifp;
1091 struct ixgbe_tx_buf *buf;
1092 union ixgbe_adv_tx_desc *txd;
1093 u32 work, processed = 0;
1094 u32 limit = adapter->tx_process_limit;
1095
1096 KASSERT(mutex_owned(&txr->tx_mtx));
1097
1098 #ifdef DEV_NETMAP
1099 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1100 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1101 struct netmap_adapter *na = NA(adapter->ifp);
1102 struct netmap_kring *kring = &na->tx_rings[txr->me];
1103 txd = txr->tx_base;
1104 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1105 BUS_DMASYNC_POSTREAD);
1106 /*
1107 * In netmap mode, all the work is done in the context
1108 * of the client thread. Interrupt handlers only wake up
1109 * clients, which may be sleeping on individual rings
1110 * or on a global resource for all rings.
1111 * To implement tx interrupt mitigation, we wake up the client
1112 * thread roughly every half ring, even if the NIC interrupts
1113 * more frequently. This is implemented as follows:
1114 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1115 * the slot that should wake up the thread (nkr_num_slots
1116 * means the user thread should not be woken up);
1117 * - the driver ignores tx interrupts unless netmap_mitigate=0
1118 * or the slot has the DD bit set.
1119 */
1120 if (!netmap_mitigate ||
1121 (kring->nr_kflags < kring->nkr_num_slots &&
1122 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1123 netmap_tx_irq(ifp, txr->me);
1124 }
1125 return false;
1126 }
1127 #endif /* DEV_NETMAP */
1128
1129 if (txr->tx_avail == txr->num_desc) {
1130 txr->busy = 0;
1131 return false;
1132 }
1133
1134 /* Get work starting point */
1135 work = txr->next_to_clean;
1136 buf = &txr->tx_buffers[work];
1137 txd = &txr->tx_base[work];
1138 work -= txr->num_desc; /* The distance to ring end */
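	/*
	 * 'work' is kept as a negative offset from the ring end and
	 * reaches zero exactly when the index wraps (see the !work
	 * tests below).
	 */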
1139 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1140 BUS_DMASYNC_POSTREAD);
1141
1142 do {
1143 union ixgbe_adv_tx_desc *eop = buf->eop;
1144 if (eop == NULL) /* No work */
1145 break;
1146
1147 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1148 break; /* I/O not complete */
1149
1150 if (buf->m_head) {
1151 txr->bytes += buf->m_head->m_pkthdr.len;
1152 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1153 0, buf->m_head->m_pkthdr.len,
1154 BUS_DMASYNC_POSTWRITE);
1155 ixgbe_dmamap_unload(txr->txtag, buf->map);
1156 m_freem(buf->m_head);
1157 buf->m_head = NULL;
1158 }
1159 buf->eop = NULL;
1160 ++txr->tx_avail;
1161
1162 /* We clean the range if multi segment */
1163 while (txd != eop) {
1164 ++txd;
1165 ++buf;
1166 ++work;
1167 /* wrap the ring? */
1168 if (__predict_false(!work)) {
1169 work -= txr->num_desc;
1170 buf = txr->tx_buffers;
1171 txd = txr->tx_base;
1172 }
1173 if (buf->m_head) {
1174 txr->bytes +=
1175 buf->m_head->m_pkthdr.len;
1176 bus_dmamap_sync(txr->txtag->dt_dmat,
1177 buf->map,
1178 0, buf->m_head->m_pkthdr.len,
1179 BUS_DMASYNC_POSTWRITE);
1180 ixgbe_dmamap_unload(txr->txtag,
1181 buf->map);
1182 m_freem(buf->m_head);
1183 buf->m_head = NULL;
1184 }
1185 ++txr->tx_avail;
1186 buf->eop = NULL;
1187
1188 }
1189 ++txr->packets;
1190 ++processed;
1191 ++ifp->if_opackets;
1192
1193 /* Try the next packet */
1194 ++txd;
1195 ++buf;
1196 ++work;
1197 /* reset with a wrap */
1198 if (__predict_false(!work)) {
1199 work -= txr->num_desc;
1200 buf = txr->tx_buffers;
1201 txd = txr->tx_base;
1202 }
1203 prefetch(txd);
1204 } while (__predict_true(--limit));
1205
1206 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1207 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1208
1209 work += txr->num_desc;
1210 txr->next_to_clean = work;
1211
1212 /*
1213 	 * Queue hang detection: we know there is work
1214 	 * outstanding or the first return above would have
1215 	 * been taken, so increment busy if nothing managed
1216 	 * to get cleaned; local_timer will then check the
1217 	 * count and mark the queue HUNG if it exceeds the
1218 	 * maximum number of attempts.
1219 */
1220 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1221 ++txr->busy;
1222 /*
1223 	 * If anything gets cleaned we reset the state to 1;
1224 	 * note this will turn off HUNG if it is set.
1225 */
1226 if (processed)
1227 txr->busy = 1;
1228
1229 if (txr->tx_avail == txr->num_desc)
1230 txr->busy = 0;
1231
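	/* Returning true means the budget was exhausted; more work may remain. */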
1232 return ((limit > 0) ? false : true);
1233 } /* ixgbe_txeof */
1234
1235 /************************************************************************
1236 * ixgbe_rsc_count
1237 *
1238 * Used to detect a descriptor that has been merged by Hardware RSC.
1239 ************************************************************************/
1240 static inline u32
1241 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1242 {
1243 return (le32toh(rx->wb.lower.lo_dword.data) &
1244 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1245 } /* ixgbe_rsc_count */
1246
1247 /************************************************************************
1248 * ixgbe_setup_hw_rsc
1249 *
1250 * Initialize Hardware RSC (LRO) feature on 82599
1251  * for an RX ring; it is toggled by the LRO capability
1252 * even though it is transparent to the stack.
1253 *
1254 * NOTE: Since this HW feature only works with IPv4 and
1255 * testing has shown soft LRO to be as effective,
1256 * this feature will be disabled by default.
1257 ************************************************************************/
1258 static void
1259 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1260 {
1261 struct adapter *adapter = rxr->adapter;
1262 struct ixgbe_hw *hw = &adapter->hw;
1263 u32 rscctrl, rdrxctl;
1264
1265 /* If turning LRO/RSC off we need to disable it */
1266 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1267 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1268 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
 		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1269 		return;
1270 	}
1271
1272 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1273 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1274 #ifdef DEV_NETMAP
1275 /* Always strip CRC unless Netmap disabled it */
1276 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1277 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1278 ix_crcstrip)
1279 #endif /* DEV_NETMAP */
1280 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1281 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1282 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1283
1284 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1285 rscctrl |= IXGBE_RSCCTL_RSCEN;
1286 /*
1287 * Limit the total number of descriptors that
1288 * can be combined, so it does not exceed 64K
1289 */
1290 if (rxr->mbuf_sz == MCLBYTES)
1291 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1292 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1293 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1294 else if (rxr->mbuf_sz == MJUM9BYTES)
1295 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1296 else /* Using 16K cluster */
1297 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1298
1299 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1300
1301 /* Enable TCP header recognition */
1302 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1303 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1304
1305 /* Disable RSC for ACK packets */
1306 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1307 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1308
1309 rxr->hw_rsc = TRUE;
1310 } /* ixgbe_setup_hw_rsc */
1311
1312 /************************************************************************
1313 * ixgbe_refresh_mbufs
1314 *
1315  * Refresh mbuf buffers for RX descriptor rings.
1316  *   - Keeps its own state, so discards due to resource
1317  *     exhaustion are unnecessary. If an mbuf cannot be
1318  *     obtained it just returns, keeping its placeholder;
1319  *     thus it can simply be called again to retry.
1320 ************************************************************************/
1321 static void
1322 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1323 {
1324 struct adapter *adapter = rxr->adapter;
1325 struct ixgbe_rx_buf *rxbuf;
1326 struct mbuf *mp;
1327 int i, j, error;
1328 bool refreshed = false;
1329
1330 i = j = rxr->next_to_refresh;
1331 	/* 'j' runs one slot ahead of 'i' and controls when the loop stops */
1332 if (++j == rxr->num_desc)
1333 j = 0;
1334
1335 while (j != limit) {
1336 rxbuf = &rxr->rx_buffers[i];
1337 if (rxbuf->buf == NULL) {
1338 mp = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1339 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1340 if (mp == NULL) {
1341 rxr->no_jmbuf.ev_count++;
1342 goto update;
1343 }
1344 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1345 m_adj(mp, ETHER_ALIGN);
1346 } else
1347 mp = rxbuf->buf;
1348
1349 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1350
1351 /* If we're dealing with an mbuf that was copied rather
1352 * than replaced, there's no need to go through busdma.
1353 */
1354 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1355 /* Get the memory mapping */
1356 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1357 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1358 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1359 if (error != 0) {
1360 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1361 m_free(mp);
1362 rxbuf->buf = NULL;
1363 goto update;
1364 }
1365 rxbuf->buf = mp;
1366 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1367 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1368 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1369 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1370 } else {
1371 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1372 rxbuf->flags &= ~IXGBE_RX_COPY;
1373 }
1374
1375 refreshed = true;
1376 /* Next is precalculated */
1377 i = j;
1378 rxr->next_to_refresh = i;
1379 if (++j == rxr->num_desc)
1380 j = 0;
1381 }
1382
1383 update:
1384 if (refreshed) /* Update hardware tail index */
1385 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1386
1387 return;
1388 } /* ixgbe_refresh_mbufs */
1389
1390 /************************************************************************
1391 * ixgbe_allocate_receive_buffers
1392 *
1393 * Allocate memory for rx_buffer structures. Since we use one
1394  * rx_buffer per received packet, the maximum number of rx_buffers
1395 * that we'll need is equal to the number of receive descriptors
1396 * that we've allocated.
1397 ************************************************************************/
1398 static int
1399 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1400 {
1401 struct adapter *adapter = rxr->adapter;
1402 device_t dev = adapter->dev;
1403 struct ixgbe_rx_buf *rxbuf;
1404 int bsize, error;
1405
1406 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1407 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1408 M_NOWAIT | M_ZERO);
1409 if (rxr->rx_buffers == NULL) {
1410 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1411 error = ENOMEM;
1412 goto fail;
1413 }
1414
1415 error = ixgbe_dma_tag_create(
1416 /* parent */ adapter->osdep.dmat,
1417 /* alignment */ 1,
1418 /* bounds */ 0,
1419 /* maxsize */ MJUM16BYTES,
1420 /* nsegments */ 1,
1421 /* maxsegsize */ MJUM16BYTES,
1422 /* flags */ 0,
1423 &rxr->ptag);
1424 if (error != 0) {
1425 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1426 goto fail;
1427 }
1428
1429 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1430 rxbuf = &rxr->rx_buffers[i];
1431 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1432 if (error) {
1433 aprint_error_dev(dev, "Unable to create RX dma map\n");
1434 goto fail;
1435 }
1436 }
1437
1438 return (0);
1439
1440 fail:
1441 /* Frees all, but can handle partial completion */
1442 ixgbe_free_receive_structures(adapter);
1443
1444 return (error);
1445 } /* ixgbe_allocate_receive_buffers */
1446
1447 /************************************************************************
1448 * ixgbe_free_receive_ring
1449 ************************************************************************/
1450 static void
1451 ixgbe_free_receive_ring(struct rx_ring *rxr)
1452 {
1453 for (int i = 0; i < rxr->num_desc; i++) {
1454 ixgbe_rx_discard(rxr, i);
1455 }
1456 } /* ixgbe_free_receive_ring */
1457
1458 /************************************************************************
1459 * ixgbe_setup_receive_ring
1460 *
1461 * Initialize a receive ring and its buffers.
1462 ************************************************************************/
1463 static int
1464 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1465 {
1466 struct adapter *adapter;
1467 struct ixgbe_rx_buf *rxbuf;
1468 #ifdef LRO
1469 struct ifnet *ifp;
1470 struct lro_ctrl *lro = &rxr->lro;
1471 #endif /* LRO */
1472 #ifdef DEV_NETMAP
1473 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1474 struct netmap_slot *slot;
1475 #endif /* DEV_NETMAP */
1476 int rsize, error = 0;
1477
1478 adapter = rxr->adapter;
1479 #ifdef LRO
1480 ifp = adapter->ifp;
1481 #endif /* LRO */
1482
1483 /* Clear the ring contents */
1484 IXGBE_RX_LOCK(rxr);
1485
1486 #ifdef DEV_NETMAP
1487 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1488 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1489 #endif /* DEV_NETMAP */
1490
1491 rsize = roundup2(adapter->num_rx_desc *
1492 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1493 bzero((void *)rxr->rx_base, rsize);
1494 /* Cache the size */
1495 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1496
1497 /* Free current RX buffer structs and their mbufs */
1498 ixgbe_free_receive_ring(rxr);
1499
1500 /* Now replenish the mbufs */
1501 for (int j = 0; j != rxr->num_desc; ++j) {
1502 struct mbuf *mp;
1503
1504 rxbuf = &rxr->rx_buffers[j];
1505
1506 #ifdef DEV_NETMAP
1507 /*
1508 * In netmap mode, fill the map and set the buffer
1509 * address in the NIC ring, considering the offset
1510 * between the netmap and NIC rings (see comment in
1511 * ixgbe_setup_transmit_ring() ). No need to allocate
1512 * an mbuf, so end the block with a continue;
1513 */
1514 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1515 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1516 uint64_t paddr;
1517 void *addr;
1518
1519 addr = PNMB(na, slot + sj, &paddr);
1520 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1521 /* Update descriptor and the cached value */
1522 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1523 rxbuf->addr = htole64(paddr);
1524 continue;
1525 }
1526 #endif /* DEV_NETMAP */
1527
1528 rxbuf->flags = 0;
1529 rxbuf->buf = ixgbe_getjcl(&adapter->jcl_head, M_NOWAIT,
1530 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1531 if (rxbuf->buf == NULL) {
1532 error = ENOBUFS;
1533 goto fail;
1534 }
1535 mp = rxbuf->buf;
1536 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1537 /* Get the memory mapping */
1538 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1539 mp, BUS_DMA_NOWAIT);
1540 if (error != 0)
1541 goto fail;
1542 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1543 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1544 /* Update the descriptor and the cached value */
1545 rxr->rx_base[j].read.pkt_addr =
1546 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1547 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1548 }
1549
1550
1551 /* Setup our descriptor indices */
1552 rxr->next_to_check = 0;
1553 rxr->next_to_refresh = 0;
1554 rxr->lro_enabled = FALSE;
1555 rxr->rx_copies.ev_count = 0;
1556 #if 0 /* NetBSD */
1557 rxr->rx_bytes.ev_count = 0;
1558 #if 1 /* Fix inconsistency */
1559 rxr->rx_packets.ev_count = 0;
1560 #endif
1561 #endif
1562 rxr->vtag_strip = FALSE;
1563
1564 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1565 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1566
1567 /*
1568 * Now set up the LRO interface
1569 */
1570 if (ixgbe_rsc_enable)
1571 ixgbe_setup_hw_rsc(rxr);
1572 #ifdef LRO
1573 else if (ifp->if_capenable & IFCAP_LRO) {
1574 device_t dev = adapter->dev;
1575 int err = tcp_lro_init(lro);
1576 if (err) {
1577 device_printf(dev, "LRO Initialization failed!\n");
1578 goto fail;
1579 }
1580 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1581 rxr->lro_enabled = TRUE;
1582 lro->ifp = adapter->ifp;
1583 }
1584 #endif /* LRO */
1585
1586 IXGBE_RX_UNLOCK(rxr);
1587
1588 return (0);
1589
1590 fail:
1591 ixgbe_free_receive_ring(rxr);
1592 IXGBE_RX_UNLOCK(rxr);
1593
1594 return (error);
1595 } /* ixgbe_setup_receive_ring */
1596
1597 /************************************************************************
1598 * ixgbe_setup_receive_structures - Initialize all receive rings.
1599 ************************************************************************/
1600 int
1601 ixgbe_setup_receive_structures(struct adapter *adapter)
1602 {
1603 struct rx_ring *rxr = adapter->rx_rings;
1604 int j;
1605
1606 /*
1607 * Now reinitialize our supply of jumbo mbufs. The number
1608 * or size of jumbo mbufs may have changed.
1609 * Assume all of rxr->ptag are the same.
1610 */
1611 ixgbe_jcl_reinit(&adapter->jcl_head, rxr->ptag->dt_dmat,
1612 (2 * adapter->num_rx_desc) * adapter->num_queues,
1613 adapter->rx_mbuf_sz);
1614
1615 for (j = 0; j < adapter->num_queues; j++, rxr++)
1616 if (ixgbe_setup_receive_ring(rxr))
1617 goto fail;
1618
1619 return (0);
1620 fail:
1621 /*
1622 	 * Free the RX buffers allocated so far; we only handle
1623 	 * the rings that completed, since the failing case will
1624 	 * have cleaned up for itself. Ring 'j' failed, so it is the terminus.
1625 */
1626 for (int i = 0; i < j; ++i) {
1627 rxr = &adapter->rx_rings[i];
1628 IXGBE_RX_LOCK(rxr);
1629 ixgbe_free_receive_ring(rxr);
1630 IXGBE_RX_UNLOCK(rxr);
1631 }
1632
1633 return (ENOBUFS);
1634 } /* ixgbe_setup_receive_structures */
1635
1636
1637 /************************************************************************
1638 * ixgbe_free_receive_structures - Free all receive rings.
1639 ************************************************************************/
1640 void
1641 ixgbe_free_receive_structures(struct adapter *adapter)
1642 {
1643 struct rx_ring *rxr = adapter->rx_rings;
1644
1645 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1646
1647 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1648 ixgbe_free_receive_buffers(rxr);
1649 #ifdef LRO
1650 /* Free LRO memory */
1651 tcp_lro_free(&rxr->lro);
1652 #endif /* LRO */
1653 /* Free the ring memory as well */
1654 ixgbe_dma_free(adapter, &rxr->rxdma);
1655 IXGBE_RX_LOCK_DESTROY(rxr);
1656 }
1657
1658 free(adapter->rx_rings, M_DEVBUF);
1659 } /* ixgbe_free_receive_structures */
1660
1661
1662 /************************************************************************
1663 * ixgbe_free_receive_buffers - Free receive ring data structures
1664 ************************************************************************/
1665 static void
1666 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1667 {
1668 struct adapter *adapter = rxr->adapter;
1669 struct ixgbe_rx_buf *rxbuf;
1670
1671 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1672
1673 /* Cleanup any existing buffers */
1674 if (rxr->rx_buffers != NULL) {
1675 for (int i = 0; i < adapter->num_rx_desc; i++) {
1676 rxbuf = &rxr->rx_buffers[i];
1677 ixgbe_rx_discard(rxr, i);
1678 if (rxbuf->pmap != NULL) {
1679 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1680 rxbuf->pmap = NULL;
1681 }
1682 }
1683 if (rxr->rx_buffers != NULL) {
1684 free(rxr->rx_buffers, M_DEVBUF);
1685 rxr->rx_buffers = NULL;
1686 }
1687 }
1688
1689 if (rxr->ptag != NULL) {
1690 ixgbe_dma_tag_destroy(rxr->ptag);
1691 rxr->ptag = NULL;
1692 }
1693
1694 return;
1695 } /* ixgbe_free_receive_buffers */
1696
1697 /************************************************************************
1698 * ixgbe_rx_input
1699 ************************************************************************/
1700 static __inline void
1701 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1702 u32 ptype)
1703 {
1704 struct adapter *adapter = ifp->if_softc;
1705
1706 #ifdef LRO
1707 struct ethercom *ec = &adapter->osdep.ec;
1708
1709 /*
1710 	 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1711 	 * been computed by hardware, and the packet must not carry a VLAN tag
1712 	 * in the Ethernet header. For IPv6 we do not yet support extension headers.
1713 */
1714 if (rxr->lro_enabled &&
1715 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1716 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1717 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1718 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1719 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1720 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1721 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1722 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1723 /*
1724 * Send to the stack if:
1725 ** - LRO not enabled, or
1726 ** - no LRO resources, or
1727 ** - lro enqueue fails
1728 */
1729 if (rxr->lro.lro_cnt != 0)
1730 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1731 return;
1732 }
1733 #endif /* LRO */
1734
1735 if_percpuq_enqueue(adapter->ipq, m);
1736 } /* ixgbe_rx_input */
1737
1738 /************************************************************************
1739 * ixgbe_rx_discard
1740 ************************************************************************/
1741 static __inline void
1742 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1743 {
1744 struct ixgbe_rx_buf *rbuf;
1745
1746 rbuf = &rxr->rx_buffers[i];
1747
1748 /*
1749 * With advanced descriptors the writeback
1750 	 * clobbers the buffer addresses, so it is easier
1751 	 * to just free the existing mbufs and take
1752 	 * the normal refresh path to get new buffers
1753 	 * and mappings.
1754 */
1755
1756 if (rbuf->fmp != NULL) {/* Partial chain ? */
1757 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1758 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1759 m_freem(rbuf->fmp);
1760 rbuf->fmp = NULL;
1761 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1762 } else if (rbuf->buf) {
1763 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1764 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1765 m_free(rbuf->buf);
1766 rbuf->buf = NULL;
1767 }
1768 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1769
1770 rbuf->flags = 0;
1771
1772 return;
1773 } /* ixgbe_rx_discard */
1774
1775
1776 /************************************************************************
1777 * ixgbe_rxeof
1778 *
1779 * Executes in interrupt context. It replenishes the
1780  * mbufs in the descriptor ring and passes data that has
1781  * been DMA'ed into host memory up to the upper layer.
1782 *
1783 * Return TRUE for more work, FALSE for all clean.
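 *
 *   Note: the caller (assumed to be the per-queue interrupt/softint
 *   handler) is expected to keep invoking this while it returns TRUE.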
1784 ************************************************************************/
1785 bool
1786 ixgbe_rxeof(struct ix_queue *que)
1787 {
1788 struct adapter *adapter = que->adapter;
1789 struct rx_ring *rxr = que->rxr;
1790 struct ifnet *ifp = adapter->ifp;
1791 #ifdef LRO
1792 struct lro_ctrl *lro = &rxr->lro;
1793 #endif /* LRO */
1794 union ixgbe_adv_rx_desc *cur;
1795 struct ixgbe_rx_buf *rbuf, *nbuf;
1796 int i, nextp, processed = 0;
1797 u32 staterr = 0;
1798 u32 count = adapter->rx_process_limit;
1799 #ifdef RSS
1800 u16 pkt_info;
1801 #endif
1802
1803 IXGBE_RX_LOCK(rxr);
1804
1805 #ifdef DEV_NETMAP
1806 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1807 /* Same as the txeof routine: wakeup clients on intr. */
1808 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1809 IXGBE_RX_UNLOCK(rxr);
1810 return (FALSE);
1811 }
1812 }
1813 #endif /* DEV_NETMAP */
1814
1815 for (i = rxr->next_to_check; count != 0;) {
1816 struct mbuf *sendmp, *mp;
1817 u32 rsc, ptype;
1818 u16 len;
1819 u16 vtag = 0;
1820 bool eop;
1821
1822 /* Sync the ring. */
1823 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1824 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1825
1826 cur = &rxr->rx_base[i];
1827 staterr = le32toh(cur->wb.upper.status_error);
1828 #ifdef RSS
1829 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1830 #endif
1831
1832 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1833 break;
1834 if ((ifp->if_flags & IFF_RUNNING) == 0)
1835 break;
1836
1837 count--;
1838 sendmp = NULL;
1839 nbuf = NULL;
1840 rsc = 0;
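 /*
  * Clear the writeback status so a stale DD bit cannot be mistaken
  * for a new completion if this slot is examined again before the
  * hardware writes it back.
  */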
1841 cur->wb.upper.status_error = 0;
1842 rbuf = &rxr->rx_buffers[i];
1843 mp = rbuf->buf;
1844
1845 len = le16toh(cur->wb.upper.length);
1846 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1847 IXGBE_RXDADV_PKTTYPE_MASK;
1848 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1849
1850 /* Make sure bad packets are discarded */
1851 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1852 #if __FreeBSD_version >= 1100036
1853 if (adapter->feat_en & IXGBE_FEATURE_VF)
1854 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1855 #endif
1856 rxr->rx_discarded.ev_count++;
1857 ixgbe_rx_discard(rxr, i);
1858 goto next_desc;
1859 }
1860
1861 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1862 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1863
1864 /*
1865 * On the 82599, which supports a hardware
1866 * LRO (called HW RSC), the fragments of a
1867 * packet need not sit in sequential
1868 * descriptors; instead the next descriptor
1869 * is indicated in bits of the current one.
1870 * This also means we may process more than
1871 * one packet at a time, something that had
1872 * never been true before and that required
1873 * eliminating the global chain pointers in
1874 * favor of what we are doing here. -jfv
1875 */
1876 if (!eop) {
1877 /*
1878 * Figure out the next descriptor
1879 * of this frame.
1880 */
1881 if (rxr->hw_rsc == TRUE) {
1882 rsc = ixgbe_rsc_count(cur);
1883 rxr->rsc_num += (rsc - 1);
1884 }
1885 if (rsc) { /* Get hardware index */
1886 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1887 IXGBE_RXDADV_NEXTP_SHIFT);
1888 } else { /* Just sequential */
1889 nextp = i + 1;
1890 if (nextp == adapter->num_rx_desc)
1891 nextp = 0;
1892 }
1893 nbuf = &rxr->rx_buffers[nextp];
1894 prefetch(nbuf);
1895 }
1896 /*
1897 * Rather than using the fmp/lmp global pointers
1898 * we now keep the head of a packet chain in the
1899 * buffer struct and pass this along from one
1900 * descriptor to the next, until we get EOP.
1901 */
1902 mp->m_len = len;
1903 /*
1904 * See if there is a stored chain head from a previous
1905 * descriptor; that determines how this buffer is handled.
1906 */
1907 sendmp = rbuf->fmp;
1908 if (sendmp != NULL) { /* secondary frag */
1909 rbuf->buf = rbuf->fmp = NULL;
1910 mp->m_flags &= ~M_PKTHDR;
1911 sendmp->m_pkthdr.len += mp->m_len;
1912 } else {
1913 /*
1914 * Optimize. This might be a small packet,
1915 * maybe just a TCP ACK. Do a fast copy that
1916 * is cache aligned into a new mbuf, and
1917 * leave the old mbuf+cluster for re-use.
1918 */
1919 if (eop && len <= IXGBE_RX_COPY_LEN) {
1920 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1921 if (sendmp != NULL) {
1922 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1923 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1924 len);
1925 sendmp->m_len = len;
1926 rxr->rx_copies.ev_count++;
1927 rbuf->flags |= IXGBE_RX_COPY;
1928 }
1929 }
1930 if (sendmp == NULL) {
1931 rbuf->buf = rbuf->fmp = NULL;
1932 sendmp = mp;
1933 }
1934
1935 /* First descriptor of a non-packet-split chain */
1936 sendmp->m_flags |= M_PKTHDR;
1937 sendmp->m_pkthdr.len = mp->m_len;
1938 }
1939 ++processed;
1940
1941 /* Pass the head pointer on */
1942 if (eop == 0) {
1943 nbuf->fmp = sendmp;
1944 sendmp = NULL;
1945 mp->m_next = nbuf->buf;
1946 } else { /* Sending this frame */
1947 m_set_rcvif(sendmp, ifp);
1948 ++rxr->packets;
1949 rxr->rx_packets.ev_count++;
1950 /* capture data for AIM */
1951 rxr->bytes += sendmp->m_pkthdr.len;
1952 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1953 /* Process vlan info */
1954 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1955 vtag = le16toh(cur->wb.upper.vlan);
1956 if (vtag) {
1957 vlan_set_tag(sendmp, vtag);
1958 }
1959 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1960 ixgbe_rx_checksum(staterr, sendmp, ptype,
1961 &adapter->stats.pf);
1962 }
1963
1964 #if 0 /* FreeBSD */
1965 /*
1966 * In case of multiqueue, we have RXCSUM.PCSD bit set
1967 * and never cleared. This means we have RSS hash
1968 * available to be used.
1969 */
1970 if (adapter->num_queues > 1) {
1971 sendmp->m_pkthdr.flowid =
1972 le32toh(cur->wb.lower.hi_dword.rss);
1973 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1974 case IXGBE_RXDADV_RSSTYPE_IPV4:
1975 M_HASHTYPE_SET(sendmp,
1976 M_HASHTYPE_RSS_IPV4);
1977 break;
1978 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1979 M_HASHTYPE_SET(sendmp,
1980 M_HASHTYPE_RSS_TCP_IPV4);
1981 break;
1982 case IXGBE_RXDADV_RSSTYPE_IPV6:
1983 M_HASHTYPE_SET(sendmp,
1984 M_HASHTYPE_RSS_IPV6);
1985 break;
1986 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1987 M_HASHTYPE_SET(sendmp,
1988 M_HASHTYPE_RSS_TCP_IPV6);
1989 break;
1990 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1991 M_HASHTYPE_SET(sendmp,
1992 M_HASHTYPE_RSS_IPV6_EX);
1993 break;
1994 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1995 M_HASHTYPE_SET(sendmp,
1996 M_HASHTYPE_RSS_TCP_IPV6_EX);
1997 break;
1998 #if __FreeBSD_version > 1100000
1999 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2000 M_HASHTYPE_SET(sendmp,
2001 M_HASHTYPE_RSS_UDP_IPV4);
2002 break;
2003 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2004 M_HASHTYPE_SET(sendmp,
2005 M_HASHTYPE_RSS_UDP_IPV6);
2006 break;
2007 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2008 M_HASHTYPE_SET(sendmp,
2009 M_HASHTYPE_RSS_UDP_IPV6_EX);
2010 break;
2011 #endif
2012 default:
2013 M_HASHTYPE_SET(sendmp,
2014 M_HASHTYPE_OPAQUE_HASH);
2015 }
2016 } else {
2017 sendmp->m_pkthdr.flowid = que->msix;
2018 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2019 }
2020 #endif
2021 }
2022 next_desc:
2023 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2024 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2025
2026 /* Advance our pointers to the next descriptor. */
2027 if (++i == rxr->num_desc)
2028 i = 0;
2029
2030 /* Now send to the stack or do LRO */
2031 if (sendmp != NULL) {
2032 rxr->next_to_check = i;
2033 IXGBE_RX_UNLOCK(rxr);
2034 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2035 IXGBE_RX_LOCK(rxr);
2036 i = rxr->next_to_check;
2037 }
2038
2039 /* Every 8 processed descriptors, refresh the mbuf supply */
2040 if (processed == 8) {
2041 ixgbe_refresh_mbufs(rxr, i);
2042 processed = 0;
2043 }
2044 }
2045
2046 /* Refresh any remaining buf structs */
2047 if (ixgbe_rx_unrefreshed(rxr))
2048 ixgbe_refresh_mbufs(rxr, i);
2049
2050 rxr->next_to_check = i;
2051
2052 IXGBE_RX_UNLOCK(rxr);
2053
2054 #ifdef LRO
2055 /*
2056 * Flush any outstanding LRO work
2057 */
2058 tcp_lro_flush_all(lro);
2059 #endif /* LRO */
2060
2061 /*
2062 * Still have cleaning to do?
2063 */
2064 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2065 return (TRUE);
2066
2067 return (FALSE);
2068 } /* ixgbe_rxeof */
2069
2070
2071 /************************************************************************
2072 * ixgbe_rx_checksum
2073 *
2074 * Verify that the hardware indicated that the checksum is valid.
2075 * Inform the stack of the checksum status so that it does not
2076 * spend time re-verifying it.
2077 ************************************************************************/
2078 static void
2079 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2080 struct ixgbe_hw_stats *stats)
2081 {
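 /*
  * status_error carries status flags in its low bits and error flags
  * in its top byte; the casts below split them so the IXGBE_RXD_STAT_*
  * and IXGBE_RXD_ERR_* definitions can be applied directly.
  */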
2082 u16 status = (u16)staterr;
2083 u8 errors = (u8)(staterr >> 24);
2084 #if 0
2085 bool sctp = false;
2086
2087 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2088 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2089 sctp = true;
2090 #endif
2091
2092 /* IPv4 checksum */
2093 if (status & IXGBE_RXD_STAT_IPCS) {
2094 stats->ipcs.ev_count++;
2095 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2096 /* IP Checksum Good */
2097 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2098 } else {
2099 stats->ipcs_bad.ev_count++;
2100 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2101 }
2102 }
2103 /* TCP/UDP/SCTP checksum */
2104 if (status & IXGBE_RXD_STAT_L4CS) {
2105 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2106 stats->l4cs.ev_count++;
2107 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2108 mp->m_pkthdr.csum_flags |= type;
2109 } else {
2110 stats->l4cs_bad.ev_count++;
2111 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2112 }
2113 }
2114 } /* ixgbe_rx_checksum */
2115
2116 /************************************************************************
2117 * ixgbe_dma_malloc
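 *
 *   Create a tag, then allocate, map and load a single DMA area for a
 *   descriptor ring.  A minimal usage sketch, mirroring
 *   ixgbe_allocate_queues() below:
 *
 *     if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *             goto err_tx_desc;
 *     txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;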
2118 ************************************************************************/
2119 int
2120 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2121 struct ixgbe_dma_alloc *dma, const int mapflags)
2122 {
2123 device_t dev = adapter->dev;
2124 int r, rsegs;
2125
2126 r = ixgbe_dma_tag_create(
2127 /* parent */ adapter->osdep.dmat,
2128 /* alignment */ DBA_ALIGN,
2129 /* bounds */ 0,
2130 /* maxsize */ size,
2131 /* nsegments */ 1,
2132 /* maxsegsize */ size,
2133 /* flags */ BUS_DMA_ALLOCNOW,
2134 &dma->dma_tag);
2135 if (r != 0) {
2136 aprint_error_dev(dev,
2137 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__, r);
2138 goto fail_0;
2139 }
2140
2141 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2142 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2143 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2144 if (r != 0) {
2145 aprint_error_dev(dev,
2146 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2147 goto fail_1;
2148 }
2149
2150 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2151 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2152 if (r != 0) {
2153 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2154 __func__, r);
2155 goto fail_2;
2156 }
2157
2158 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2159 if (r != 0) {
2160 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2161 __func__, r);
2162 goto fail_3;
2163 }
2164
2165 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2166 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2167 if (r != 0) {
2168 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2169 __func__, r);
2170 goto fail_4;
2171 }
2172 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2173 dma->dma_size = size;
2174 return 0;
2175 fail_4:
2176 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2177 fail_3:
2178 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2179 fail_2:
2180 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2181 fail_1:
2182 ixgbe_dma_tag_destroy(dma->dma_tag);
2183 fail_0:
2184
2185 return (r);
2186 } /* ixgbe_dma_malloc */
2187
2188 /************************************************************************
2189 * ixgbe_dma_free
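 *
 *   Release the DMA resources set up by ixgbe_dma_malloc(): sync and
 *   unload the map, free the memory, then destroy the tag.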
2190 ************************************************************************/
2191 void
2192 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2193 {
2194 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2195 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2196 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2197 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2198 ixgbe_dma_tag_destroy(dma->dma_tag);
2199 } /* ixgbe_dma_free */
2200
2201
2202 /************************************************************************
2203 * ixgbe_allocate_queues
2204 *
2205 * Allocate memory for the transmit and receive rings, and then
2206 * the descriptors associated with each.  Called only once at attach.
2207 ************************************************************************/
2208 int
2209 ixgbe_allocate_queues(struct adapter *adapter)
2210 {
2211 device_t dev = adapter->dev;
2212 struct ix_queue *que;
2213 struct tx_ring *txr;
2214 struct rx_ring *rxr;
2215 int rsize, tsize, error = IXGBE_SUCCESS;
2216 int txconf = 0, rxconf = 0;
2217
2218 /* First, allocate the top level queue structs */
2219 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2220 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2221 if (adapter->queues == NULL) {
2222 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2223 error = ENOMEM;
2224 goto fail;
2225 }
2226
2227 /* Second, allocate the TX ring struct memory */
2228 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2229 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2230 if (adapter->tx_rings == NULL) {
2231 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2232 error = ENOMEM;
2233 goto tx_fail;
2234 }
2235
2236 /* Third, allocate the RX ring */
2237 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2238 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2239 if (adapter->rx_rings == NULL) {
2240 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2241 error = ENOMEM;
2242 goto rx_fail;
2243 }
2244
2245 /* For the ring itself */
2246 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2247 DBA_ALIGN);
2248
2249 /*
2250 * Now set up the TX queues.  txconf is needed to handle the
2251 * possibility that things fail midcourse and we need to undo
2252 * the memory allocations gracefully.
2253 */
2254 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2255 /* Set up some basics */
2256 txr = &adapter->tx_rings[i];
2257 txr->adapter = adapter;
2258 txr->txr_interq = NULL;
2259 /* In case SR-IOV is enabled, align the index properly */
2260 #ifdef PCI_IOV
2261 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2262 i);
2263 #else
2264 txr->me = i;
2265 #endif
2266 txr->num_desc = adapter->num_tx_desc;
2267
2268 /* Initialize the TX side lock */
2269 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2270 device_xname(dev), txr->me);
2271 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2272
2273 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2274 BUS_DMA_NOWAIT)) {
2275 aprint_error_dev(dev,
2276 "Unable to allocate TX Descriptor memory\n");
2277 error = ENOMEM;
2278 goto err_tx_desc;
2279 }
2280 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2281 bzero((void *)txr->tx_base, tsize);
2282
2283 /* Now allocate transmit buffers for the ring */
2284 if (ixgbe_allocate_transmit_buffers(txr)) {
2285 aprint_error_dev(dev,
2286 "Critical Failure setting up transmit buffers\n");
2287 error = ENOMEM;
2288 goto err_tx_desc;
2289 }
2290 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2291 /* Allocate a buf ring */
2292 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2293 if (txr->txr_interq == NULL) {
2294 aprint_error_dev(dev,
2295 "Critical Failure setting up buf ring\n");
2296 error = ENOMEM;
2297 goto err_tx_desc;
2298 }
2299 }
2300 }
2301
2302 /*
2303 * Next the RX queues...
2304 */
2305 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2306 DBA_ALIGN);
2307 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2308 rxr = &adapter->rx_rings[i];
2309 /* Set up some basics */
2310 rxr->adapter = adapter;
2311 #ifdef PCI_IOV
2312 /* In case SR-IOV is enabled, align the index properly */
2313 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2314 i);
2315 #else
2316 rxr->me = i;
2317 #endif
2318 rxr->num_desc = adapter->num_rx_desc;
2319
2320 /* Initialize the RX side lock */
2321 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2322 device_xname(dev), rxr->me);
2323 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2324
2325 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2326 BUS_DMA_NOWAIT)) {
2327 aprint_error_dev(dev,
2328 "Unable to allocate RxDescriptor memory\n");
2329 error = ENOMEM;
2330 goto err_rx_desc;
2331 }
2332 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2333 bzero((void *)rxr->rx_base, rsize);
2334
2335 /* Allocate receive buffers for the ring */
2336 if (ixgbe_allocate_receive_buffers(rxr)) {
2337 aprint_error_dev(dev,
2338 "Critical Failure setting up receive buffers\n");
2339 error = ENOMEM;
2340 goto err_rx_desc;
2341 }
2342 }
2343
2344 /*
2345 * Finally set up the queue holding structs
2346 */
2347 for (int i = 0; i < adapter->num_queues; i++) {
2348 que = &adapter->queues[i];
2349 que->adapter = adapter;
2350 que->me = i;
2351 que->txr = &adapter->tx_rings[i];
2352 que->rxr = &adapter->rx_rings[i];
2353
2354 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2355 que->disabled_count = 0;
2356 }
2357
2358 return (0);
2359
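/*
 * Unwind on failure: txconf/rxconf record how many rings were set up,
 * so only those descriptor DMA areas are released here.
 */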
2360 err_rx_desc:
2361 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2362 ixgbe_dma_free(adapter, &rxr->rxdma);
2363 err_tx_desc:
2364 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2365 ixgbe_dma_free(adapter, &txr->txdma);
2366 free(adapter->rx_rings, M_DEVBUF);
2367 rx_fail:
2368 free(adapter->tx_rings, M_DEVBUF);
2369 tx_fail:
2370 free(adapter->queues, M_DEVBUF);
2371 fail:
2372 return (error);
2373 } /* ixgbe_allocate_queues */
2374