/* $NetBSD: ix_txrx.c,v 1.56 2019/10/16 06:36:00 knakahara Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
/*
 * HW RSC control:
 *  This feature only works with IPv4, and only on 82599 and later
 *  devices.  It also causes IP forwarding to fail, and unlike LRO
 *  it cannot be controlled by the stack.  For these reasons it is
 *  left off by default with no tunable interface; enabling it
 *  requires setting this to TRUE and recompiling.
 */
83 static bool ixgbe_rsc_enable = FALSE;
84
/*
 * For Flow Director: this is the number of TX packets we sample
 * for the filter pool.  With the default of 20, every 20th packet
 * is probed.
 *
 * This feature can be disabled by setting this to 0.
 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (adapter->link_active != LINK_STATE_UP) {
		/*
		 * Discard all packets buffered in the IFQ to avoid
		 * sending stale packets when the link comes back up.
		 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149 if (txr->txr_no_space)
150 return (ENETDOWN);
151
152 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 break;
155
156 IFQ_POLL(&ifp->if_snd, m_head);
157 if (m_head == NULL)
158 break;
159
160 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 break;
162 }
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head, BPF_D_OUT);
171 }
172
173 return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175
176 /************************************************************************
177 * ixgbe_legacy_start
178 *
179 * Called by the stack, this always uses the first tx ring,
180 * and should not be used with multiqueue tx enabled.
181 ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 struct adapter *adapter = ifp->if_softc;
186 struct tx_ring *txr = adapter->tx_rings;
187
188 if (ifp->if_flags & IFF_RUNNING) {
189 IXGBE_TX_LOCK(txr);
190 ixgbe_legacy_start_locked(ifp, txr);
191 IXGBE_TX_UNLOCK(txr);
192 }
193 } /* ixgbe_legacy_start */
194
195 /************************************************************************
196 * ixgbe_mq_start - Multiqueue Transmit Entry Point
197 *
198 * (if_transmit function)
199 ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 struct adapter *adapter = ifp->if_softc;
204 struct tx_ring *txr;
205 int i;
206 #ifdef RSS
207 uint32_t bucket_id;
208 #endif
209
210 /*
211 * When doing RSS, map it to the same outbound queue
212 * as the incoming flow would be mapped to.
213 *
214 * If everything is setup correctly, it should be the
215 * same bucket that the current CPU we're on is.
216 */
217 #ifdef RSS
218 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 &bucket_id) == 0)) {
222 i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 if (bucket_id > adapter->num_queues)
225 if_printf(ifp,
226 "bucket_id (%d) > num_queues (%d)\n",
227 bucket_id, adapter->num_queues);
228 #endif
229 } else
230 i = m->m_pkthdr.flowid % adapter->num_queues;
231 } else
#endif /* RSS */
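	/* Without RSS, spread transmit flows across queues by CPU index. */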
233 i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
234
235 /* Check for a hung queue and pick alternative */
236 if (((1ULL << i) & adapter->active_queues) == 0)
237 i = ffs64(adapter->active_queues);
238
239 txr = &adapter->tx_rings[i];
240
241 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
242 m_freem(m);
243 txr->pcq_drops.ev_count++;
244 return ENOBUFS;
245 }
246 if (IXGBE_TX_TRYLOCK(txr)) {
247 ixgbe_mq_start_locked(ifp, txr);
248 IXGBE_TX_UNLOCK(txr);
249 } else {
250 if (adapter->txrx_use_workqueue) {
251 u_int *enqueued;
252
			/*
			 * This function itself is not called in interrupt
			 * context, but it can be called in fast softint
			 * context right after receiving forwarded packets.
			 * The workqueue must therefore be protected against
			 * double enqueueing when the machine handles both
			 * locally generated and forwarded packets.
			 */
261 enqueued = percpu_getref(adapter->txr_wq_enqueued);
262 if (*enqueued == 0) {
263 *enqueued = 1;
264 percpu_putref(adapter->txr_wq_enqueued);
265 workqueue_enqueue(adapter->txr_wq,
266 &txr->wq_cookie, curcpu());
267 } else
268 percpu_putref(adapter->txr_wq_enqueued);
269 } else {
270 kpreempt_disable();
271 softint_schedule(txr->txr_si);
272 kpreempt_enable();
273 }
274 }
275
276 return (0);
277 } /* ixgbe_mq_start */
278
279 /************************************************************************
280 * ixgbe_mq_start_locked
281 ************************************************************************/
282 int
283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
284 {
285 struct mbuf *next;
286 int enqueued = 0, err = 0;
287
288 if (txr->adapter->link_active != LINK_STATE_UP) {
		/*
		 * Discard all packets buffered in txr_interq to avoid
		 * sending stale packets when the link comes back up.
		 */
293 ixgbe_drain(ifp, txr);
294 return (ENETDOWN);
295 }
296 if ((ifp->if_flags & IFF_RUNNING) == 0)
297 return (ENETDOWN);
298 if (txr->txr_no_space)
299 return (ENETDOWN);
300
301 /* Process the queue */
302 while ((next = pcq_get(txr->txr_interq)) != NULL) {
303 if ((err = ixgbe_xmit(txr, next)) != 0) {
304 m_freem(next);
305 /* All errors are counted in ixgbe_xmit() */
306 break;
307 }
308 enqueued++;
309 #if __FreeBSD_version >= 1100036
310 /*
311 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
313 * address.
314 */
315 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
316 (next->m_flags & M_MCAST))
317 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
318 #endif
319 /* Send a copy of the frame to the BPF listener */
320 bpf_mtap(ifp, next, BPF_D_OUT);
321 if ((ifp->if_flags & IFF_RUNNING) == 0)
322 break;
323 }
324
325 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
326 ixgbe_txeof(txr);
327
328 return (err);
329 } /* ixgbe_mq_start_locked */
330
331 /************************************************************************
332 * ixgbe_deferred_mq_start
333 *
334 * Called from a softint and workqueue (indirectly) to drain queued
335 * transmit packets.
336 ************************************************************************/
337 void
338 ixgbe_deferred_mq_start(void *arg)
339 {
340 struct tx_ring *txr = arg;
341 struct adapter *adapter = txr->adapter;
342 struct ifnet *ifp = adapter->ifp;
343
344 IXGBE_TX_LOCK(txr);
345 if (pcq_peek(txr->txr_interq) != NULL)
346 ixgbe_mq_start_locked(ifp, txr);
347 IXGBE_TX_UNLOCK(txr);
348 } /* ixgbe_deferred_mq_start */
349
350 /************************************************************************
351 * ixgbe_deferred_mq_start_work
352 *
353 * Called from a workqueue to drain queued transmit packets.
354 ************************************************************************/
355 void
356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
357 {
358 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
359 struct adapter *adapter = txr->adapter;
360 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
361 *enqueued = 0;
362 percpu_putref(adapter->txr_wq_enqueued);
363
364 ixgbe_deferred_mq_start(txr);
} /* ixgbe_deferred_mq_start_work */
366
367 /************************************************************************
368 * ixgbe_drain_all
369 ************************************************************************/
370 void
371 ixgbe_drain_all(struct adapter *adapter)
372 {
373 struct ifnet *ifp = adapter->ifp;
374 struct ix_queue *que = adapter->queues;
375
376 for (int i = 0; i < adapter->num_queues; i++, que++) {
377 struct tx_ring *txr = que->txr;
378
379 IXGBE_TX_LOCK(txr);
380 ixgbe_drain(ifp, txr);
381 IXGBE_TX_UNLOCK(txr);
382 }
383 }
384
385 /************************************************************************
386 * ixgbe_xmit
387 *
388 * Maps the mbufs to tx descriptors, allowing the
389 * TX engine to transmit the packets.
390 *
391 * Return 0 on success, positive on failure
392 ************************************************************************/
393 static int
394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
395 {
396 struct adapter *adapter = txr->adapter;
397 struct ixgbe_tx_buf *txbuf;
398 union ixgbe_adv_tx_desc *txd = NULL;
399 struct ifnet *ifp = adapter->ifp;
400 int i, j, error;
401 int first;
402 u32 olinfo_status = 0, cmd_type_len;
403 bool remap = TRUE;
404 bus_dmamap_t map;
405
406 /* Basic descriptor defines */
407 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
408 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
409
410 if (vlan_has_tag(m_head))
411 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
412
413 /*
414 * Important to capture the first descriptor
415 * used because it will contain the index of
416 * the one we tell the hardware to report back
417 */
418 first = txr->next_avail_desc;
419 txbuf = &txr->tx_buffers[first];
420 map = txbuf->map;
421
422 /*
423 * Map the packet for DMA.
424 */
425 retry:
426 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
427 BUS_DMA_NOWAIT);
428
429 if (__predict_false(error)) {
430 struct mbuf *m;
431
432 switch (error) {
433 case EAGAIN:
434 txr->q_eagain_tx_dma_setup++;
435 return EAGAIN;
436 case ENOMEM:
437 txr->q_enomem_tx_dma_setup++;
438 return EAGAIN;
439 case EFBIG:
440 /* Try it again? - one try */
441 if (remap == TRUE) {
442 remap = FALSE;
443 /*
444 * XXX: m_defrag will choke on
445 * non-MCLBYTES-sized clusters
446 */
447 txr->q_efbig_tx_dma_setup++;
448 m = m_defrag(m_head, M_NOWAIT);
449 if (m == NULL) {
450 txr->q_mbuf_defrag_failed++;
451 return ENOBUFS;
452 }
453 m_head = m;
454 goto retry;
455 } else {
456 txr->q_efbig2_tx_dma_setup++;
457 return error;
458 }
459 case EINVAL:
460 txr->q_einval_tx_dma_setup++;
461 return error;
462 default:
463 txr->q_other_tx_dma_setup++;
464 return error;
465 }
466 }
467
468 /* Make certain there are enough descriptors */
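	/*
	 * The "+ 2" reserves room for the offload context descriptor
	 * consumed below; the extra slot appears to be simple slack.
	 */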
469 if (txr->tx_avail < (map->dm_nsegs + 2)) {
470 txr->txr_no_space = true;
471 txr->no_desc_avail.ev_count++;
472 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
473 return EAGAIN;
474 }
475
476 /*
477 * Set up the appropriate offload context
478 * this will consume the first descriptor
479 */
480 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
481 if (__predict_false(error)) {
482 return (error);
483 }
484
485 /* Do the flow director magic */
486 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
487 (txr->atr_sample) && (!adapter->fdir_reinit)) {
488 ++txr->atr_count;
489 if (txr->atr_count >= atr_sample_rate) {
490 ixgbe_atr(txr, m_head);
491 txr->atr_count = 0;
492 }
493 }
494
495 olinfo_status |= IXGBE_ADVTXD_CC;
496 i = txr->next_avail_desc;
497 for (j = 0; j < map->dm_nsegs; j++) {
498 bus_size_t seglen;
499 bus_addr_t segaddr;
500
501 txbuf = &txr->tx_buffers[i];
502 txd = &txr->tx_base[i];
503 seglen = map->dm_segs[j].ds_len;
504 segaddr = htole64(map->dm_segs[j].ds_addr);
505
506 txd->read.buffer_addr = segaddr;
507 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
508 txd->read.olinfo_status = htole32(olinfo_status);
509
510 if (++i == txr->num_desc)
511 i = 0;
512 }
513
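	/* Mark the last data descriptor: end of packet (EOP) and request a status writeback (RS). */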
514 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
515 txr->tx_avail -= map->dm_nsegs;
516 txr->next_avail_desc = i;
517
518 txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor, which gets
	 * the completion interrupt, has the real map, and the first
	 * descriptor gets the unused map from this descriptor.
	 */
525 txr->tx_buffers[first].map = txbuf->map;
526 txbuf->map = map;
527 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
528 BUS_DMASYNC_PREWRITE);
529
530 /* Set the EOP descriptor that will be marked done */
531 txbuf = &txr->tx_buffers[first];
532 txbuf->eop = txd;
533
534 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
535 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
540 ++txr->total_packets.ev_count;
541 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
542
543 /*
544 * XXXX NOMPSAFE: ifp->if_data should be percpu.
545 */
546 ifp->if_obytes += m_head->m_pkthdr.len;
547 if (m_head->m_flags & M_MCAST)
548 ifp->if_omcasts++;
549
550 /* Mark queue as having work */
551 if (txr->busy == 0)
552 txr->busy = 1;
553
554 return (0);
555 } /* ixgbe_xmit */
556
557 /************************************************************************
558 * ixgbe_drain
559 ************************************************************************/
560 static void
561 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
562 {
563 struct mbuf *m;
564
565 IXGBE_TX_LOCK_ASSERT(txr);
566
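	/*
	 * Only the legacy (single-queue) transmit path uses if_snd, and it
	 * always maps to ring 0, so drain it only when called for that ring.
	 */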
567 if (txr->me == 0) {
568 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
569 IFQ_DEQUEUE(&ifp->if_snd, m);
570 m_freem(m);
571 IF_DROP(&ifp->if_snd);
572 }
573 }
574
575 while ((m = pcq_get(txr->txr_interq)) != NULL) {
576 m_freem(m);
577 txr->pcq_drops.ev_count++;
578 }
579 }
580
581 /************************************************************************
582 * ixgbe_allocate_transmit_buffers
583 *
584 * Allocate memory for tx_buffer structures. The tx_buffer stores all
585 * the information needed to transmit a packet on the wire. This is
586 * called only once at attach, setup is done every reset.
587 ************************************************************************/
588 static int
589 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
590 {
591 struct adapter *adapter = txr->adapter;
592 device_t dev = adapter->dev;
593 struct ixgbe_tx_buf *txbuf;
594 int error, i;
595
596 /*
597 * Setup DMA descriptor areas.
598 */
599 error = ixgbe_dma_tag_create(
600 /* parent */ adapter->osdep.dmat,
601 /* alignment */ 1,
602 /* bounds */ 0,
603 /* maxsize */ IXGBE_TSO_SIZE,
604 /* nsegments */ adapter->num_segs,
605 /* maxsegsize */ PAGE_SIZE,
606 /* flags */ 0,
607 &txr->txtag);
608 if (error != 0) {
		aprint_error_dev(dev, "Unable to allocate TX DMA tag\n");
610 goto fail;
611 }
612
613 txr->tx_buffers =
614 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
615 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
616 if (txr->tx_buffers == NULL) {
617 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
618 error = ENOMEM;
619 goto fail;
620 }
621
622 /* Create the descriptor buffer dma maps */
623 txbuf = txr->tx_buffers;
624 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
625 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
626 if (error != 0) {
627 aprint_error_dev(dev,
628 "Unable to create TX DMA map (%d)\n", error);
629 goto fail;
630 }
631 }
632
633 return 0;
634 fail:
	/* Free everything; this handles the case where we failed partway through. */
636 #if 0 /* XXX was FreeBSD */
637 ixgbe_free_transmit_structures(adapter);
638 #else
639 ixgbe_free_transmit_buffers(txr);
640 #endif
641 return (error);
642 } /* ixgbe_allocate_transmit_buffers */
643
644 /************************************************************************
645 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
646 ************************************************************************/
647 static void
648 ixgbe_setup_transmit_ring(struct tx_ring *txr)
649 {
650 struct adapter *adapter = txr->adapter;
651 struct ixgbe_tx_buf *txbuf;
652 #ifdef DEV_NETMAP
653 struct netmap_adapter *na = NA(adapter->ifp);
654 struct netmap_slot *slot;
655 #endif /* DEV_NETMAP */
656
657 /* Clear the old ring contents */
658 IXGBE_TX_LOCK(txr);
659
660 #ifdef DEV_NETMAP
661 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
662 /*
663 * (under lock): if in netmap mode, do some consistency
664 * checks and set slot to entry 0 of the netmap ring.
665 */
666 slot = netmap_reset(na, NR_TX, txr->me, 0);
667 }
668 #endif /* DEV_NETMAP */
669
670 bzero((void *)txr->tx_base,
671 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
672 /* Reset indices */
673 txr->next_avail_desc = 0;
674 txr->next_to_clean = 0;
675
676 /* Free any existing tx buffers. */
677 txbuf = txr->tx_buffers;
678 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
679 if (txbuf->m_head != NULL) {
680 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
681 0, txbuf->m_head->m_pkthdr.len,
682 BUS_DMASYNC_POSTWRITE);
683 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
684 m_freem(txbuf->m_head);
685 txbuf->m_head = NULL;
686 }
687
688 #ifdef DEV_NETMAP
689 /*
690 * In netmap mode, set the map for the packet buffer.
691 * NOTE: Some drivers (not this one) also need to set
692 * the physical buffer address in the NIC ring.
693 * Slots in the netmap ring (indexed by "si") are
694 * kring->nkr_hwofs positions "ahead" wrt the
695 * corresponding slot in the NIC ring. In some drivers
696 * (not here) nkr_hwofs can be negative. Function
697 * netmap_idx_n2k() handles wraparounds properly.
698 */
699 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
700 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
701 netmap_load_map(na, txr->txtag,
702 txbuf->map, NMB(na, slot + si));
703 }
704 #endif /* DEV_NETMAP */
705
706 /* Clear the EOP descriptor pointer */
707 txbuf->eop = NULL;
708 }
709
710 /* Set the rate at which we sample packets */
711 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
712 txr->atr_sample = atr_sample_rate;
713
714 /* Set number of descriptors available */
715 txr->tx_avail = adapter->num_tx_desc;
716
717 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
718 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
719 IXGBE_TX_UNLOCK(txr);
720 } /* ixgbe_setup_transmit_ring */
721
722 /************************************************************************
723 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
724 ************************************************************************/
725 int
726 ixgbe_setup_transmit_structures(struct adapter *adapter)
727 {
728 struct tx_ring *txr = adapter->tx_rings;
729
730 for (int i = 0; i < adapter->num_queues; i++, txr++)
731 ixgbe_setup_transmit_ring(txr);
732
733 return (0);
734 } /* ixgbe_setup_transmit_structures */
735
736 /************************************************************************
737 * ixgbe_free_transmit_structures - Free all transmit rings.
738 ************************************************************************/
739 void
740 ixgbe_free_transmit_structures(struct adapter *adapter)
741 {
742 struct tx_ring *txr = adapter->tx_rings;
743
744 for (int i = 0; i < adapter->num_queues; i++, txr++) {
745 ixgbe_free_transmit_buffers(txr);
746 ixgbe_dma_free(adapter, &txr->txdma);
747 IXGBE_TX_LOCK_DESTROY(txr);
748 }
749 free(adapter->tx_rings, M_DEVBUF);
750 } /* ixgbe_free_transmit_structures */
751
752 /************************************************************************
753 * ixgbe_free_transmit_buffers
754 *
755 * Free transmit ring related data structures.
756 ************************************************************************/
757 static void
758 ixgbe_free_transmit_buffers(struct tx_ring *txr)
759 {
760 struct adapter *adapter = txr->adapter;
761 struct ixgbe_tx_buf *tx_buffer;
762 int i;
763
764 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
765
766 if (txr->tx_buffers == NULL)
767 return;
768
769 tx_buffer = txr->tx_buffers;
770 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
771 if (tx_buffer->m_head != NULL) {
772 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
773 0, tx_buffer->m_head->m_pkthdr.len,
774 BUS_DMASYNC_POSTWRITE);
775 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
776 m_freem(tx_buffer->m_head);
777 tx_buffer->m_head = NULL;
778 if (tx_buffer->map != NULL) {
779 ixgbe_dmamap_destroy(txr->txtag,
780 tx_buffer->map);
781 tx_buffer->map = NULL;
782 }
783 } else if (tx_buffer->map != NULL) {
784 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
785 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
786 tx_buffer->map = NULL;
787 }
788 }
789 if (txr->txr_interq != NULL) {
790 struct mbuf *m;
791
792 while ((m = pcq_get(txr->txr_interq)) != NULL)
793 m_freem(m);
794 pcq_destroy(txr->txr_interq);
795 }
796 if (txr->tx_buffers != NULL) {
797 free(txr->tx_buffers, M_DEVBUF);
798 txr->tx_buffers = NULL;
799 }
800 if (txr->txtag != NULL) {
801 ixgbe_dma_tag_destroy(txr->txtag);
802 txr->txtag = NULL;
803 }
804 } /* ixgbe_free_transmit_buffers */
805
806 /************************************************************************
807 * ixgbe_tx_ctx_setup
808 *
809 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
810 ************************************************************************/
811 static int
812 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
813 u32 *cmd_type_len, u32 *olinfo_status)
814 {
815 struct adapter *adapter = txr->adapter;
816 struct ixgbe_adv_tx_context_desc *TXD;
817 struct ether_vlan_header *eh;
818 #ifdef INET
819 struct ip *ip;
820 #endif
821 #ifdef INET6
822 struct ip6_hdr *ip6;
823 #endif
824 int ehdrlen, ip_hlen = 0;
825 int offload = TRUE;
826 int ctxd = txr->next_avail_desc;
827 u32 vlan_macip_lens = 0;
828 u32 type_tucmd_mlhl = 0;
829 u16 vtag = 0;
830 u16 etype;
831 u8 ipproto = 0;
832 char *l3d;
833
834
835 /* First check if TSO is to be used */
836 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
837 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
838
839 if (rv != 0)
840 ++adapter->tso_err.ev_count;
841 return rv;
842 }
843
844 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
845 offload = FALSE;
846
847 /* Indicate the whole packet as payload when not doing TSO */
848 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
849
850 /* Now ready a context descriptor */
851 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
852
853 /*
854 * In advanced descriptors the vlan tag must
855 * be placed into the context descriptor. Hence
856 * we need to make one even if not doing offloads.
857 */
858 if (vlan_has_tag(mp)) {
859 vtag = htole16(vlan_get_tag(mp));
860 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
861 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
862 (offload == FALSE))
863 return (0);
864
865 /*
866 * Determine where frame payload starts.
867 * Jump over vlan headers if already present,
868 * helpful for QinQ too.
869 */
870 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
871 eh = mtod(mp, struct ether_vlan_header *);
872 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
873 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
874 etype = ntohs(eh->evl_proto);
875 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
876 } else {
877 etype = ntohs(eh->evl_encap_proto);
878 ehdrlen = ETHER_HDR_LEN;
879 }
880
881 /* Set the ether header length */
882 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
883
884 if (offload == FALSE)
885 goto no_offloads;
886
887 /*
888 * If the first mbuf only includes the ethernet header,
889 * jump to the next one
890 * XXX: This assumes the stack splits mbufs containing headers
891 * on header boundaries
892 * XXX: And assumes the entire IP header is contained in one mbuf
893 */
894 if (mp->m_len == ehdrlen && mp->m_next)
895 l3d = mtod(mp->m_next, char *);
896 else
897 l3d = mtod(mp, char *) + ehdrlen;
898
899 switch (etype) {
900 #ifdef INET
901 case ETHERTYPE_IP:
902 ip = (struct ip *)(l3d);
903 ip_hlen = ip->ip_hl << 2;
904 ipproto = ip->ip_p;
905 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
906 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
907 ip->ip_sum == 0);
908 break;
909 #endif
910 #ifdef INET6
911 case ETHERTYPE_IPV6:
912 ip6 = (struct ip6_hdr *)(l3d);
913 ip_hlen = sizeof(struct ip6_hdr);
914 ipproto = ip6->ip6_nxt;
915 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
916 break;
917 #endif
918 default:
919 offload = false;
920 break;
921 }
922
923 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
924 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
925
926 vlan_macip_lens |= ip_hlen;
927
928 /* No support for offloads for non-L4 next headers */
929 switch (ipproto) {
930 case IPPROTO_TCP:
931 if (mp->m_pkthdr.csum_flags &
932 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
933 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
934 else
935 offload = false;
936 break;
937 case IPPROTO_UDP:
938 if (mp->m_pkthdr.csum_flags &
939 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
940 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
941 else
942 offload = false;
943 break;
944 default:
945 offload = false;
946 break;
947 }
948
949 if (offload) /* Insert L4 checksum into data descriptors */
950 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
951
952 no_offloads:
953 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
954
955 /* Now copy bits into descriptor */
956 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
957 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
958 TXD->seqnum_seed = htole32(0);
959 TXD->mss_l4len_idx = htole32(0);
960
961 /* We've consumed the first desc, adjust counters */
962 if (++ctxd == txr->num_desc)
963 ctxd = 0;
964 txr->next_avail_desc = ctxd;
965 --txr->tx_avail;
966
967 return (0);
968 } /* ixgbe_tx_ctx_setup */
969
970 /************************************************************************
971 * ixgbe_tso_setup
972 *
973 * Setup work for hardware segmentation offload (TSO) on
974 * adapters using advanced tx descriptors
975 ************************************************************************/
976 static int
977 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
978 u32 *olinfo_status)
979 {
980 struct ixgbe_adv_tx_context_desc *TXD;
981 struct ether_vlan_header *eh;
982 #ifdef INET6
983 struct ip6_hdr *ip6;
984 #endif
985 #ifdef INET
986 struct ip *ip;
987 #endif
988 struct tcphdr *th;
989 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
990 u32 vlan_macip_lens = 0;
991 u32 type_tucmd_mlhl = 0;
992 u32 mss_l4len_idx = 0, paylen;
993 u16 vtag = 0, eh_type;
994
995 /*
996 * Determine where frame payload starts.
997 * Jump over vlan headers if already present
998 */
999 eh = mtod(mp, struct ether_vlan_header *);
1000 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1001 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1002 eh_type = eh->evl_proto;
1003 } else {
1004 ehdrlen = ETHER_HDR_LEN;
1005 eh_type = eh->evl_encap_proto;
1006 }
1007
1008 switch (ntohs(eh_type)) {
1009 #ifdef INET
1010 case ETHERTYPE_IP:
1011 ip = (struct ip *)(mp->m_data + ehdrlen);
1012 if (ip->ip_p != IPPROTO_TCP)
1013 return (ENXIO);
1014 ip->ip_sum = 0;
1015 ip_hlen = ip->ip_hl << 2;
1016 th = (struct tcphdr *)((char *)ip + ip_hlen);
1017 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1018 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1019 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1020 /* Tell transmit desc to also do IPv4 checksum. */
1021 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1022 break;
1023 #endif
1024 #ifdef INET6
1025 case ETHERTYPE_IPV6:
1026 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1027 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1028 if (ip6->ip6_nxt != IPPROTO_TCP)
1029 return (ENXIO);
1030 ip_hlen = sizeof(struct ip6_hdr);
1031 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1032 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1033 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1034 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1035 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1036 break;
1037 #endif
1038 default:
1039 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1040 __func__, ntohs(eh_type));
1041 break;
1042 }
1043
1044 ctxd = txr->next_avail_desc;
1045 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1046
1047 tcp_hlen = th->th_off << 2;
1048
1049 /* This is used in the transmit desc in encap */
1050 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1051
1052 /* VLAN MACLEN IPLEN */
1053 if (vlan_has_tag(mp)) {
1054 vtag = htole16(vlan_get_tag(mp));
1055 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1056 }
1057
1058 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1059 vlan_macip_lens |= ip_hlen;
1060 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1061
1062 /* ADV DTYPE TUCMD */
1063 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1064 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1065 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1066
1067 /* MSS L4LEN IDX */
1068 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1069 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1070 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1071
1072 TXD->seqnum_seed = htole32(0);
1073
1074 if (++ctxd == txr->num_desc)
1075 ctxd = 0;
1076
1077 txr->tx_avail--;
1078 txr->next_avail_desc = ctxd;
1079 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1080 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1081 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1082 ++txr->tso_tx.ev_count;
1083
1084 return (0);
1085 } /* ixgbe_tso_setup */
1086
1087
1088 /************************************************************************
1089 * ixgbe_txeof
1090 *
1091 * Examine each tx_buffer in the used queue. If the hardware is done
1092 * processing the packet then free associated resources. The
1093 * tx_buffer is put back on the free queue.
1094 ************************************************************************/
1095 bool
1096 ixgbe_txeof(struct tx_ring *txr)
1097 {
1098 struct adapter *adapter = txr->adapter;
1099 struct ifnet *ifp = adapter->ifp;
1100 struct ixgbe_tx_buf *buf;
1101 union ixgbe_adv_tx_desc *txd;
1102 u32 work, processed = 0;
1103 u32 limit = adapter->tx_process_limit;
1104
1105 KASSERT(mutex_owned(&txr->tx_mtx));
1106
1107 #ifdef DEV_NETMAP
1108 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1109 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1110 struct netmap_adapter *na = NA(adapter->ifp);
1111 struct netmap_kring *kring = na->tx_rings[txr->me];
1112 txd = txr->tx_base;
1113 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1114 BUS_DMASYNC_POSTREAD);
1115 /*
1116 * In netmap mode, all the work is done in the context
1117 * of the client thread. Interrupt handlers only wake up
1118 * clients, which may be sleeping on individual rings
1119 * or on a global resource for all rings.
1120 * To implement tx interrupt mitigation, we wake up the client
1121 * thread roughly every half ring, even if the NIC interrupts
1122 * more frequently. This is implemented as follows:
1123 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1124 * the slot that should wake up the thread (nkr_num_slots
1125 * means the user thread should not be woken up);
1126 * - the driver ignores tx interrupts unless netmap_mitigate=0
1127 * or the slot has the DD bit set.
1128 */
1129 if (kring->nr_kflags < kring->nkr_num_slots &&
1130 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
1131 netmap_tx_irq(ifp, txr->me);
1132 }
1133 return false;
1134 }
1135 #endif /* DEV_NETMAP */
1136
1137 if (txr->tx_avail == txr->num_desc) {
1138 txr->busy = 0;
1139 return false;
1140 }
1141
1142 /* Get work starting point */
1143 work = txr->next_to_clean;
1144 buf = &txr->tx_buffers[work];
1145 txd = &txr->tx_base[work];
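	/*
	 * Bias "work" so it is negative until the end of the ring is
	 * reached; it hits zero exactly at the wrap point, which is what
	 * the "!work" tests below detect.
	 */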
1146 work -= txr->num_desc; /* The distance to ring end */
1147 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1148 BUS_DMASYNC_POSTREAD);
1149
1150 do {
1151 union ixgbe_adv_tx_desc *eop = buf->eop;
1152 if (eop == NULL) /* No work */
1153 break;
1154
1155 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1156 break; /* I/O not complete */
1157
1158 if (buf->m_head) {
1159 txr->bytes += buf->m_head->m_pkthdr.len;
1160 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1161 0, buf->m_head->m_pkthdr.len,
1162 BUS_DMASYNC_POSTWRITE);
1163 ixgbe_dmamap_unload(txr->txtag, buf->map);
1164 m_freem(buf->m_head);
1165 buf->m_head = NULL;
1166 }
1167 buf->eop = NULL;
1168 txr->txr_no_space = false;
1169 ++txr->tx_avail;
1170
1171 /* We clean the range if multi segment */
1172 while (txd != eop) {
1173 ++txd;
1174 ++buf;
1175 ++work;
1176 /* wrap the ring? */
1177 if (__predict_false(!work)) {
1178 work -= txr->num_desc;
1179 buf = txr->tx_buffers;
1180 txd = txr->tx_base;
1181 }
1182 if (buf->m_head) {
1183 txr->bytes +=
1184 buf->m_head->m_pkthdr.len;
1185 bus_dmamap_sync(txr->txtag->dt_dmat,
1186 buf->map,
1187 0, buf->m_head->m_pkthdr.len,
1188 BUS_DMASYNC_POSTWRITE);
1189 ixgbe_dmamap_unload(txr->txtag,
1190 buf->map);
1191 m_freem(buf->m_head);
1192 buf->m_head = NULL;
1193 }
1194 ++txr->tx_avail;
1195 buf->eop = NULL;
1196
1197 }
1198 ++txr->packets;
1199 ++processed;
1200 ++ifp->if_opackets;
1201
1202 /* Try the next packet */
1203 ++txd;
1204 ++buf;
1205 ++work;
1206 /* reset with a wrap */
1207 if (__predict_false(!work)) {
1208 work -= txr->num_desc;
1209 buf = txr->tx_buffers;
1210 txd = txr->tx_base;
1211 }
1212 prefetch(txd);
1213 } while (__predict_true(--limit));
1214
1215 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1216 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1217
1218 work += txr->num_desc;
1219 txr->next_to_clean = work;
1220
	/*
	 * Queue hang detection: we know there is work outstanding,
	 * or the early return above would have been taken, so
	 * increment "busy" if nothing was cleaned.  local_timer()
	 * checks this and marks the queue HUNG if it exceeds the
	 * maximum number of attempts.
	 */
1229 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1230 ++txr->busy;
	/*
	 * If anything was cleaned, reset the state to 1; note that
	 * this clears HUNG if it was set.
	 */
1235 if (processed)
1236 txr->busy = 1;
1237
1238 if (txr->tx_avail == txr->num_desc)
1239 txr->busy = 0;
1240
1241 return ((limit > 0) ? false : true);
1242 } /* ixgbe_txeof */
1243
1244 /************************************************************************
1245 * ixgbe_rsc_count
1246 *
1247 * Used to detect a descriptor that has been merged by Hardware RSC.
1248 ************************************************************************/
1249 static inline u32
1250 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1251 {
1252 return (le32toh(rx->wb.lower.lo_dword.data) &
1253 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1254 } /* ixgbe_rsc_count */
1255
1256 /************************************************************************
1257 * ixgbe_setup_hw_rsc
1258 *
 *   Initialize the Hardware RSC (LRO) feature on 82599 for an RX
 *   ring.  It is toggled by the LRO capability even though it is
 *   transparent to the stack.
1262 *
1263 * NOTE: Since this HW feature only works with IPv4 and
1264 * testing has shown soft LRO to be as effective,
1265 * this feature will be disabled by default.
1266 ************************************************************************/
1267 static void
1268 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1269 {
1270 struct adapter *adapter = rxr->adapter;
1271 struct ixgbe_hw *hw = &adapter->hw;
1272 u32 rscctrl, rdrxctl;
1273
1274 /* If turning LRO/RSC off we need to disable it */
1275 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back so the disable takes effect. */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
1280
1281 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1282 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1283 #ifdef DEV_NETMAP
1284 /* Always strip CRC unless Netmap disabled it */
1285 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1286 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1287 ix_crcstrip)
1288 #endif /* DEV_NETMAP */
1289 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1290 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1291 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1292
1293 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1294 rscctrl |= IXGBE_RSCCTL_RSCEN;
1295 /*
1296 * Limit the total number of descriptors that
1297 * can be combined, so it does not exceed 64K
1298 */
1299 if (rxr->mbuf_sz == MCLBYTES)
1300 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1301 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1302 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1303 else if (rxr->mbuf_sz == MJUM9BYTES)
1304 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1305 else /* Using 16K cluster */
1306 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1307
1308 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1309
1310 /* Enable TCP header recognition */
1311 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1312 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1313
1314 /* Disable RSC for ACK packets */
1315 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1316 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1317
1318 rxr->hw_rsc = TRUE;
1319 } /* ixgbe_setup_hw_rsc */
1320
1321 /************************************************************************
1322 * ixgbe_refresh_mbufs
1323 *
 *   Refresh mbuf buffers for RX descriptor rings.
 *    - Keeps its own state, so discards due to resource exhaustion
 *      are unnecessary.  If an mbuf cannot be obtained it simply
 *      returns, keeping its placeholder; it can then be called
 *      again later to retry.
1329 ************************************************************************/
1330 static void
1331 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1332 {
1333 struct adapter *adapter = rxr->adapter;
1334 struct ixgbe_rx_buf *rxbuf;
1335 struct mbuf *mp;
1336 int i, j, error;
1337 bool refreshed = false;
1338
1339 i = j = rxr->next_to_refresh;
1340 /* Control the loop with one beyond */
1341 if (++j == rxr->num_desc)
1342 j = 0;
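	/*
	 * "j" looks one slot ahead of "i"; stopping when "j" reaches
	 * "limit" keeps next_to_refresh (and the tail write below) one
	 * descriptor short of "limit".
	 */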
1343
1344 while (j != limit) {
1345 rxbuf = &rxr->rx_buffers[i];
1346 if (rxbuf->buf == NULL) {
1347 mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1348 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1349 if (mp == NULL) {
1350 rxr->no_jmbuf.ev_count++;
1351 goto update;
1352 }
1353 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1354 m_adj(mp, ETHER_ALIGN);
1355 } else
1356 mp = rxbuf->buf;
1357
1358 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1359
1360 /* If we're dealing with an mbuf that was copied rather
1361 * than replaced, there's no need to go through busdma.
1362 */
1363 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1364 /* Get the memory mapping */
1365 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1366 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1367 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1368 if (error != 0) {
1369 device_printf(adapter->dev, "Refresh mbufs: "
1370 "payload dmamap load failure - %d\n",
1371 error);
1372 m_free(mp);
1373 rxbuf->buf = NULL;
1374 goto update;
1375 }
1376 rxbuf->buf = mp;
1377 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1378 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1379 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1380 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1381 } else {
1382 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1383 rxbuf->flags &= ~IXGBE_RX_COPY;
1384 }
1385
1386 refreshed = true;
1387 /* Next is precalculated */
1388 i = j;
1389 rxr->next_to_refresh = i;
1390 if (++j == rxr->num_desc)
1391 j = 0;
1392 }
1393
1394 update:
1395 if (refreshed) /* Update hardware tail index */
1396 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1397
1398 return;
1399 } /* ixgbe_refresh_mbufs */
1400
1401 /************************************************************************
1402 * ixgbe_allocate_receive_buffers
1403 *
1404 * Allocate memory for rx_buffer structures. Since we use one
1405 * rx_buffer per received packet, the maximum number of rx_buffer's
1406 * that we'll need is equal to the number of receive descriptors
1407 * that we've allocated.
1408 ************************************************************************/
1409 static int
1410 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1411 {
1412 struct adapter *adapter = rxr->adapter;
1413 device_t dev = adapter->dev;
1414 struct ixgbe_rx_buf *rxbuf;
1415 int bsize, error;
1416
1417 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1418 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1419 M_NOWAIT | M_ZERO);
1420 if (rxr->rx_buffers == NULL) {
1421 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1422 error = ENOMEM;
1423 goto fail;
1424 }
1425
1426 error = ixgbe_dma_tag_create(
1427 /* parent */ adapter->osdep.dmat,
1428 /* alignment */ 1,
1429 /* bounds */ 0,
1430 /* maxsize */ MJUM16BYTES,
1431 /* nsegments */ 1,
1432 /* maxsegsize */ MJUM16BYTES,
1433 /* flags */ 0,
1434 &rxr->ptag);
1435 if (error != 0) {
1436 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1437 goto fail;
1438 }
1439
1440 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1441 rxbuf = &rxr->rx_buffers[i];
1442 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1443 if (error) {
1444 aprint_error_dev(dev, "Unable to create RX dma map\n");
1445 goto fail;
1446 }
1447 }
1448
1449 return (0);
1450
1451 fail:
1452 /* Frees all, but can handle partial completion */
1453 ixgbe_free_receive_structures(adapter);
1454
1455 return (error);
1456 } /* ixgbe_allocate_receive_buffers */
1457
1458 /************************************************************************
1459 * ixgbe_free_receive_ring
1460 ************************************************************************/
1461 static void
1462 ixgbe_free_receive_ring(struct rx_ring *rxr)
1463 {
1464 for (int i = 0; i < rxr->num_desc; i++) {
1465 ixgbe_rx_discard(rxr, i);
1466 }
1467 } /* ixgbe_free_receive_ring */
1468
1469 /************************************************************************
1470 * ixgbe_setup_receive_ring
1471 *
1472 * Initialize a receive ring and its buffers.
1473 ************************************************************************/
1474 static int
1475 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1476 {
1477 struct adapter *adapter;
1478 struct ixgbe_rx_buf *rxbuf;
1479 #ifdef LRO
1480 struct ifnet *ifp;
1481 struct lro_ctrl *lro = &rxr->lro;
1482 #endif /* LRO */
1483 #ifdef DEV_NETMAP
1484 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1485 struct netmap_slot *slot;
1486 #endif /* DEV_NETMAP */
1487 int rsize, error = 0;
1488
1489 adapter = rxr->adapter;
1490 #ifdef LRO
1491 ifp = adapter->ifp;
1492 #endif /* LRO */
1493
1494 /* Clear the ring contents */
1495 IXGBE_RX_LOCK(rxr);
1496
1497 #ifdef DEV_NETMAP
1498 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1499 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1500 #endif /* DEV_NETMAP */
1501
1502 rsize = roundup2(adapter->num_rx_desc *
1503 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1504 bzero((void *)rxr->rx_base, rsize);
1505 /* Cache the size */
1506 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1507
1508 /* Free current RX buffer structs and their mbufs */
1509 ixgbe_free_receive_ring(rxr);
1510
1511 IXGBE_RX_UNLOCK(rxr);
1512 /*
1513 * Now reinitialize our supply of jumbo mbufs. The number
1514 * or size of jumbo mbufs may have changed.
1515 * Assume all of rxr->ptag are the same.
1516 */
1517 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1518 (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1519
1520 IXGBE_RX_LOCK(rxr);
1521
1522 /* Now replenish the mbufs */
1523 for (int j = 0; j != rxr->num_desc; ++j) {
1524 struct mbuf *mp;
1525
1526 rxbuf = &rxr->rx_buffers[j];
1527
1528 #ifdef DEV_NETMAP
1529 /*
1530 * In netmap mode, fill the map and set the buffer
1531 * address in the NIC ring, considering the offset
1532 * between the netmap and NIC rings (see comment in
1533 * ixgbe_setup_transmit_ring() ). No need to allocate
1534 * an mbuf, so end the block with a continue;
1535 */
1536 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1537 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1538 uint64_t paddr;
1539 void *addr;
1540
1541 addr = PNMB(na, slot + sj, &paddr);
1542 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1543 /* Update descriptor and the cached value */
1544 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1545 rxbuf->addr = htole64(paddr);
1546 continue;
1547 }
1548 #endif /* DEV_NETMAP */
1549
1550 rxbuf->flags = 0;
1551 rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1552 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1553 if (rxbuf->buf == NULL) {
1554 error = ENOBUFS;
1555 goto fail;
1556 }
1557 mp = rxbuf->buf;
1558 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1559 /* Get the memory mapping */
1560 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1561 mp, BUS_DMA_NOWAIT);
1562 if (error != 0)
1563 goto fail;
1564 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1565 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1566 /* Update the descriptor and the cached value */
1567 rxr->rx_base[j].read.pkt_addr =
1568 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1569 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1570 }
1571
1572
1573 /* Setup our descriptor indices */
1574 rxr->next_to_check = 0;
1575 rxr->next_to_refresh = 0;
1576 rxr->lro_enabled = FALSE;
1577 rxr->rx_copies.ev_count = 0;
1578 #if 0 /* NetBSD */
1579 rxr->rx_bytes.ev_count = 0;
1580 #if 1 /* Fix inconsistency */
1581 rxr->rx_packets.ev_count = 0;
1582 #endif
1583 #endif
1584 rxr->vtag_strip = FALSE;
1585
1586 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1587 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1588
1589 /*
1590 * Now set up the LRO interface
1591 */
1592 if (ixgbe_rsc_enable)
1593 ixgbe_setup_hw_rsc(rxr);
1594 #ifdef LRO
1595 else if (ifp->if_capenable & IFCAP_LRO) {
1596 device_t dev = adapter->dev;
1597 int err = tcp_lro_init(lro);
1598 if (err) {
1599 device_printf(dev, "LRO Initialization failed!\n");
1600 goto fail;
1601 }
1602 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1603 rxr->lro_enabled = TRUE;
1604 lro->ifp = adapter->ifp;
1605 }
1606 #endif /* LRO */
1607
1608 IXGBE_RX_UNLOCK(rxr);
1609
1610 return (0);
1611
1612 fail:
1613 ixgbe_free_receive_ring(rxr);
1614 IXGBE_RX_UNLOCK(rxr);
1615
1616 return (error);
1617 } /* ixgbe_setup_receive_ring */
1618
1619 /************************************************************************
1620 * ixgbe_setup_receive_structures - Initialize all receive rings.
1621 ************************************************************************/
1622 int
1623 ixgbe_setup_receive_structures(struct adapter *adapter)
1624 {
1625 struct rx_ring *rxr = adapter->rx_rings;
1626 int j;
1627
1628 for (j = 0; j < adapter->num_queues; j++, rxr++)
1629 if (ixgbe_setup_receive_ring(rxr))
1630 goto fail;
1631
1632 return (0);
1633 fail:
	/*
	 * Free the RX buffers allocated so far.  We only handle the
	 * rings that completed; the failing ring has already cleaned
	 * up after itself.  Ring 'j' failed, so it is the terminus.
	 */
1639 for (int i = 0; i < j; ++i) {
1640 rxr = &adapter->rx_rings[i];
1641 IXGBE_RX_LOCK(rxr);
1642 ixgbe_free_receive_ring(rxr);
1643 IXGBE_RX_UNLOCK(rxr);
1644 }
1645
1646 return (ENOBUFS);
1647 } /* ixgbe_setup_receive_structures */
1648
1649
1650 /************************************************************************
1651 * ixgbe_free_receive_structures - Free all receive rings.
1652 ************************************************************************/
1653 void
1654 ixgbe_free_receive_structures(struct adapter *adapter)
1655 {
1656 struct rx_ring *rxr = adapter->rx_rings;
1657
1658 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1659
1660 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1661 ixgbe_free_receive_buffers(rxr);
1662 #ifdef LRO
1663 /* Free LRO memory */
1664 tcp_lro_free(&rxr->lro);
1665 #endif /* LRO */
1666 /* Free the ring memory as well */
1667 ixgbe_dma_free(adapter, &rxr->rxdma);
1668 IXGBE_RX_LOCK_DESTROY(rxr);
1669 }
1670
1671 free(adapter->rx_rings, M_DEVBUF);
1672 } /* ixgbe_free_receive_structures */
1673
1674
1675 /************************************************************************
1676 * ixgbe_free_receive_buffers - Free receive ring data structures
1677 ************************************************************************/
1678 static void
1679 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1680 {
1681 struct adapter *adapter = rxr->adapter;
1682 struct ixgbe_rx_buf *rxbuf;
1683
1684 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1685
1686 /* Cleanup any existing buffers */
1687 if (rxr->rx_buffers != NULL) {
1688 for (int i = 0; i < adapter->num_rx_desc; i++) {
1689 rxbuf = &rxr->rx_buffers[i];
1690 ixgbe_rx_discard(rxr, i);
1691 if (rxbuf->pmap != NULL) {
1692 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1693 rxbuf->pmap = NULL;
1694 }
1695 }
1696 if (rxr->rx_buffers != NULL) {
1697 free(rxr->rx_buffers, M_DEVBUF);
1698 rxr->rx_buffers = NULL;
1699 }
1700 }
1701
1702 if (rxr->ptag != NULL) {
1703 ixgbe_dma_tag_destroy(rxr->ptag);
1704 rxr->ptag = NULL;
1705 }
1706
1707 return;
1708 } /* ixgbe_free_receive_buffers */
1709
1710 /************************************************************************
1711 * ixgbe_rx_input
1712 ************************************************************************/
1713 static __inline void
1714 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1715 u32 ptype)
1716 {
1717 struct adapter *adapter = ifp->if_softc;
1718
1719 #ifdef LRO
1720 struct ethercom *ec = &adapter->osdep.ec;
1721
	/*
	 * At the moment LRO is only for IP/TCP packets whose TCP checksum
	 * has been computed by the hardware, and the packet must not carry
	 * a VLAN tag in its Ethernet header.  For IPv6 we do not yet
	 * support extension headers.
	 */
1727 if (rxr->lro_enabled &&
1728 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1729 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1730 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1731 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1732 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1733 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1734 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1735 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1736 /*
1737 * Send to the stack if:
1738 ** - LRO not enabled, or
1739 ** - no LRO resources, or
1740 ** - lro enqueue fails
1741 */
1742 if (rxr->lro.lro_cnt != 0)
1743 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1744 return;
1745 }
1746 #endif /* LRO */
1747
1748 if_percpuq_enqueue(adapter->ipq, m);
1749 } /* ixgbe_rx_input */
1750
1751 /************************************************************************
1752 * ixgbe_rx_discard
1753 ************************************************************************/
1754 static __inline void
1755 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1756 {
1757 struct ixgbe_rx_buf *rbuf;
1758
1759 rbuf = &rxr->rx_buffers[i];
1760
	/*
	 * With advanced descriptors the writeback clobbers the buffer
	 * addresses, so it is easier to just free the existing mbufs and
	 * take the normal refresh path to get new buffers and mappings.
	 */
1768
1769 if (rbuf->fmp != NULL) {/* Partial chain ? */
1770 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1771 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1772 m_freem(rbuf->fmp);
1773 rbuf->fmp = NULL;
1774 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1775 } else if (rbuf->buf) {
1776 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1777 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1778 m_free(rbuf->buf);
1779 rbuf->buf = NULL;
1780 }
1781 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1782
1783 rbuf->flags = 0;
1784
1785 return;
1786 } /* ixgbe_rx_discard */
1787
1788
1789 /************************************************************************
1790 * ixgbe_rxeof
1791 *
1792 * Executes in interrupt context. Replenishes the mbufs in the
1793 * descriptor ring and passes data that has been DMA'ed into host
1794 * memory up to the upper layer.
1795 *
1796 * Return TRUE for more work, FALSE for all clean.
1797 ************************************************************************/
1798 bool
1799 ixgbe_rxeof(struct ix_queue *que)
1800 {
1801 struct adapter *adapter = que->adapter;
1802 struct rx_ring *rxr = que->rxr;
1803 struct ifnet *ifp = adapter->ifp;
1804 #ifdef LRO
1805 struct lro_ctrl *lro = &rxr->lro;
1806 #endif /* LRO */
1807 union ixgbe_adv_rx_desc *cur;
1808 struct ixgbe_rx_buf *rbuf, *nbuf;
1809 int i, nextp, processed = 0;
1810 u32 staterr = 0;
1811 u32 count = adapter->rx_process_limit;
1812 #ifdef RSS
1813 u16 pkt_info;
1814 #endif
1815
1816 IXGBE_RX_LOCK(rxr);
1817
1818 #ifdef DEV_NETMAP
1819 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1820 /* Same as the txeof routine: wake up clients on interrupt. */
1821 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1822 IXGBE_RX_UNLOCK(rxr);
1823 return (FALSE);
1824 }
1825 }
1826 #endif /* DEV_NETMAP */
1827
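/*
 * Scan the ring starting at next_to_check.  A descriptor belongs to
 * the host once the hardware has set the DD (descriptor done) bit in
 * status_error, so the loop stops at the first descriptor without DD
 * or after rx_process_limit descriptors have been consumed.
 */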
1828 for (i = rxr->next_to_check; count != 0;) {
1829 struct mbuf *sendmp, *mp;
1830 u32 rsc, ptype;
1831 u16 len;
1832 u16 vtag = 0;
1833 bool eop;
1834
1835 /* Sync the ring. */
1836 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1837 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1838
1839 cur = &rxr->rx_base[i];
1840 staterr = le32toh(cur->wb.upper.status_error);
1841 #ifdef RSS
1842 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1843 #endif
1844
1845 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1846 break;
1847
1848 count--;
1849 sendmp = NULL;
1850 nbuf = NULL;
1851 rsc = 0;
1852 cur->wb.upper.status_error = 0;
1853 rbuf = &rxr->rx_buffers[i];
1854 mp = rbuf->buf;
1855
1856 len = le16toh(cur->wb.upper.length);
1857 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1858 IXGBE_RXDADV_PKTTYPE_MASK;
1859 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1860
1861 /* Make sure bad packets are discarded */
1862 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1863 #if __FreeBSD_version >= 1100036
1864 if (adapter->feat_en & IXGBE_FEATURE_VF)
1865 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1866 #endif
1867 rxr->rx_discarded.ev_count++;
1868 ixgbe_rx_discard(rxr, i);
1869 goto next_desc;
1870 }
1871
1872 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1873 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1874
1875 /*
1876 * On the 82599, which supports a hardware
1877 * LRO (called HW RSC), the pieces of a
1878 * packet need not sit in sequential
1879 * descriptors; instead the next descriptor
1880 * is indicated in bits of this descriptor.
1881 * This also means that we might process
1882 * more than one packet at a time, something
1883 * that has never been true before; it
1884 * required eliminating global chain pointers
1885 * in favor of what we are doing here. -jfv
1886 */
1887 if (!eop) {
1888 /*
1889 * Figure out the next descriptor
1890 * of this frame.
1891 */
1892 if (rxr->hw_rsc == TRUE) {
1893 rsc = ixgbe_rsc_count(cur);
1894 rxr->rsc_num += (rsc - 1);
1895 }
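/*
 * With RSC the hardware records the ring index of the frame's next
 * descriptor in the NEXTP field of status_error; without RSC the
 * frame simply continues in the following ring entry.
 */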
1896 if (rsc) { /* Get hardware index */
1897 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1898 IXGBE_RXDADV_NEXTP_SHIFT);
1899 } else { /* Just sequential */
1900 nextp = i + 1;
1901 if (nextp == adapter->num_rx_desc)
1902 nextp = 0;
1903 }
1904 nbuf = &rxr->rx_buffers[nextp];
1905 prefetch(nbuf);
1906 }
1907 /*
1908 * Rather than using the fmp/lmp global pointers
1909 * we now keep the head of a packet chain in the
1910 * buffer struct and pass this along from one
1911 * descriptor to the next, until we get EOP.
1912 */
1913 mp->m_len = len;
1914 /*
1915 * See if there is a stored head from a previous descriptor;
1916 * if so, this buffer is a continuation of that frame.
1917 */
1918 sendmp = rbuf->fmp;
1919 if (sendmp != NULL) { /* secondary frag */
1920 rbuf->buf = rbuf->fmp = NULL;
1921 mp->m_flags &= ~M_PKTHDR;
1922 sendmp->m_pkthdr.len += mp->m_len;
1923 } else {
1924 /*
1925 * Optimize. This might be a small packet,
1926 * maybe just a TCP ACK. Do a fast copy that
1927 * is cache aligned into a new mbuf, and
1928 * leave the old mbuf+cluster for re-use.
1929 */
1930 if (eop && len <= IXGBE_RX_COPY_LEN) {
1931 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1932 if (sendmp != NULL) {
1933 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1934 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1935 len);
1936 sendmp->m_len = len;
1937 rxr->rx_copies.ev_count++;
1938 rbuf->flags |= IXGBE_RX_COPY;
1939 }
1940 }
1941 if (sendmp == NULL) {
1942 rbuf->buf = rbuf->fmp = NULL;
1943 sendmp = mp;
1944 }
1945
1946 /* first desc of a non-ps chain */
1947 sendmp->m_flags |= M_PKTHDR;
1948 sendmp->m_pkthdr.len = mp->m_len;
1949 }
1950 ++processed;
1951
1952 /* Pass the head pointer on */
1953 if (eop == 0) {
1954 nbuf->fmp = sendmp;
1955 sendmp = NULL;
1956 mp->m_next = nbuf->buf;
1957 } else { /* Sending this frame */
1958 m_set_rcvif(sendmp, ifp);
1959 ++rxr->packets;
1960 rxr->rx_packets.ev_count++;
1961 /* capture data for AIM */
1962 rxr->bytes += sendmp->m_pkthdr.len;
1963 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1964 /* Process vlan info */
1965 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1966 vtag = le16toh(cur->wb.upper.vlan);
1967 if (vtag) {
1968 vlan_set_tag(sendmp, vtag);
1969 }
1970 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1971 ixgbe_rx_checksum(staterr, sendmp, ptype,
1972 &adapter->stats.pf);
1973 }
1974
1975 #if 0 /* FreeBSD */
1976 /*
1977 * In case of multiqueue, we have RXCSUM.PCSD bit set
1978 * and never cleared. This means we have RSS hash
1979 * available to be used.
1980 */
1981 if (adapter->num_queues > 1) {
1982 sendmp->m_pkthdr.flowid =
1983 le32toh(cur->wb.lower.hi_dword.rss);
1984 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1985 case IXGBE_RXDADV_RSSTYPE_IPV4:
1986 M_HASHTYPE_SET(sendmp,
1987 M_HASHTYPE_RSS_IPV4);
1988 break;
1989 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1990 M_HASHTYPE_SET(sendmp,
1991 M_HASHTYPE_RSS_TCP_IPV4);
1992 break;
1993 case IXGBE_RXDADV_RSSTYPE_IPV6:
1994 M_HASHTYPE_SET(sendmp,
1995 M_HASHTYPE_RSS_IPV6);
1996 break;
1997 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1998 M_HASHTYPE_SET(sendmp,
1999 M_HASHTYPE_RSS_TCP_IPV6);
2000 break;
2001 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
2002 M_HASHTYPE_SET(sendmp,
2003 M_HASHTYPE_RSS_IPV6_EX);
2004 break;
2005 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2006 M_HASHTYPE_SET(sendmp,
2007 M_HASHTYPE_RSS_TCP_IPV6_EX);
2008 break;
2009 #if __FreeBSD_version > 1100000
2010 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2011 M_HASHTYPE_SET(sendmp,
2012 M_HASHTYPE_RSS_UDP_IPV4);
2013 break;
2014 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_RSS_UDP_IPV6);
2017 break;
2018 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2019 M_HASHTYPE_SET(sendmp,
2020 M_HASHTYPE_RSS_UDP_IPV6_EX);
2021 break;
2022 #endif
2023 default:
2024 M_HASHTYPE_SET(sendmp,
2025 M_HASHTYPE_OPAQUE_HASH);
2026 }
2027 } else {
2028 sendmp->m_pkthdr.flowid = que->msix;
2029 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2030 }
2031 #endif
2032 }
2033 next_desc:
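/*
 * Resync the ring so that our clearing of status_error above is
 * visible to the hardware before this slot is handed back.
 */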
2034 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2035 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2036
2037 /* Advance our pointers to the next descriptor. */
2038 if (++i == rxr->num_desc)
2039 i = 0;
2040
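/*
 * The RX lock is dropped around ixgbe_rx_input() so that the stack
 * (or LRO) is not entered with the ring lock held; next_to_check is
 * published first and i is re-read afterwards in case anything
 * advanced the ring while the lock was released.
 */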
2041 /* Now send to the stack or do LRO */
2042 if (sendmp != NULL) {
2043 rxr->next_to_check = i;
2044 IXGBE_RX_UNLOCK(rxr);
2045 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2046 IXGBE_RX_LOCK(rxr);
2047 i = rxr->next_to_check;
2048 }
2049
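/*
 * Refresh in batches rather than per packet so the cost of re-arming
 * descriptors is amortized; anything still unrefreshed when the loop
 * exits is handled just below.
 */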
2050 /* Every 8 descriptors we go to refresh mbufs */
2051 if (processed == 8) {
2052 ixgbe_refresh_mbufs(rxr, i);
2053 processed = 0;
2054 }
2055 }
2056
2057 /* Refresh any remaining buf structs */
2058 if (ixgbe_rx_unrefreshed(rxr))
2059 ixgbe_refresh_mbufs(rxr, i);
2060
2061 rxr->next_to_check = i;
2062
2063 IXGBE_RX_UNLOCK(rxr);
2064
2065 #ifdef LRO
2066 /*
2067 * Flush any outstanding LRO work
2068 */
2069 tcp_lro_flush_all(lro);
2070 #endif /* LRO */
2071
2072 /*
2073 * Still have cleaning to do?
2074 */
2075 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2076 return (TRUE);
2077
2078 return (FALSE);
2079 } /* ixgbe_rxeof */
2080
2081
2082 /************************************************************************
2083 * ixgbe_rx_checksum
2084 *
2085 * Verify that the hardware indicated that the checksum is valid.
2086 * Inform the stack about the status of the checksum so that the
2087 * stack doesn't spend time verifying it again.
2088 ************************************************************************/
2089 static void
2090 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2091 struct ixgbe_hw_stats *stats)
2092 {
2093 u16 status = (u16)staterr;
2094 u8 errors = (u8)(staterr >> 24);
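/*
 * In the advanced RX descriptor the status bits occupy the low 16 bits
 * of status_error and the error bits the top byte, hence the two casts
 * above.
 */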
2095 #if 0
2096 bool sctp = false;
2097
2098 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2099 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2100 sctp = true;
2101 #endif
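/*
 * The SCTP distinction above is kept from the FreeBSD driver but is
 * unused here, presumably because NetBSD's csum_flags have no
 * SCTP-specific bits to report.
 */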
2102
2103 /* IPv4 checksum */
2104 if (status & IXGBE_RXD_STAT_IPCS) {
2105 stats->ipcs.ev_count++;
2106 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2107 /* IP Checksum Good */
2108 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2109 } else {
2110 stats->ipcs_bad.ev_count++;
2111 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2112 }
2113 }
2114 /* TCP/UDP/SCTP checksum */
2115 if (status & IXGBE_RXD_STAT_L4CS) {
2116 stats->l4cs.ev_count++;
2117 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2118 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2119 mp->m_pkthdr.csum_flags |= type;
2120 } else {
2121 stats->l4cs_bad.ev_count++;
2122 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2123 }
2124 }
2125 } /* ixgbe_rx_checksum */
2126
2127 /************************************************************************
2128 * ixgbe_dma_malloc
2129 ************************************************************************/
2130 int
2131 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2132 struct ixgbe_dma_alloc *dma, const int mapflags)
2133 {
2134 device_t dev = adapter->dev;
2135 int r, rsegs;
2136
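/*
 * Standard bus_dma(9) setup sequence: create a tag, allocate and map
 * the memory, then create a map and load it.  The fail_* labels below
 * undo the completed steps in reverse order.
 */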
2137 r = ixgbe_dma_tag_create(
2138 /* parent */ adapter->osdep.dmat,
2139 /* alignment */ DBA_ALIGN,
2140 /* bounds */ 0,
2141 /* maxsize */ size,
2142 /* nsegments */ 1,
2143 /* maxsegsize */ size,
2144 /* flags */ BUS_DMA_ALLOCNOW,
2145 &dma->dma_tag);
2146 if (r != 0) {
2147 aprint_error_dev(dev,
2148 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2149 r);
2150 goto fail_0;
2151 }
2152
2153 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2154 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2155 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2156 if (r != 0) {
2157 aprint_error_dev(dev,
2158 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2159 goto fail_1;
2160 }
2161
2162 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2163 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2164 if (r != 0) {
2165 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2166 __func__, r);
2167 goto fail_2;
2168 }
2169
2170 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2171 if (r != 0) {
2172 aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2173 __func__, r);
2174 goto fail_3;
2175 }
2176
2177 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2178 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2179 if (r != 0) {
2180 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2181 __func__, r);
2182 goto fail_4;
2183 }
2184 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2185 dma->dma_size = size;
2186 return 0;
2187 fail_4:
2188 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2189 fail_3:
2190 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2191 fail_2:
2192 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2193 fail_1:
2194 ixgbe_dma_tag_destroy(dma->dma_tag);
2195 fail_0:
2196
2197 return (r);
2198 } /* ixgbe_dma_malloc */
2199
2200 /************************************************************************
2201 * ixgbe_dma_free
2202 ************************************************************************/
2203 void
2204 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2205 {
2206 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2207 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2208 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
/* Also release the map and the KVA mapping created by ixgbe_dma_malloc() */
ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, dma->dma_size);
2209 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2210 ixgbe_dma_tag_destroy(dma->dma_tag);
2211 } /* ixgbe_dma_free */
2212
2213
2214 /************************************************************************
2215 * ixgbe_allocate_queues
2216 *
2217 * Allocate memory for the transmit and receive rings, and then
2218 * the descriptors associated with each; called only once at attach.
2219 ************************************************************************/
2220 int
2221 ixgbe_allocate_queues(struct adapter *adapter)
2222 {
2223 device_t dev = adapter->dev;
2224 struct ix_queue *que;
2225 struct tx_ring *txr;
2226 struct rx_ring *rxr;
2227 int rsize, tsize, error = IXGBE_SUCCESS;
2228 int txconf = 0, rxconf = 0;
2229
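/*
 * Allocation happens in stages: the queue structs, then the TX rings,
 * then the RX rings, each ring with its own descriptor DMA area and
 * buffer array.  txconf and rxconf count how many rings were fully set
 * up so the err_ and fail labels can unwind exactly that much on failure.
 */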
2230 /* First, allocate the top level queue structs */
2231 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2232 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2233 if (adapter->queues == NULL) {
2234 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2235 error = ENOMEM;
2236 goto fail;
2237 }
2238
2239 /* Second, allocate the TX ring struct memory */
2240 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2241 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2242 if (adapter->tx_rings == NULL) {
2243 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2244 error = ENOMEM;
2245 goto tx_fail;
2246 }
2247
2248 /* Third, allocate the RX ring */
2249 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2250 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2251 if (adapter->rx_rings == NULL) {
2252 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2253 error = ENOMEM;
2254 goto rx_fail;
2255 }
2256
2257 /* For the ring itself */
2258 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2259 DBA_ALIGN);
2260
2261 /*
2262 * Now set up the TX queues. txconf is needed to handle the
2263 * possibility that things fail partway through and we need to
2264 * undo the memory allocations gracefully.
2265 */
2266 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2267 /* Set up some basics */
2268 txr = &adapter->tx_rings[i];
2269 txr->adapter = adapter;
2270 txr->txr_interq = NULL;
2271 /* In case SR-IOV is enabled, align the index properly */
2272 #ifdef PCI_IOV
2273 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2274 i);
2275 #else
2276 txr->me = i;
2277 #endif
2278 txr->num_desc = adapter->num_tx_desc;
2279
2280 /* Initialize the TX side lock */
2281 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2282
2283 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2284 BUS_DMA_NOWAIT)) {
2285 aprint_error_dev(dev,
2286 "Unable to allocate TX Descriptor memory\n");
2287 error = ENOMEM;
2288 goto err_tx_desc;
2289 }
2290 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2291 bzero((void *)txr->tx_base, tsize);
2292
2293 /* Now allocate transmit buffers for the ring */
2294 if (ixgbe_allocate_transmit_buffers(txr)) {
2295 aprint_error_dev(dev,
2296 "Critical Failure setting up transmit buffers\n");
2297 error = ENOMEM;
2298 goto err_tx_desc;
2299 }
2300 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2301 /* Allocate a buf ring */
2302 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2303 if (txr->txr_interq == NULL) {
2304 aprint_error_dev(dev,
2305 "Critical Failure setting up buf ring\n");
2306 error = ENOMEM;
2307 goto err_tx_desc;
2308 }
2309 }
2310 }
2311
2312 /*
2313 * Next the RX queues...
2314 */
2315 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2316 DBA_ALIGN);
2317 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2318 rxr = &adapter->rx_rings[i];
2319 /* Set up some basics */
2320 rxr->adapter = adapter;
2321 #ifdef PCI_IOV
2322 /* In case SR-IOV is enabled, align the index properly */
2323 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2324 i);
2325 #else
2326 rxr->me = i;
2327 #endif
2328 rxr->num_desc = adapter->num_rx_desc;
2329
2330 /* Initialize the RX side lock */
2331 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2332
2333 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2334 BUS_DMA_NOWAIT)) {
2335 aprint_error_dev(dev,
2336 "Unable to allocate RxDescriptor memory\n");
2337 error = ENOMEM;
2338 goto err_rx_desc;
2339 }
2340 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2341 bzero((void *)rxr->rx_base, rsize);
2342
2343 /* Allocate receive buffers for the ring */
2344 if (ixgbe_allocate_receive_buffers(rxr)) {
2345 aprint_error_dev(dev,
2346 "Critical Failure setting up receive buffers\n");
2347 error = ENOMEM;
2348 goto err_rx_desc;
2349 }
2350 }
2351
2352 /*
2353 * Finally set up the queue holding structs
2354 */
2355 for (int i = 0; i < adapter->num_queues; i++) {
2356 que = &adapter->queues[i];
2357 que->adapter = adapter;
2358 que->me = i;
2359 que->txr = &adapter->tx_rings[i];
2360 que->rxr = &adapter->rx_rings[i];
2361
2362 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2363 que->disabled_count = 0;
2364 }
2365
2366 return (0);
2367
2368 err_rx_desc:
2369 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2370 ixgbe_dma_free(adapter, &rxr->rxdma);
2371 err_tx_desc:
2372 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2373 ixgbe_dma_free(adapter, &txr->txdma);
2374 free(adapter->rx_rings, M_DEVBUF);
2375 rx_fail:
2376 free(adapter->tx_rings, M_DEVBUF);
2377 tx_fail:
2378 free(adapter->queues, M_DEVBUF);
2379 fail:
2380 return (error);
2381 } /* ixgbe_allocate_queues */
2382