1 /* $NetBSD: ix_txrx.c,v 1.49 2018/07/31 09:19:34 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * this feature only works with
74 * IPv4, and only on 82599 and later.
75 * It also causes IP forwarding to
76 * fail, and that can't be controlled by
77 * the stack as LRO can. For all these
78 * reasons I've deemed it best to leave
79 * this off with no tunable interface;
80 * enabling it requires changing this
81 * setting and recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets between samples
88 * for the filter pool; it means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
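/*
 * Note: ixgbe_xmit() below increments txr->atr_count for each packet and
 * calls ixgbe_atr() once the count reaches this rate, so roughly one frame
 * in every atr_sample_rate is submitted to the Flow Director filters.
 */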
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (!adapter->link_active) {
140 /*
141 * discard all packets buffered in IFQ to avoid
142 * sending old packets at next link up timing.
143 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149 if (txr->txr_no_space)
150 return (ENETDOWN);
151
152 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 break;
155
156 IFQ_POLL(&ifp->if_snd, m_head);
157 if (m_head == NULL)
158 break;
159
160 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 break;
162 }
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head, BPF_D_OUT);
171 }
172
173 return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175
176 /************************************************************************
177 * ixgbe_legacy_start
178 *
179 * Called by the stack, this always uses the first tx ring,
180 * and should not be used with multiqueue tx enabled.
181 ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 struct adapter *adapter = ifp->if_softc;
186 struct tx_ring *txr = adapter->tx_rings;
187
188 if (ifp->if_flags & IFF_RUNNING) {
189 IXGBE_TX_LOCK(txr);
190 ixgbe_legacy_start_locked(ifp, txr);
191 IXGBE_TX_UNLOCK(txr);
192 }
193 } /* ixgbe_legacy_start */
194
195 /************************************************************************
196 * ixgbe_mq_start - Multiqueue Transmit Entry Point
197 *
198 * (if_transmit function)
199 ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 struct adapter *adapter = ifp->if_softc;
204 struct tx_ring *txr;
205 int i, err = 0;
206 #ifdef RSS
207 uint32_t bucket_id;
208 #endif
209
210 /*
211 * When doing RSS, map it to the same outbound queue
212 * as the incoming flow would be mapped to.
213 *
214 * If everything is set up correctly, it should be the
215 * same bucket as the one the current CPU belongs to.
216 */
217 #ifdef RSS
218 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 &bucket_id) == 0)) {
222 i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 if (bucket_id > adapter->num_queues)
225 if_printf(ifp,
226 "bucket_id (%d) > num_queues (%d)\n",
227 bucket_id, adapter->num_queues);
228 #endif
229 } else
230 i = m->m_pkthdr.flowid % adapter->num_queues;
231 } else
232 #endif /* RSS */
233 i = cpu_index(curcpu()) % adapter->num_queues;
234
235 /* Check for a hung queue and pick alternative */
236 if (((1 << i) & adapter->active_queues) == 0)
237 i = ffs64(adapter->active_queues);
238
239 txr = &adapter->tx_rings[i];
240
241 err = pcq_put(txr->txr_interq, m);
242 if (err == false) {
243 m_freem(m);
244 txr->pcq_drops.ev_count++;
245 return (err);
246 }
247 if (IXGBE_TX_TRYLOCK(txr)) {
248 ixgbe_mq_start_locked(ifp, txr);
249 IXGBE_TX_UNLOCK(txr);
250 } else {
251 if (adapter->txrx_use_workqueue) {
252 u_int *enqueued;
253
254 /*
255 * This function itself is not called in interrupt
256 * context, but it can be called in fast softint
257 * context right after receiving forwarded packets.
258 * The workqueue must therefore be protected against
259 * double enqueuing when the machine handles both
260 * locally generated and forwarded packets.
261 */
262 enqueued = percpu_getref(adapter->txr_wq_enqueued);
263 if (*enqueued == 0) {
264 *enqueued = 1;
265 percpu_putref(adapter->txr_wq_enqueued);
266 workqueue_enqueue(adapter->txr_wq,
267 &txr->wq_cookie, curcpu());
268 } else
269 percpu_putref(adapter->txr_wq_enqueued);
270 } else
271 softint_schedule(txr->txr_si);
272 }
273
274 return (0);
275 } /* ixgbe_mq_start */
276
277 /************************************************************************
278 * ixgbe_mq_start_locked
279 ************************************************************************/
280 int
281 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
282 {
283 struct mbuf *next;
284 int enqueued = 0, err = 0;
285
286 if (!txr->adapter->link_active) {
287 /*
288 * discard all packets buffered in txr_interq to avoid
289 * sending old packets at next link up timing.
290 */
291 ixgbe_drain(ifp, txr);
292 return (ENETDOWN);
293 }
294 if ((ifp->if_flags & IFF_RUNNING) == 0)
295 return (ENETDOWN);
296 if (txr->txr_no_space)
297 return (ENETDOWN);
298
299 /* Process the queue */
300 while ((next = pcq_get(txr->txr_interq)) != NULL) {
301 if ((err = ixgbe_xmit(txr, next)) != 0) {
302 m_freem(next);
303 /* All errors are counted in ixgbe_xmit() */
304 break;
305 }
306 enqueued++;
307 #if __FreeBSD_version >= 1100036
308 /*
309 * Since we're looking at the tx ring, we can check
310 * to see if we're a VF by examining our tail register
311 * address.
312 */
313 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
314 (next->m_flags & M_MCAST))
315 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
316 #endif
317 /* Send a copy of the frame to the BPF listener */
318 bpf_mtap(ifp, next, BPF_D_OUT);
319 if ((ifp->if_flags & IFF_RUNNING) == 0)
320 break;
321 }
322
323 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
324 ixgbe_txeof(txr);
325
326 return (err);
327 } /* ixgbe_mq_start_locked */
328
329 /************************************************************************
330 * ixgbe_deferred_mq_start
331 *
332 * Called from a softint and workqueue (indirectly) to drain queued
333 * transmit packets.
334 ************************************************************************/
335 void
336 ixgbe_deferred_mq_start(void *arg)
337 {
338 struct tx_ring *txr = arg;
339 struct adapter *adapter = txr->adapter;
340 struct ifnet *ifp = adapter->ifp;
341
342 IXGBE_TX_LOCK(txr);
343 if (pcq_peek(txr->txr_interq) != NULL)
344 ixgbe_mq_start_locked(ifp, txr);
345 IXGBE_TX_UNLOCK(txr);
346 } /* ixgbe_deferred_mq_start */
347
348 /************************************************************************
349 * ixgbe_deferred_mq_start_work
350 *
351 * Called from a workqueue to drain queued transmit packets.
352 ************************************************************************/
353 void
354 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
355 {
356 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
357 struct adapter *adapter = txr->adapter;
358 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
359 *enqueued = 0;
360 percpu_putref(adapter->txr_wq_enqueued);
361
362 ixgbe_deferred_mq_start(txr);
363 } /* ixgbe_deferred_mq_start_work */
364
365 /************************************************************************
366 * ixgbe_drain_all
367 ************************************************************************/
368 void
369 ixgbe_drain_all(struct adapter *adapter)
370 {
371 struct ifnet *ifp = adapter->ifp;
372 struct ix_queue *que = adapter->queues;
373
374 for (int i = 0; i < adapter->num_queues; i++, que++) {
375 struct tx_ring *txr = que->txr;
376
377 IXGBE_TX_LOCK(txr);
378 ixgbe_drain(ifp, txr);
379 IXGBE_TX_UNLOCK(txr);
380 }
381 }
382
383 /************************************************************************
384 * ixgbe_xmit
385 *
386 * Maps the mbufs to tx descriptors, allowing the
387 * TX engine to transmit the packets.
388 *
389 * Return 0 on success, positive on failure
390 ************************************************************************/
391 static int
392 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
393 {
394 struct adapter *adapter = txr->adapter;
395 struct ixgbe_tx_buf *txbuf;
396 union ixgbe_adv_tx_desc *txd = NULL;
397 struct ifnet *ifp = adapter->ifp;
398 int i, j, error;
399 int first;
400 u32 olinfo_status = 0, cmd_type_len;
401 bool remap = TRUE;
402 bus_dmamap_t map;
403
404 /* Basic descriptor defines */
405 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
406 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
407
408 if (vlan_has_tag(m_head))
409 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
410
411 /*
412 * Important to capture the first descriptor
413 * used because it will contain the index of
414 * the one we tell the hardware to report back
415 */
416 first = txr->next_avail_desc;
417 txbuf = &txr->tx_buffers[first];
418 map = txbuf->map;
419
420 /*
421 * Map the packet for DMA.
422 */
423 retry:
424 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
425 BUS_DMA_NOWAIT);
426
427 if (__predict_false(error)) {
428 struct mbuf *m;
429
430 switch (error) {
431 case EAGAIN:
432 txr->q_eagain_tx_dma_setup++;
433 return EAGAIN;
434 case ENOMEM:
435 txr->q_enomem_tx_dma_setup++;
436 return EAGAIN;
437 case EFBIG:
438 /* Try it again? - one try */
439 if (remap == TRUE) {
440 remap = FALSE;
441 /*
442 * XXX: m_defrag will choke on
443 * non-MCLBYTES-sized clusters
444 */
445 txr->q_efbig_tx_dma_setup++;
446 m = m_defrag(m_head, M_NOWAIT);
447 if (m == NULL) {
448 txr->q_mbuf_defrag_failed++;
449 return ENOBUFS;
450 }
451 m_head = m;
452 goto retry;
453 } else {
454 txr->q_efbig2_tx_dma_setup++;
455 return error;
456 }
457 case EINVAL:
458 txr->q_einval_tx_dma_setup++;
459 return error;
460 default:
461 txr->q_other_tx_dma_setup++;
462 return error;
463 }
464 }
465
466 /* Make certain there are enough descriptors */
467 if (txr->tx_avail < (map->dm_nsegs + 2)) {
468 txr->txr_no_space = true;
469 txr->no_desc_avail.ev_count++;
470 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
471 return EAGAIN;
472 }
473
474 /*
475 * Set up the appropriate offload context
476 * this will consume the first descriptor
477 */
478 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
479 if (__predict_false(error)) {
480 return (error);
481 }
482
483 /* Do the flow director magic */
484 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
485 (txr->atr_sample) && (!adapter->fdir_reinit)) {
486 ++txr->atr_count;
487 if (txr->atr_count >= atr_sample_rate) {
488 ixgbe_atr(txr, m_head);
489 txr->atr_count = 0;
490 }
491 }
492
493 olinfo_status |= IXGBE_ADVTXD_CC;
494 i = txr->next_avail_desc;
495 for (j = 0; j < map->dm_nsegs; j++) {
496 bus_size_t seglen;
497 bus_addr_t segaddr;
498
499 txbuf = &txr->tx_buffers[i];
500 txd = &txr->tx_base[i];
501 seglen = map->dm_segs[j].ds_len;
502 segaddr = htole64(map->dm_segs[j].ds_addr);
503
504 txd->read.buffer_addr = segaddr;
505 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
506 txd->read.olinfo_status = htole32(olinfo_status);
507
508 if (++i == txr->num_desc)
509 i = 0;
510 }
511
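/*
 * Close out the frame: EOP marks the last data descriptor of the packet
 * and RS asks the hardware to write back completion status (the DD bit)
 * for it, which ixgbe_txeof() later checks.
 */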
512 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
513 txr->tx_avail -= map->dm_nsegs;
514 txr->next_avail_desc = i;
515
516 txbuf->m_head = m_head;
517 /*
518 * Here we swap the map so the last descriptor,
519 * which gets the completion interrupt, has the
520 * real map, and the first descriptor gets the
521 * unused map from this descriptor.
522 */
523 txr->tx_buffers[first].map = txbuf->map;
524 txbuf->map = map;
525 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
526 BUS_DMASYNC_PREWRITE);
527
528 /* Set the EOP descriptor that will be marked done */
529 txbuf = &txr->tx_buffers[first];
530 txbuf->eop = txd;
531
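/*
 * Flush the descriptor writes to memory before bumping the tail
 * register, so the hardware never sees a stale descriptor.
 */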
532 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
533 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
534 /*
535 * Advance the Transmit Descriptor Tail (Tdt), this tells the
536 * hardware that this frame is available to transmit.
537 */
538 ++txr->total_packets.ev_count;
539 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
540
541 /*
542 * XXXX NOMPSAFE: ifp->if_data should be percpu.
543 */
544 ifp->if_obytes += m_head->m_pkthdr.len;
545 if (m_head->m_flags & M_MCAST)
546 ifp->if_omcasts++;
547
548 /* Mark queue as having work */
549 if (txr->busy == 0)
550 txr->busy = 1;
551
552 return (0);
553 } /* ixgbe_xmit */
554
555 /************************************************************************
556 * ixgbe_drain
557 ************************************************************************/
558 static void
559 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
560 {
561 struct mbuf *m;
562
563 IXGBE_TX_LOCK_ASSERT(txr);
564
565 if (txr->me == 0) {
566 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
567 IFQ_DEQUEUE(&ifp->if_snd, m);
568 m_freem(m);
569 IF_DROP(&ifp->if_snd);
570 }
571 }
572
573 while ((m = pcq_get(txr->txr_interq)) != NULL) {
574 m_freem(m);
575 txr->pcq_drops.ev_count++;
576 }
577 }
578
579 /************************************************************************
580 * ixgbe_allocate_transmit_buffers
581 *
582 * Allocate memory for tx_buffer structures. The tx_buffer stores all
583 * the information needed to transmit a packet on the wire. This is
584 * called only once at attach; setup is done on every reset.
585 ************************************************************************/
586 static int
587 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
588 {
589 struct adapter *adapter = txr->adapter;
590 device_t dev = adapter->dev;
591 struct ixgbe_tx_buf *txbuf;
592 int error, i;
593
594 /*
595 * Setup DMA descriptor areas.
596 */
597 error = ixgbe_dma_tag_create(
598 /* parent */ adapter->osdep.dmat,
599 /* alignment */ 1,
600 /* bounds */ 0,
601 /* maxsize */ IXGBE_TSO_SIZE,
602 /* nsegments */ adapter->num_segs,
603 /* maxsegsize */ PAGE_SIZE,
604 /* flags */ 0,
605 &txr->txtag);
606 if (error != 0) {
607 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
608 goto fail;
609 }
610
611 txr->tx_buffers =
612 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
613 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
614 if (txr->tx_buffers == NULL) {
615 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
616 error = ENOMEM;
617 goto fail;
618 }
619
620 /* Create the descriptor buffer dma maps */
621 txbuf = txr->tx_buffers;
622 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
623 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
624 if (error != 0) {
625 aprint_error_dev(dev,
626 "Unable to create TX DMA map (%d)\n", error);
627 goto fail;
628 }
629 }
630
631 return 0;
632 fail:
633 /* We free all, it handles case where we are in the middle */
634 #if 0 /* XXX was FreeBSD */
635 ixgbe_free_transmit_structures(adapter);
636 #else
637 ixgbe_free_transmit_buffers(txr);
638 #endif
639 return (error);
640 } /* ixgbe_allocate_transmit_buffers */
641
642 /************************************************************************
643 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
644 ************************************************************************/
645 static void
646 ixgbe_setup_transmit_ring(struct tx_ring *txr)
647 {
648 struct adapter *adapter = txr->adapter;
649 struct ixgbe_tx_buf *txbuf;
650 #ifdef DEV_NETMAP
651 struct netmap_adapter *na = NA(adapter->ifp);
652 struct netmap_slot *slot;
653 #endif /* DEV_NETMAP */
654
655 /* Clear the old ring contents */
656 IXGBE_TX_LOCK(txr);
657
658 #ifdef DEV_NETMAP
659 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
660 /*
661 * (under lock): if in netmap mode, do some consistency
662 * checks and set slot to entry 0 of the netmap ring.
663 */
664 slot = netmap_reset(na, NR_TX, txr->me, 0);
665 }
666 #endif /* DEV_NETMAP */
667
668 bzero((void *)txr->tx_base,
669 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
670 /* Reset indices */
671 txr->next_avail_desc = 0;
672 txr->next_to_clean = 0;
673
674 /* Free any existing tx buffers. */
675 txbuf = txr->tx_buffers;
676 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
677 if (txbuf->m_head != NULL) {
678 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
679 0, txbuf->m_head->m_pkthdr.len,
680 BUS_DMASYNC_POSTWRITE);
681 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
682 m_freem(txbuf->m_head);
683 txbuf->m_head = NULL;
684 }
685
686 #ifdef DEV_NETMAP
687 /*
688 * In netmap mode, set the map for the packet buffer.
689 * NOTE: Some drivers (not this one) also need to set
690 * the physical buffer address in the NIC ring.
691 * Slots in the netmap ring (indexed by "si") are
692 * kring->nkr_hwofs positions "ahead" wrt the
693 * corresponding slot in the NIC ring. In some drivers
694 * (not here) nkr_hwofs can be negative. Function
695 * netmap_idx_n2k() handles wraparounds properly.
696 */
697 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
698 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
699 netmap_load_map(na, txr->txtag,
700 txbuf->map, NMB(na, slot + si));
701 }
702 #endif /* DEV_NETMAP */
703
704 /* Clear the EOP descriptor pointer */
705 txbuf->eop = NULL;
706 }
707
708 /* Set the rate at which we sample packets */
709 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
710 txr->atr_sample = atr_sample_rate;
711
712 /* Set number of descriptors available */
713 txr->tx_avail = adapter->num_tx_desc;
714
715 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
716 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
717 IXGBE_TX_UNLOCK(txr);
718 } /* ixgbe_setup_transmit_ring */
719
720 /************************************************************************
721 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
722 ************************************************************************/
723 int
724 ixgbe_setup_transmit_structures(struct adapter *adapter)
725 {
726 struct tx_ring *txr = adapter->tx_rings;
727
728 for (int i = 0; i < adapter->num_queues; i++, txr++)
729 ixgbe_setup_transmit_ring(txr);
730
731 return (0);
732 } /* ixgbe_setup_transmit_structures */
733
734 /************************************************************************
735 * ixgbe_free_transmit_structures - Free all transmit rings.
736 ************************************************************************/
737 void
738 ixgbe_free_transmit_structures(struct adapter *adapter)
739 {
740 struct tx_ring *txr = adapter->tx_rings;
741
742 for (int i = 0; i < adapter->num_queues; i++, txr++) {
743 ixgbe_free_transmit_buffers(txr);
744 ixgbe_dma_free(adapter, &txr->txdma);
745 IXGBE_TX_LOCK_DESTROY(txr);
746 }
747 free(adapter->tx_rings, M_DEVBUF);
748 } /* ixgbe_free_transmit_structures */
749
750 /************************************************************************
751 * ixgbe_free_transmit_buffers
752 *
753 * Free transmit ring related data structures.
754 ************************************************************************/
755 static void
756 ixgbe_free_transmit_buffers(struct tx_ring *txr)
757 {
758 struct adapter *adapter = txr->adapter;
759 struct ixgbe_tx_buf *tx_buffer;
760 int i;
761
762 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
763
764 if (txr->tx_buffers == NULL)
765 return;
766
767 tx_buffer = txr->tx_buffers;
768 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
769 if (tx_buffer->m_head != NULL) {
770 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
771 0, tx_buffer->m_head->m_pkthdr.len,
772 BUS_DMASYNC_POSTWRITE);
773 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
774 m_freem(tx_buffer->m_head);
775 tx_buffer->m_head = NULL;
776 if (tx_buffer->map != NULL) {
777 ixgbe_dmamap_destroy(txr->txtag,
778 tx_buffer->map);
779 tx_buffer->map = NULL;
780 }
781 } else if (tx_buffer->map != NULL) {
782 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
783 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
784 tx_buffer->map = NULL;
785 }
786 }
787 if (txr->txr_interq != NULL) {
788 struct mbuf *m;
789
790 while ((m = pcq_get(txr->txr_interq)) != NULL)
791 m_freem(m);
792 pcq_destroy(txr->txr_interq);
793 }
794 if (txr->tx_buffers != NULL) {
795 free(txr->tx_buffers, M_DEVBUF);
796 txr->tx_buffers = NULL;
797 }
798 if (txr->txtag != NULL) {
799 ixgbe_dma_tag_destroy(txr->txtag);
800 txr->txtag = NULL;
801 }
802 } /* ixgbe_free_transmit_buffers */
803
804 /************************************************************************
805 * ixgbe_tx_ctx_setup
806 *
807 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
808 ************************************************************************/
809 static int
810 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
811 u32 *cmd_type_len, u32 *olinfo_status)
812 {
813 struct adapter *adapter = txr->adapter;
814 struct ixgbe_adv_tx_context_desc *TXD;
815 struct ether_vlan_header *eh;
816 #ifdef INET
817 struct ip *ip;
818 #endif
819 #ifdef INET6
820 struct ip6_hdr *ip6;
821 #endif
822 int ehdrlen, ip_hlen = 0;
823 int offload = TRUE;
824 int ctxd = txr->next_avail_desc;
825 u32 vlan_macip_lens = 0;
826 u32 type_tucmd_mlhl = 0;
827 u16 vtag = 0;
828 u16 etype;
829 u8 ipproto = 0;
830 char *l3d;
831
832
833 /* First check if TSO is to be used */
834 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
835 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
836
837 if (rv != 0)
838 ++adapter->tso_err.ev_count;
839 return rv;
840 }
841
842 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
843 offload = FALSE;
844
845 /* Indicate the whole packet as payload when not doing TSO */
846 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
847
848 /* Now ready a context descriptor */
849 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
850
851 /*
852 * In advanced descriptors the vlan tag must
853 * be placed into the context descriptor. Hence
854 * we need to make one even if not doing offloads.
855 */
856 if (vlan_has_tag(mp)) {
857 vtag = htole16(vlan_get_tag(mp));
858 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
859 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
860 (offload == FALSE))
861 return (0);
862
863 /*
864 * Determine where frame payload starts.
865 * Jump over vlan headers if already present,
866 * helpful for QinQ too.
867 */
868 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
869 eh = mtod(mp, struct ether_vlan_header *);
870 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
871 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
872 etype = ntohs(eh->evl_proto);
873 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
874 } else {
875 etype = ntohs(eh->evl_encap_proto);
876 ehdrlen = ETHER_HDR_LEN;
877 }
878
879 /* Set the ether header length */
880 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
881
882 if (offload == FALSE)
883 goto no_offloads;
884
885 /*
886 * If the first mbuf only includes the ethernet header,
887 * jump to the next one
888 * XXX: This assumes the stack splits mbufs containing headers
889 * on header boundaries
890 * XXX: And assumes the entire IP header is contained in one mbuf
891 */
892 if (mp->m_len == ehdrlen && mp->m_next)
893 l3d = mtod(mp->m_next, char *);
894 else
895 l3d = mtod(mp, char *) + ehdrlen;
896
897 switch (etype) {
898 #ifdef INET
899 case ETHERTYPE_IP:
900 ip = (struct ip *)(l3d);
901 ip_hlen = ip->ip_hl << 2;
902 ipproto = ip->ip_p;
903 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
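/*
 * If IPv4 header checksum offload was requested, the stack is
 * expected to have left ip_sum zeroed; the assertion below checks that.
 */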
904 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
905 ip->ip_sum == 0);
906 break;
907 #endif
908 #ifdef INET6
909 case ETHERTYPE_IPV6:
910 ip6 = (struct ip6_hdr *)(l3d);
911 ip_hlen = sizeof(struct ip6_hdr);
912 ipproto = ip6->ip6_nxt;
913 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
914 break;
915 #endif
916 default:
917 offload = false;
918 break;
919 }
920
921 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
922 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
923
924 vlan_macip_lens |= ip_hlen;
925
926 /* No support for offloads for non-L4 next headers */
927 switch (ipproto) {
928 case IPPROTO_TCP:
929 if (mp->m_pkthdr.csum_flags &
930 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
931 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
932 else
933 offload = false;
934 break;
935 case IPPROTO_UDP:
936 if (mp->m_pkthdr.csum_flags &
937 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
938 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
939 else
940 offload = false;
941 break;
942 default:
943 offload = false;
944 break;
945 }
946
947 if (offload) /* Insert L4 checksum into data descriptors */
948 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
949
950 no_offloads:
951 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
952
953 /* Now copy bits into descriptor */
954 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
955 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
956 TXD->seqnum_seed = htole32(0);
957 TXD->mss_l4len_idx = htole32(0);
958
959 /* We've consumed the first desc, adjust counters */
960 if (++ctxd == txr->num_desc)
961 ctxd = 0;
962 txr->next_avail_desc = ctxd;
963 --txr->tx_avail;
964
965 return (0);
966 } /* ixgbe_tx_ctx_setup */
967
968 /************************************************************************
969 * ixgbe_tso_setup
970 *
971 * Setup work for hardware segmentation offload (TSO) on
972 * adapters using advanced tx descriptors
973 ************************************************************************/
974 static int
975 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
976 u32 *olinfo_status)
977 {
978 struct ixgbe_adv_tx_context_desc *TXD;
979 struct ether_vlan_header *eh;
980 #ifdef INET6
981 struct ip6_hdr *ip6;
982 #endif
983 #ifdef INET
984 struct ip *ip;
985 #endif
986 struct tcphdr *th;
987 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
988 u32 vlan_macip_lens = 0;
989 u32 type_tucmd_mlhl = 0;
990 u32 mss_l4len_idx = 0, paylen;
991 u16 vtag = 0, eh_type;
992
993 /*
994 * Determine where frame payload starts.
995 * Jump over vlan headers if already present
996 */
997 eh = mtod(mp, struct ether_vlan_header *);
998 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
999 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1000 eh_type = eh->evl_proto;
1001 } else {
1002 ehdrlen = ETHER_HDR_LEN;
1003 eh_type = eh->evl_encap_proto;
1004 }
1005
1006 switch (ntohs(eh_type)) {
1007 #ifdef INET
1008 case ETHERTYPE_IP:
1009 ip = (struct ip *)(mp->m_data + ehdrlen);
1010 if (ip->ip_p != IPPROTO_TCP)
1011 return (ENXIO);
1012 ip->ip_sum = 0;
1013 ip_hlen = ip->ip_hl << 2;
1014 th = (struct tcphdr *)((char *)ip + ip_hlen);
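/*
 * For TSO the TCP checksum field must be seeded with the pseudo-header
 * checksum (addresses and protocol, no length); the hardware then
 * completes the checksum for each segment. The IPv6 case below does
 * the same.
 */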
1015 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1016 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1017 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1018 /* Tell transmit desc to also do IPv4 checksum. */
1019 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1020 break;
1021 #endif
1022 #ifdef INET6
1023 case ETHERTYPE_IPV6:
1024 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1025 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1026 if (ip6->ip6_nxt != IPPROTO_TCP)
1027 return (ENXIO);
1028 ip_hlen = sizeof(struct ip6_hdr);
1029 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1030 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1031 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1032 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1033 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1034 break;
1035 #endif
1036 default:
1037 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1038 __func__, ntohs(eh_type));
1039 break;
1040 }
1041
1042 ctxd = txr->next_avail_desc;
1043 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1044
1045 tcp_hlen = th->th_off << 2;
1046
1047 /* This is used in the transmit desc in encap */
1048 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1049
1050 /* VLAN MACLEN IPLEN */
1051 if (vlan_has_tag(mp)) {
1052 vtag = htole16(vlan_get_tag(mp));
1053 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1054 }
1055
1056 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1057 vlan_macip_lens |= ip_hlen;
1058 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1059
1060 /* ADV DTYPE TUCMD */
1061 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1062 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1063 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1064
1065 /* MSS L4LEN IDX */
1066 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1067 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1068 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1069
1070 TXD->seqnum_seed = htole32(0);
1071
1072 if (++ctxd == txr->num_desc)
1073 ctxd = 0;
1074
1075 txr->tx_avail--;
1076 txr->next_avail_desc = ctxd;
1077 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1078 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1079 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1080 ++txr->tso_tx.ev_count;
1081
1082 return (0);
1083 } /* ixgbe_tso_setup */
1084
1085
1086 /************************************************************************
1087 * ixgbe_txeof
1088 *
1089 * Examine each tx_buffer in the used queue. If the hardware is done
1090 * processing the packet then free associated resources. The
1091 * tx_buffer is put back on the free queue.
1092 ************************************************************************/
1093 bool
1094 ixgbe_txeof(struct tx_ring *txr)
1095 {
1096 struct adapter *adapter = txr->adapter;
1097 struct ifnet *ifp = adapter->ifp;
1098 struct ixgbe_tx_buf *buf;
1099 union ixgbe_adv_tx_desc *txd;
1100 u32 work, processed = 0;
1101 u32 limit = adapter->tx_process_limit;
1102
1103 KASSERT(mutex_owned(&txr->tx_mtx));
1104
1105 #ifdef DEV_NETMAP
1106 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1107 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1108 struct netmap_adapter *na = NA(adapter->ifp);
1109 struct netmap_kring *kring = &na->tx_rings[txr->me];
1110 txd = txr->tx_base;
1111 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1112 BUS_DMASYNC_POSTREAD);
1113 /*
1114 * In netmap mode, all the work is done in the context
1115 * of the client thread. Interrupt handlers only wake up
1116 * clients, which may be sleeping on individual rings
1117 * or on a global resource for all rings.
1118 * To implement tx interrupt mitigation, we wake up the client
1119 * thread roughly every half ring, even if the NIC interrupts
1120 * more frequently. This is implemented as follows:
1121 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1122 * the slot that should wake up the thread (nkr_num_slots
1123 * means the user thread should not be woken up);
1124 * - the driver ignores tx interrupts unless netmap_mitigate=0
1125 * or the slot has the DD bit set.
1126 */
1127 if (!netmap_mitigate ||
1128 (kring->nr_kflags < kring->nkr_num_slots &&
1129 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1130 netmap_tx_irq(ifp, txr->me);
1131 }
1132 return false;
1133 }
1134 #endif /* DEV_NETMAP */
1135
1136 if (txr->tx_avail == txr->num_desc) {
1137 txr->busy = 0;
1138 return false;
1139 }
1140
1141 /* Get work starting point */
1142 work = txr->next_to_clean;
1143 buf = &txr->tx_buffers[work];
1144 txd = &txr->tx_base[work];
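/*
 * 'work' is biased below so it runs from (next_to_clean - num_desc)
 * up toward zero; when it reaches zero the scan has wrapped past the
 * end of the ring, the buffer/descriptor pointers are reset to the
 * start, and 'work' is re-biased.
 */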
1145 work -= txr->num_desc; /* The distance to ring end */
1146 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1147 BUS_DMASYNC_POSTREAD);
1148
1149 do {
1150 union ixgbe_adv_tx_desc *eop = buf->eop;
1151 if (eop == NULL) /* No work */
1152 break;
1153
1154 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1155 break; /* I/O not complete */
1156
1157 if (buf->m_head) {
1158 txr->bytes += buf->m_head->m_pkthdr.len;
1159 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1160 0, buf->m_head->m_pkthdr.len,
1161 BUS_DMASYNC_POSTWRITE);
1162 ixgbe_dmamap_unload(txr->txtag, buf->map);
1163 m_freem(buf->m_head);
1164 buf->m_head = NULL;
1165 }
1166 buf->eop = NULL;
1167 txr->txr_no_space = false;
1168 ++txr->tx_avail;
1169
1170 /* We clean the range if multi segment */
1171 while (txd != eop) {
1172 ++txd;
1173 ++buf;
1174 ++work;
1175 /* wrap the ring? */
1176 if (__predict_false(!work)) {
1177 work -= txr->num_desc;
1178 buf = txr->tx_buffers;
1179 txd = txr->tx_base;
1180 }
1181 if (buf->m_head) {
1182 txr->bytes +=
1183 buf->m_head->m_pkthdr.len;
1184 bus_dmamap_sync(txr->txtag->dt_dmat,
1185 buf->map,
1186 0, buf->m_head->m_pkthdr.len,
1187 BUS_DMASYNC_POSTWRITE);
1188 ixgbe_dmamap_unload(txr->txtag,
1189 buf->map);
1190 m_freem(buf->m_head);
1191 buf->m_head = NULL;
1192 }
1193 ++txr->tx_avail;
1194 buf->eop = NULL;
1195
1196 }
1197 ++txr->packets;
1198 ++processed;
1199 ++ifp->if_opackets;
1200
1201 /* Try the next packet */
1202 ++txd;
1203 ++buf;
1204 ++work;
1205 /* reset with a wrap */
1206 if (__predict_false(!work)) {
1207 work -= txr->num_desc;
1208 buf = txr->tx_buffers;
1209 txd = txr->tx_base;
1210 }
1211 prefetch(txd);
1212 } while (__predict_true(--limit));
1213
1214 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1215 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1216
1217 work += txr->num_desc;
1218 txr->next_to_clean = work;
1219
1220 /*
1221 * Queue hang detection: we know there's
1222 * work outstanding, or the early return
1223 * above would have been taken, so increment
1224 * busy if nothing managed to get cleaned;
1225 * local_timer then checks the count and marks
1226 * the queue HUNG if it exceeds a MAX attempt.
1227 */
1228 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1229 ++txr->busy;
1230 /*
1231 * If anything gets cleaned we reset state to 1;
1232 * note this will turn off HUNG if it's set.
1233 */
1234 if (processed)
1235 txr->busy = 1;
1236
1237 if (txr->tx_avail == txr->num_desc)
1238 txr->busy = 0;
1239
1240 return ((limit > 0) ? false : true);
1241 } /* ixgbe_txeof */
1242
1243 /************************************************************************
1244 * ixgbe_rsc_count
1245 *
1246 * Used to detect a descriptor that has been merged by Hardware RSC.
1247 ************************************************************************/
1248 static inline u32
1249 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1250 {
1251 return (le32toh(rx->wb.lower.lo_dword.data) &
1252 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1253 } /* ixgbe_rsc_count */
1254
1255 /************************************************************************
1256 * ixgbe_setup_hw_rsc
1257 *
1258 * Initialize Hardware RSC (LRO) feature on 82599
1259 * for an RX ring, this is toggled by the LRO capability
1260 * even though it is transparent to the stack.
1261 *
1262 * NOTE: Since this HW feature only works with IPv4 and
1263 * testing has shown soft LRO to be as effective,
1264 * this feature will be disabled by default.
1265 ************************************************************************/
1266 static void
1267 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1268 {
1269 struct adapter *adapter = rxr->adapter;
1270 struct ixgbe_hw *hw = &adapter->hw;
1271 u32 rscctrl, rdrxctl;
1272
1273 /* If turning LRO/RSC off we need to disable it */
1274 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1275 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1276 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
/* Write the cleared RSCEN back; the read-modify above is otherwise discarded. */
IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1277 return;
1278 }
1279
1280 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1281 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1282 #ifdef DEV_NETMAP
1283 /* Always strip CRC unless Netmap disabled it */
1284 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1285 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1286 ix_crcstrip)
1287 #endif /* DEV_NETMAP */
1288 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1289 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1290 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1291
1292 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1293 rscctrl |= IXGBE_RSCCTL_RSCEN;
1294 /*
1295 * Limit the total number of descriptors that
1296 * can be combined, so it does not exceed 64K
1297 */
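/*
 * e.g. with 2KB clusters, 16 x 2KB = 32KB; with 4KB pages,
 * 8 x 4KB = 32KB; 4 x 9KB = 36KB; 1 x 16KB = 16KB -- all within
 * the 64KB RSC aggregation limit.
 */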
1298 if (rxr->mbuf_sz == MCLBYTES)
1299 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1300 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1301 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1302 else if (rxr->mbuf_sz == MJUM9BYTES)
1303 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1304 else /* Using 16K cluster */
1305 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1306
1307 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1308
1309 /* Enable TCP header recognition */
1310 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1311 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1312
1313 /* Disable RSC for ACK packets */
1314 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1315 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1316
1317 rxr->hw_rsc = TRUE;
1318 } /* ixgbe_setup_hw_rsc */
1319
1320 /************************************************************************
1321 * ixgbe_refresh_mbufs
1322 *
1323 * Refresh mbuf buffers for RX descriptor rings
1324 * - now keeps its own state so discards due to resource
1325 * exhaustion are unnecessary; if an mbuf cannot be obtained
1326 * it just returns, keeping its placeholder, so it can simply
1327 * be called again later to retry.
1328 ************************************************************************/
1329 static void
1330 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1331 {
1332 struct adapter *adapter = rxr->adapter;
1333 struct ixgbe_rx_buf *rxbuf;
1334 struct mbuf *mp;
1335 int i, j, error;
1336 bool refreshed = false;
1337
1338 i = j = rxr->next_to_refresh;
1339 /* Control the loop with one beyond */
1340 if (++j == rxr->num_desc)
1341 j = 0;
1342
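/*
 * 'i' is the slot being refreshed and 'j' always runs one slot ahead;
 * the pass ends when 'j' reaches 'limit', so the refresh deliberately
 * stops short of 'limit' and next_to_refresh is left on the first slot
 * that was not refreshed.
 */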
1343 while (j != limit) {
1344 rxbuf = &rxr->rx_buffers[i];
1345 if (rxbuf->buf == NULL) {
1346 mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1347 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1348 if (mp == NULL) {
1349 rxr->no_jmbuf.ev_count++;
1350 goto update;
1351 }
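/*
 * m_adj(mp, ETHER_ALIGN) reserves two bytes at the front of the
 * cluster so the IP header following the 14-byte Ethernet header
 * ends up 32-bit aligned.
 */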
1352 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1353 m_adj(mp, ETHER_ALIGN);
1354 } else
1355 mp = rxbuf->buf;
1356
1357 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1358
1359 /* If we're dealing with an mbuf that was copied rather
1360 * than replaced, there's no need to go through busdma.
1361 */
1362 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1363 /* Get the memory mapping */
1364 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1365 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1366 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1367 if (error != 0) {
1368 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1369 m_free(mp);
1370 rxbuf->buf = NULL;
1371 goto update;
1372 }
1373 rxbuf->buf = mp;
1374 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1375 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1376 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1377 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1378 } else {
1379 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1380 rxbuf->flags &= ~IXGBE_RX_COPY;
1381 }
1382
1383 refreshed = true;
1384 /* Next is precalculated */
1385 i = j;
1386 rxr->next_to_refresh = i;
1387 if (++j == rxr->num_desc)
1388 j = 0;
1389 }
1390
1391 update:
1392 if (refreshed) /* Update hardware tail index */
1393 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1394
1395 return;
1396 } /* ixgbe_refresh_mbufs */
1397
1398 /************************************************************************
1399 * ixgbe_allocate_receive_buffers
1400 *
1401 * Allocate memory for rx_buffer structures. Since we use one
1402 * rx_buffer per received packet, the maximum number of rx_buffer's
1403 * that we'll need is equal to the number of receive descriptors
1404 * that we've allocated.
1405 ************************************************************************/
1406 static int
1407 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1408 {
1409 struct adapter *adapter = rxr->adapter;
1410 device_t dev = adapter->dev;
1411 struct ixgbe_rx_buf *rxbuf;
1412 int bsize, error;
1413
1414 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1415 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1416 M_NOWAIT | M_ZERO);
1417 if (rxr->rx_buffers == NULL) {
1418 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1419 error = ENOMEM;
1420 goto fail;
1421 }
1422
1423 error = ixgbe_dma_tag_create(
1424 /* parent */ adapter->osdep.dmat,
1425 /* alignment */ 1,
1426 /* bounds */ 0,
1427 /* maxsize */ MJUM16BYTES,
1428 /* nsegments */ 1,
1429 /* maxsegsize */ MJUM16BYTES,
1430 /* flags */ 0,
1431 &rxr->ptag);
1432 if (error != 0) {
1433 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1434 goto fail;
1435 }
1436
1437 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1438 rxbuf = &rxr->rx_buffers[i];
1439 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1440 if (error) {
1441 aprint_error_dev(dev, "Unable to create RX dma map\n");
1442 goto fail;
1443 }
1444 }
1445
1446 return (0);
1447
1448 fail:
1449 /* Frees all, but can handle partial completion */
1450 ixgbe_free_receive_structures(adapter);
1451
1452 return (error);
1453 } /* ixgbe_allocate_receive_buffers */
1454
1455 /************************************************************************
1456 * ixgbe_free_receive_ring
1457 ************************************************************************/
1458 static void
1459 ixgbe_free_receive_ring(struct rx_ring *rxr)
1460 {
1461 for (int i = 0; i < rxr->num_desc; i++) {
1462 ixgbe_rx_discard(rxr, i);
1463 }
1464 } /* ixgbe_free_receive_ring */
1465
1466 /************************************************************************
1467 * ixgbe_setup_receive_ring
1468 *
1469 * Initialize a receive ring and its buffers.
1470 ************************************************************************/
1471 static int
1472 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1473 {
1474 struct adapter *adapter;
1475 struct ixgbe_rx_buf *rxbuf;
1476 #ifdef LRO
1477 struct ifnet *ifp;
1478 struct lro_ctrl *lro = &rxr->lro;
1479 #endif /* LRO */
1480 #ifdef DEV_NETMAP
1481 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1482 struct netmap_slot *slot;
1483 #endif /* DEV_NETMAP */
1484 int rsize, error = 0;
1485
1486 adapter = rxr->adapter;
1487 #ifdef LRO
1488 ifp = adapter->ifp;
1489 #endif /* LRO */
1490
1491 /* Clear the ring contents */
1492 IXGBE_RX_LOCK(rxr);
1493
1494 #ifdef DEV_NETMAP
1495 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1496 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1497 #endif /* DEV_NETMAP */
1498
1499 rsize = roundup2(adapter->num_rx_desc *
1500 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1501 bzero((void *)rxr->rx_base, rsize);
1502 /* Cache the size */
1503 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1504
1505 /* Free current RX buffer structs and their mbufs */
1506 ixgbe_free_receive_ring(rxr);
1507
1508 IXGBE_RX_UNLOCK(rxr);
1509 /*
1510 * Now reinitialize our supply of jumbo mbufs. The number
1511 * or size of jumbo mbufs may have changed.
1512 * Assume all of rxr->ptag are the same.
1513 */
1514 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1515 (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1516
1517 IXGBE_RX_LOCK(rxr);
1518
1519 /* Now replenish the mbufs */
1520 for (int j = 0; j != rxr->num_desc; ++j) {
1521 struct mbuf *mp;
1522
1523 rxbuf = &rxr->rx_buffers[j];
1524
1525 #ifdef DEV_NETMAP
1526 /*
1527 * In netmap mode, fill the map and set the buffer
1528 * address in the NIC ring, considering the offset
1529 * between the netmap and NIC rings (see comment in
1530 * ixgbe_setup_transmit_ring() ). No need to allocate
1531 * an mbuf, so end the block with a continue;
1532 */
1533 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1534 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1535 uint64_t paddr;
1536 void *addr;
1537
1538 addr = PNMB(na, slot + sj, &paddr);
1539 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1540 /* Update descriptor and the cached value */
1541 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1542 rxbuf->addr = htole64(paddr);
1543 continue;
1544 }
1545 #endif /* DEV_NETMAP */
1546
1547 rxbuf->flags = 0;
1548 rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1549 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1550 if (rxbuf->buf == NULL) {
1551 error = ENOBUFS;
1552 goto fail;
1553 }
1554 mp = rxbuf->buf;
1555 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1556 /* Get the memory mapping */
1557 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1558 mp, BUS_DMA_NOWAIT);
1559 if (error != 0)
1560 goto fail;
1561 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1562 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1563 /* Update the descriptor and the cached value */
1564 rxr->rx_base[j].read.pkt_addr =
1565 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1566 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1567 }
1568
1569
1570 /* Setup our descriptor indices */
1571 rxr->next_to_check = 0;
1572 rxr->next_to_refresh = 0;
1573 rxr->lro_enabled = FALSE;
1574 rxr->rx_copies.ev_count = 0;
1575 #if 0 /* NetBSD */
1576 rxr->rx_bytes.ev_count = 0;
1577 #if 1 /* Fix inconsistency */
1578 rxr->rx_packets.ev_count = 0;
1579 #endif
1580 #endif
1581 rxr->vtag_strip = FALSE;
1582
1583 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1584 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1585
1586 /*
1587 * Now set up the LRO interface
1588 */
1589 if (ixgbe_rsc_enable)
1590 ixgbe_setup_hw_rsc(rxr);
1591 #ifdef LRO
1592 else if (ifp->if_capenable & IFCAP_LRO) {
1593 device_t dev = adapter->dev;
1594 int err = tcp_lro_init(lro);
1595 if (err) {
1596 device_printf(dev, "LRO Initialization failed!\n");
1597 goto fail;
1598 }
1599 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1600 rxr->lro_enabled = TRUE;
1601 lro->ifp = adapter->ifp;
1602 }
1603 #endif /* LRO */
1604
1605 IXGBE_RX_UNLOCK(rxr);
1606
1607 return (0);
1608
1609 fail:
1610 ixgbe_free_receive_ring(rxr);
1611 IXGBE_RX_UNLOCK(rxr);
1612
1613 return (error);
1614 } /* ixgbe_setup_receive_ring */
1615
1616 /************************************************************************
1617 * ixgbe_setup_receive_structures - Initialize all receive rings.
1618 ************************************************************************/
1619 int
1620 ixgbe_setup_receive_structures(struct adapter *adapter)
1621 {
1622 struct rx_ring *rxr = adapter->rx_rings;
1623 int j;
1624
1625 for (j = 0; j < adapter->num_queues; j++, rxr++)
1626 if (ixgbe_setup_receive_ring(rxr))
1627 goto fail;
1628
1629 return (0);
1630 fail:
1631 /*
1632 * Free RX buffers allocated so far; we will only handle
1633 * the rings that completed, since the failing case will have
1634 * cleaned up after itself. 'j' failed, so it's the terminus.
1635 */
1636 for (int i = 0; i < j; ++i) {
1637 rxr = &adapter->rx_rings[i];
1638 IXGBE_RX_LOCK(rxr);
1639 ixgbe_free_receive_ring(rxr);
1640 IXGBE_RX_UNLOCK(rxr);
1641 }
1642
1643 return (ENOBUFS);
1644 } /* ixgbe_setup_receive_structures */
1645
1646
1647 /************************************************************************
1648 * ixgbe_free_receive_structures - Free all receive rings.
1649 ************************************************************************/
1650 void
1651 ixgbe_free_receive_structures(struct adapter *adapter)
1652 {
1653 struct rx_ring *rxr = adapter->rx_rings;
1654
1655 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1656
1657 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1658 ixgbe_free_receive_buffers(rxr);
1659 #ifdef LRO
1660 /* Free LRO memory */
1661 tcp_lro_free(&rxr->lro);
1662 #endif /* LRO */
1663 /* Free the ring memory as well */
1664 ixgbe_dma_free(adapter, &rxr->rxdma);
1665 IXGBE_RX_LOCK_DESTROY(rxr);
1666 }
1667
1668 free(adapter->rx_rings, M_DEVBUF);
1669 } /* ixgbe_free_receive_structures */
1670
1671
1672 /************************************************************************
1673 * ixgbe_free_receive_buffers - Free receive ring data structures
1674 ************************************************************************/
1675 static void
1676 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1677 {
1678 struct adapter *adapter = rxr->adapter;
1679 struct ixgbe_rx_buf *rxbuf;
1680
1681 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1682
1683 /* Cleanup any existing buffers */
1684 if (rxr->rx_buffers != NULL) {
1685 for (int i = 0; i < adapter->num_rx_desc; i++) {
1686 rxbuf = &rxr->rx_buffers[i];
1687 ixgbe_rx_discard(rxr, i);
1688 if (rxbuf->pmap != NULL) {
1689 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1690 rxbuf->pmap = NULL;
1691 }
1692 }
1693 if (rxr->rx_buffers != NULL) {
1694 free(rxr->rx_buffers, M_DEVBUF);
1695 rxr->rx_buffers = NULL;
1696 }
1697 }
1698
1699 if (rxr->ptag != NULL) {
1700 ixgbe_dma_tag_destroy(rxr->ptag);
1701 rxr->ptag = NULL;
1702 }
1703
1704 return;
1705 } /* ixgbe_free_receive_buffers */
1706
1707 /************************************************************************
1708 * ixgbe_rx_input
1709 ************************************************************************/
1710 static __inline void
1711 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1712 u32 ptype)
1713 {
1714 struct adapter *adapter = ifp->if_softc;
1715
1716 #ifdef LRO
1717 struct ethercom *ec = &adapter->osdep.ec;
1718
1719 /*
1720 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1721 * been computed by hardware, and the packet must not carry a VLAN tag in
1722 * its Ethernet header. For IPv6 we do not yet support ext. hdrs.
1723 */
1724 if (rxr->lro_enabled &&
1725 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1726 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1727 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1728 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1729 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1730 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1731 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1732 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1733 /*
1734 * Send to the stack if:
1735 ** - LRO not enabled, or
1736 ** - no LRO resources, or
1737 ** - lro enqueue fails
1738 */
1739 if (rxr->lro.lro_cnt != 0)
1740 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1741 return;
1742 }
1743 #endif /* LRO */
1744
1745 if_percpuq_enqueue(adapter->ipq, m);
1746 } /* ixgbe_rx_input */
1747
1748 /************************************************************************
1749 * ixgbe_rx_discard
1750 ************************************************************************/
1751 static __inline void
1752 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1753 {
1754 struct ixgbe_rx_buf *rbuf;
1755
1756 rbuf = &rxr->rx_buffers[i];
1757
1758 /*
1759 * With advanced descriptors the writeback
1760 * clobbers the buffer addrs, so it's easier
1761 * to just free the existing mbufs and take
1762 * the normal refresh path to get new buffers
1763 * and mapping.
1764 */
1765
1766 if (rbuf->fmp != NULL) {/* Partial chain ? */
1767 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1768 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1769 m_freem(rbuf->fmp);
1770 rbuf->fmp = NULL;
1771 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1772 } else if (rbuf->buf) {
1773 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1774 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1775 m_free(rbuf->buf);
1776 rbuf->buf = NULL;
1777 }
1778 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1779
1780 rbuf->flags = 0;
1781
1782 return;
1783 } /* ixgbe_rx_discard */
1784
1785
1786 /************************************************************************
1787 * ixgbe_rxeof
1788 *
1789  *   Executes in interrupt context. It replenishes the
1790  *   mbufs in the descriptor ring and sends data which has
1791  *   been DMA'd into host memory to the upper layer.
1792 *
1793 * Return TRUE for more work, FALSE for all clean.
1794 ************************************************************************/
1795 bool
1796 ixgbe_rxeof(struct ix_queue *que)
1797 {
1798 struct adapter *adapter = que->adapter;
1799 struct rx_ring *rxr = que->rxr;
1800 struct ifnet *ifp = adapter->ifp;
1801 #ifdef LRO
1802 struct lro_ctrl *lro = &rxr->lro;
1803 #endif /* LRO */
1804 union ixgbe_adv_rx_desc *cur;
1805 struct ixgbe_rx_buf *rbuf, *nbuf;
1806 int i, nextp, processed = 0;
1807 u32 staterr = 0;
1808 u32 count = adapter->rx_process_limit;
1809 #ifdef RSS
1810 u16 pkt_info;
1811 #endif
1812
1813 IXGBE_RX_LOCK(rxr);
1814
1815 #ifdef DEV_NETMAP
1816 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1817 		/* Same as the txeof routine: wake up clients on intr. */
1818 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1819 IXGBE_RX_UNLOCK(rxr);
1820 return (FALSE);
1821 }
1822 }
1823 #endif /* DEV_NETMAP */
1824
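	/*
	 * Main cleanup loop: process completed descriptors, at most
	 * rx_process_limit (the 'count' budget) per call.
	 */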
1825 for (i = rxr->next_to_check; count != 0;) {
1826 struct mbuf *sendmp, *mp;
1827 u32 rsc, ptype;
1828 u16 len;
1829 u16 vtag = 0;
1830 bool eop;
1831
1832 /* Sync the ring. */
1833 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1834 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1835
1836 cur = &rxr->rx_base[i];
1837 staterr = le32toh(cur->wb.upper.status_error);
1838 #ifdef RSS
1839 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1840 #endif
1841
1842 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1843 break;
1844
1845 count--;
1846 sendmp = NULL;
1847 nbuf = NULL;
1848 rsc = 0;
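		/* Clear so a stale DD bit is not seen on a later pass. */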
1849 cur->wb.upper.status_error = 0;
1850 rbuf = &rxr->rx_buffers[i];
1851 mp = rbuf->buf;
1852
1853 len = le16toh(cur->wb.upper.length);
1854 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1855 IXGBE_RXDADV_PKTTYPE_MASK;
1856 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1857
1858 /* Make sure bad packets are discarded */
1859 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1860 #if __FreeBSD_version >= 1100036
1861 if (adapter->feat_en & IXGBE_FEATURE_VF)
1862 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1863 #endif
1864 rxr->rx_discarded.ev_count++;
1865 ixgbe_rx_discard(rxr, i);
1866 goto next_desc;
1867 }
1868
1869 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1870 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1871
1872 		/*
1873 		 * On the 82599, which supports a hardware
1874 		 * LRO (called HW RSC), packets need
1875 		 * not be fragmented across sequential
1876 		 * descriptors; rather, the next descriptor
1877 		 * is indicated in bits of the descriptor.
1878 		 * This also means that we might process
1879 		 * more than one packet at a time, something
1880 		 * that has never been true before, so it
1881 		 * required eliminating global chain pointers
1882 		 * in favor of what we are doing here. -jfv
1883 		 */
1884 if (!eop) {
1885 /*
1886 * Figure out the next descriptor
1887 * of this frame.
1888 */
1889 if (rxr->hw_rsc == TRUE) {
1890 rsc = ixgbe_rsc_count(cur);
1891 rxr->rsc_num += (rsc - 1);
1892 }
1893 if (rsc) { /* Get hardware index */
1894 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1895 IXGBE_RXDADV_NEXTP_SHIFT);
1896 } else { /* Just sequential */
1897 nextp = i + 1;
1898 if (nextp == adapter->num_rx_desc)
1899 nextp = 0;
1900 }
1901 nbuf = &rxr->rx_buffers[nextp];
1902 prefetch(nbuf);
1903 }
1904 /*
1905 * Rather than using the fmp/lmp global pointers
1906 * we now keep the head of a packet chain in the
1907 * buffer struct and pass this along from one
1908 * descriptor to the next, until we get EOP.
1909 */
1910 mp->m_len = len;
1911 /*
1912 		 * See if there is a stored head: that determines
1913 		 * whether this is the first or a subsequent fragment.
1914 */
1915 sendmp = rbuf->fmp;
1916 if (sendmp != NULL) { /* secondary frag */
1917 rbuf->buf = rbuf->fmp = NULL;
1918 mp->m_flags &= ~M_PKTHDR;
1919 sendmp->m_pkthdr.len += mp->m_len;
1920 } else {
1921 /*
1922 * Optimize. This might be a small packet,
1923 * maybe just a TCP ACK. Do a fast copy that
1924 * is cache aligned into a new mbuf, and
1925 * leave the old mbuf+cluster for re-use.
1926 */
1927 if (eop && len <= IXGBE_RX_COPY_LEN) {
1928 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1929 if (sendmp != NULL) {
1930 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1931 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1932 len);
1933 sendmp->m_len = len;
1934 rxr->rx_copies.ev_count++;
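					/*
					 * Flag the buffer so the refresh
					 * path knows the old mbuf+cluster
					 * was kept for reuse.
					 */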
1935 rbuf->flags |= IXGBE_RX_COPY;
1936 }
1937 }
1938 if (sendmp == NULL) {
1939 rbuf->buf = rbuf->fmp = NULL;
1940 sendmp = mp;
1941 }
1942
1943 /* first desc of a non-ps chain */
1944 sendmp->m_flags |= M_PKTHDR;
1945 sendmp->m_pkthdr.len = mp->m_len;
1946 }
1947 ++processed;
1948
1949 /* Pass the head pointer on */
1950 if (eop == 0) {
1951 nbuf->fmp = sendmp;
1952 sendmp = NULL;
1953 mp->m_next = nbuf->buf;
1954 } else { /* Sending this frame */
1955 m_set_rcvif(sendmp, ifp);
1956 ++rxr->packets;
1957 rxr->rx_packets.ev_count++;
1958 /* capture data for AIM */
1959 rxr->bytes += sendmp->m_pkthdr.len;
1960 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1961 /* Process vlan info */
1962 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1963 vtag = le16toh(cur->wb.upper.vlan);
1964 if (vtag) {
1965 vlan_set_tag(sendmp, vtag);
1966 }
1967 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1968 ixgbe_rx_checksum(staterr, sendmp, ptype,
1969 &adapter->stats.pf);
1970 }
1971
1972 #if 0 /* FreeBSD */
1973 /*
1974 * In case of multiqueue, we have RXCSUM.PCSD bit set
1975 * and never cleared. This means we have RSS hash
1976 * available to be used.
1977 */
1978 if (adapter->num_queues > 1) {
1979 sendmp->m_pkthdr.flowid =
1980 le32toh(cur->wb.lower.hi_dword.rss);
1981 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1982 case IXGBE_RXDADV_RSSTYPE_IPV4:
1983 M_HASHTYPE_SET(sendmp,
1984 M_HASHTYPE_RSS_IPV4);
1985 break;
1986 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1987 M_HASHTYPE_SET(sendmp,
1988 M_HASHTYPE_RSS_TCP_IPV4);
1989 break;
1990 case IXGBE_RXDADV_RSSTYPE_IPV6:
1991 M_HASHTYPE_SET(sendmp,
1992 M_HASHTYPE_RSS_IPV6);
1993 break;
1994 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1995 M_HASHTYPE_SET(sendmp,
1996 M_HASHTYPE_RSS_TCP_IPV6);
1997 break;
1998 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1999 M_HASHTYPE_SET(sendmp,
2000 M_HASHTYPE_RSS_IPV6_EX);
2001 break;
2002 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2003 M_HASHTYPE_SET(sendmp,
2004 M_HASHTYPE_RSS_TCP_IPV6_EX);
2005 break;
2006 #if __FreeBSD_version > 1100000
2007 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2008 M_HASHTYPE_SET(sendmp,
2009 M_HASHTYPE_RSS_UDP_IPV4);
2010 break;
2011 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2012 M_HASHTYPE_SET(sendmp,
2013 M_HASHTYPE_RSS_UDP_IPV6);
2014 break;
2015 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2016 M_HASHTYPE_SET(sendmp,
2017 M_HASHTYPE_RSS_UDP_IPV6_EX);
2018 break;
2019 #endif
2020 default:
2021 M_HASHTYPE_SET(sendmp,
2022 M_HASHTYPE_OPAQUE_HASH);
2023 }
2024 } else {
2025 sendmp->m_pkthdr.flowid = que->msix;
2026 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2027 }
2028 #endif
2029 }
2030 next_desc:
2031 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2032 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2033
2034 /* Advance our pointers to the next descriptor. */
2035 if (++i == rxr->num_desc)
2036 i = 0;
2037
2038 /* Now send to the stack or do LRO */
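		/*
		 * The RX lock is dropped while the frame is handed up;
		 * next_to_check is written out first and i is reloaded
		 * afterwards in case it moved while unlocked.
		 */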
2039 if (sendmp != NULL) {
2040 rxr->next_to_check = i;
2041 IXGBE_RX_UNLOCK(rxr);
2042 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2043 IXGBE_RX_LOCK(rxr);
2044 i = rxr->next_to_check;
2045 }
2046
2047 /* Every 8 descriptors we go to refresh mbufs */
2048 if (processed == 8) {
2049 ixgbe_refresh_mbufs(rxr, i);
2050 processed = 0;
2051 }
2052 }
2053
2054 /* Refresh any remaining buf structs */
2055 if (ixgbe_rx_unrefreshed(rxr))
2056 ixgbe_refresh_mbufs(rxr, i);
2057
2058 rxr->next_to_check = i;
2059
2060 IXGBE_RX_UNLOCK(rxr);
2061
2062 #ifdef LRO
2063 /*
2064 * Flush any outstanding LRO work
2065 */
2066 tcp_lro_flush_all(lro);
2067 #endif /* LRO */
2068
2069 /*
2070 * Still have cleaning to do?
2071 */
2072 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2073 return (TRUE);
2074
2075 return (FALSE);
2076 } /* ixgbe_rxeof */
2077
2078
2079 /************************************************************************
2080 * ixgbe_rx_checksum
2081 *
2082 * Verify that the hardware indicated that the checksum is valid.
2083  *   Inform the stack about the status of the checksum so that the
2084  *   stack doesn't spend time verifying it again.
2085 ************************************************************************/
2086 static void
2087 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2088 struct ixgbe_hw_stats *stats)
2089 {
2090 u16 status = (u16)staterr;
2091 u8 errors = (u8)(staterr >> 24);
2092 #if 0
2093 bool sctp = false;
2094
2095 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2096 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2097 sctp = true;
2098 #endif
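	/*
	 * 'status' holds the low 16 STAT bits of the descriptor's
	 * status_error word; 'errors' holds the ERR bits (31:24).
	 */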
2099
2100 /* IPv4 checksum */
2101 if (status & IXGBE_RXD_STAT_IPCS) {
2102 stats->ipcs.ev_count++;
2103 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2104 /* IP Checksum Good */
2105 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2106 } else {
2107 stats->ipcs_bad.ev_count++;
2108 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2109 }
2110 }
2111 /* TCP/UDP/SCTP checksum */
2112 if (status & IXGBE_RXD_STAT_L4CS) {
2113 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2114 		stats->l4cs.ev_count++;
2115 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2116 mp->m_pkthdr.csum_flags |= type;
2117 } else {
2118 stats->l4cs_bad.ev_count++;
2119 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2120 }
2121 }
2122 } /* ixgbe_rx_checksum */
2123
2124 /************************************************************************
2125 * ixgbe_dma_malloc
2126 ************************************************************************/
2127 int
2128 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2129 struct ixgbe_dma_alloc *dma, const int mapflags)
2130 {
2131 device_t dev = adapter->dev;
2132 int r, rsegs;
2133
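	/*
	 * Standard bus_dma(9) sequence: create a DMA tag, allocate the
	 * memory, map it into kernel virtual address space, create a DMA
	 * map, and load the map to obtain the bus address of the ring.
	 */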
2134 r = ixgbe_dma_tag_create(
2135 /* parent */ adapter->osdep.dmat,
2136 /* alignment */ DBA_ALIGN,
2137 /* bounds */ 0,
2138 /* maxsize */ size,
2139 /* nsegments */ 1,
2140 /* maxsegsize */ size,
2141 /* flags */ BUS_DMA_ALLOCNOW,
2142 &dma->dma_tag);
2143 if (r != 0) {
2144 aprint_error_dev(dev,
2145 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2146 r);
2147 goto fail_0;
2148 }
2149
2150 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2151 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2152 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2153 if (r != 0) {
2154 aprint_error_dev(dev,
2155 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2156 goto fail_1;
2157 }
2158
2159 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2160 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2161 if (r != 0) {
2162 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2163 __func__, r);
2164 goto fail_2;
2165 }
2166
2167 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2168 if (r != 0) {
2169 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2170 __func__, r);
2171 goto fail_3;
2172 }
2173
2174 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2175 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2176 if (r != 0) {
2177 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2178 __func__, r);
2179 goto fail_4;
2180 }
2181 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2182 dma->dma_size = size;
2183 return 0;
2184 fail_4:
2185 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2186 fail_3:
2187 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2188 fail_2:
2189 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2190 fail_1:
2191 ixgbe_dma_tag_destroy(dma->dma_tag);
2192 fail_0:
2193
2194 return (r);
2195 } /* ixgbe_dma_malloc */
2196
2197 /************************************************************************
2198 * ixgbe_dma_free
2199 ************************************************************************/
2200 void
2201 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2202 {
2203 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2204 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2205 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2206 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2207 ixgbe_dma_tag_destroy(dma->dma_tag);
2208 } /* ixgbe_dma_free */
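
/*
 * Minimal usage sketch for the two helpers above, mirroring how the
 * tx/rx rings below pair them (error handling elided):
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return (ENOMEM);
 *	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *	...
 *	ixgbe_dma_free(adapter, &txr->txdma);
 */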
2209
2210
2211 /************************************************************************
2212 * ixgbe_allocate_queues
2213 *
2214 * Allocate memory for the transmit and receive rings, and then
2215  *   the descriptors associated with each; called only once at attach.
2216 ************************************************************************/
2217 int
2218 ixgbe_allocate_queues(struct adapter *adapter)
2219 {
2220 device_t dev = adapter->dev;
2221 struct ix_queue *que;
2222 struct tx_ring *txr;
2223 struct rx_ring *rxr;
2224 int rsize, tsize, error = IXGBE_SUCCESS;
2225 int txconf = 0, rxconf = 0;
2226
2227 /* First, allocate the top level queue structs */
2228 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2229 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2230 if (adapter->queues == NULL) {
2231 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2232 error = ENOMEM;
2233 goto fail;
2234 }
2235
2236 /* Second, allocate the TX ring struct memory */
2237 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2238 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2239 if (adapter->tx_rings == NULL) {
2240 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2241 error = ENOMEM;
2242 goto tx_fail;
2243 }
2244
2245 /* Third, allocate the RX ring */
2246 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2247 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2248 if (adapter->rx_rings == NULL) {
2249 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2250 error = ENOMEM;
2251 goto rx_fail;
2252 }
2253
2254 /* For the ring itself */
2255 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2256 DBA_ALIGN);
2257
2258 /*
2259 	 * Now set up the TX queues; txconf is needed to handle the
2260 	 * possibility that things fail midcourse and we need to
2261 	 * undo memory gracefully.
2262 */
2263 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2264 /* Set up some basics */
2265 txr = &adapter->tx_rings[i];
2266 txr->adapter = adapter;
2267 txr->txr_interq = NULL;
2268 /* In case SR-IOV is enabled, align the index properly */
2269 #ifdef PCI_IOV
2270 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2271 i);
2272 #else
2273 txr->me = i;
2274 #endif
2275 txr->num_desc = adapter->num_tx_desc;
2276
2277 /* Initialize the TX side lock */
2278 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2279
2280 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2281 BUS_DMA_NOWAIT)) {
2282 aprint_error_dev(dev,
2283 "Unable to allocate TX Descriptor memory\n");
2284 error = ENOMEM;
2285 goto err_tx_desc;
2286 }
2287 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2288 bzero((void *)txr->tx_base, tsize);
2289
2290 /* Now allocate transmit buffers for the ring */
2291 if (ixgbe_allocate_transmit_buffers(txr)) {
2292 aprint_error_dev(dev,
2293 "Critical Failure setting up transmit buffers\n");
2294 error = ENOMEM;
2295 goto err_tx_desc;
2296 }
2297 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2298 /* Allocate a buf ring */
2299 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2300 if (txr->txr_interq == NULL) {
2301 aprint_error_dev(dev,
2302 "Critical Failure setting up buf ring\n");
2303 error = ENOMEM;
2304 goto err_tx_desc;
2305 }
2306 }
2307 }
2308
2309 /*
2310 * Next the RX queues...
2311 */
2312 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2313 DBA_ALIGN);
2314 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2315 rxr = &adapter->rx_rings[i];
2316 /* Set up some basics */
2317 rxr->adapter = adapter;
2318 #ifdef PCI_IOV
2319 /* In case SR-IOV is enabled, align the index properly */
2320 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2321 i);
2322 #else
2323 rxr->me = i;
2324 #endif
2325 rxr->num_desc = adapter->num_rx_desc;
2326
2327 /* Initialize the RX side lock */
2328 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2329
2330 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2331 BUS_DMA_NOWAIT)) {
2332 aprint_error_dev(dev,
2333 			    "Unable to allocate RX Descriptor memory\n");
2334 error = ENOMEM;
2335 goto err_rx_desc;
2336 }
2337 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2338 bzero((void *)rxr->rx_base, rsize);
2339
2340 /* Allocate receive buffers for the ring */
2341 if (ixgbe_allocate_receive_buffers(rxr)) {
2342 aprint_error_dev(dev,
2343 "Critical Failure setting up receive buffers\n");
2344 error = ENOMEM;
2345 goto err_rx_desc;
2346 }
2347 }
2348
2349 /*
2350 * Finally set up the queue holding structs
2351 */
2352 for (int i = 0; i < adapter->num_queues; i++) {
2353 que = &adapter->queues[i];
2354 que->adapter = adapter;
2355 que->me = i;
2356 que->txr = &adapter->tx_rings[i];
2357 que->rxr = &adapter->rx_rings[i];
2358
2359 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2360 que->disabled_count = 0;
2361 }
2362
2363 return (0);
2364
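	/*
	 * Unwind in reverse order; rxconf/txconf record how many rings
	 * already had descriptor DMA memory allocated before the failure.
	 */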
2365 err_rx_desc:
2366 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2367 ixgbe_dma_free(adapter, &rxr->rxdma);
2368 err_tx_desc:
2369 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2370 ixgbe_dma_free(adapter, &txr->txdma);
2371 free(adapter->rx_rings, M_DEVBUF);
2372 rx_fail:
2373 free(adapter->tx_rings, M_DEVBUF);
2374 tx_fail:
2375 free(adapter->queues, M_DEVBUF);
2376 fail:
2377 return (error);
2378 } /* ixgbe_allocate_queues */
2379